Page History

...

No HCatalog-specific setup is required for the HCatInputFormat and HCatOutputFormat interfaces.

Note: HCatalog is not thread safe.

HCatInputFormat

The HCatInputFormat is used with MapReduce jobs to read data from HCatalog-managed tables.

...

No Format

  /**
   * Set the input to use for the Job. This queries the metadata server with
   * the specified partition predicates, gets the matching partitions, puts
   * the information in the conf object. The inputInfo object is updated with
   * information needed in the client context
   * @param job the job object
   * @param inputJobInfo the input info for table to read
   * @throws IOException the exception in communicating with the metadata server
   */
  public static void setInput(Job job,
      InputJobInfo inputJobInfo) throws IOException;

  /**
   * Set the schema for the HCatRecord data returned by HCatInputFormat.
   * @param job the job object
   * @param hcatSchema the schema to use as the consolidated schema
   */
  public static void setOutputSchema(Job job,HCatSchema hcatSchema) 
    throws IOException;

  /**
   * Get the HCatTable schema for the table specified in the HCatInputFormat.setInput
   * call on the specified job context. This information is available only after
   * HCatInputFormat.setInput has been called for a JobContext.
   * @param context the context
   * @return the table schema
   * @throws IOException if HCatInputFormat.setInput has not been called 
   *                     for the current context
   */
  public static HCatSchema getTableSchema(JobContext context) 
    throws IOException;

HCatOutputFormat

HCatOutputFormat is used with MapReduce jobs to write data to HCatalog-managed tables.

...

HCatalog Data Type	Java Class in MapReduce	Values
TINYINT	java.lang.Byte	-128 to 127
SMALLINT	java.lang.Short	-(2^15) to (2^15)-1, which is -32,768 to 32,767
INT	java.lang.Integer	-(2^31) to (2^31)-1, which is -2,147,483,648 to 2,147,483,647
BIGINT	java.lang.Long	-(2^63) to (2^63)-1, which is -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
BOOLEAN	java.lang.Boolean	true or false
FLOAT	java.lang.Float	single-precision floating-point value
DOUBLE	java.lang.Double	double-precision floating-point value
DECIMAL	java.math.BigDecimal	exact floating-point value with 38-digit precision
<ac:structured-macro ac:name="unmigrated-wiki-markup" ac:schema-version="1" ac:macro-id="cf29b685-d6a5-43e9-9875-1955afc7f0dc"><ac:plain-text-body><![CDATA[	BINARY	byte[]	binary data	]]></ac:plain-text-body></ac:structured-macro>
STRING	java.lang.String	character string
STRUCT	java.util.List	structured data
ARRAY	java.util.List	values of one data type
MAP	java.util.Map	key-value pairs

...

No Format

public class GroupByAge extends Configured implements Tool {

    public static class Map extends
            Mapper<WritableComparable, HCatRecord, IntWritable, IntWritable> {

        int age;

        @Override
        protected void map(
                WritableComparable key,
                HCatRecord value,
                org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord, 
                        IntWritable, IntWritable>.Context context)
                throws IOException, InterruptedException {
            age = (Integer) value.get(1);
            context.write(new IntWritable(age), new IntWritable(1));
        }
    }

    public static class Reduce extends Reducer<IntWritable, IntWritable,
    WritableComparable, HCatRecord> {


      @Override 
      protected void reduce(
              IntWritable key,
              java.lang.Iterable<IntWritable> values, 
              org.apache.hadoop.mapreduce.Reducer<IntWritable, IntWritable,
                      WritableComparable, HCatRecord>.Context context)
              throws IOException, InterruptedException {
          int sum = 0;
          Iterator<IntWritable> iter = values.iterator();
          while (iter.hasNext()) {
              sum++;
              iter.next();
          }
          HCatRecord record = new DefaultHCatRecord(2);
          record.set(0, key.get());
          record.set(1, sum);

          context.write(null, record);
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        args = new GenericOptionsParser(conf, args).getRemainingArgs();

        String inputTableName = args[0];
        String outputTableName = args[1];
        String dbName = null;

        Job job = new Job(conf, "GroupByAge");
        HCatInputFormat.setInput(job, InputJobInfo.create(dbName,
                inputTableName, null));
        // initialize HCatOutputFormat

        job.setInputFormatClass(HCatInputFormat.class);
        job.setJarByClass(GroupByAge.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(WritableComparable.class);
        job.setOutputValueClass(DefaultHCatRecord.class);
        HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName,
                outputTableName, null));
        HCatSchema s = HCatOutputFormat.getTableSchema(job);
        System.err.println("INFO: output schema explicitly set for writing:"
                + s);
        HCatOutputFormat.setSchema(job, s);
        job.setOutputFormatClass(HCatOutputFormat.class);
        return (job.waitForCompletion(true) ? 0 : 1);
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new GroupByAge(), args);
        System.exit(exitCode);
    }
}

...

Panel

titleColor	indigo
titleBGColor	silver
title	Navigation Links

Previous: Load and Store Interfaces
Next: Reader and Writer Interfaces

General: HCatalog Manual – WebHCat Manual – Hive Wiki Home – Hive Project Site
Old version of this document (HCatalog 0.5.0): Input and Output Interfaces

Space shortcuts

Child pages

Versions Compared

Old Version 7

New Version 8

Key

HCatInputFormat

HCatOutputFormat