...
No HCatalog-specific setup is required for the HCatInputFormat and HCatOutputFormat interfaces.
Note: HCatalog is not thread-safe.
HCatInputFormat
The HCatInputFormat is used with MapReduce jobs to read data from HCatalog-managed tables.
...
No Format |
---|
/**
 * Set the input to use for the Job. This queries the metadata server with
 * the specified partition predicates, gets the matching partitions, and puts
 * the information in the conf object. The inputInfo object is updated with
 * information needed in the client context.
 * @param job the job object
 * @param inputJobInfo the input info for the table to read
 * @throws IOException the exception in communicating with the metadata server
 */
public static void setInput(Job job, InputJobInfo inputJobInfo) throws IOException;

/**
 * Set the schema for the HCatRecord data returned by HCatInputFormat.
 * If this is never called, the table-level schema is used.
 * @param job the job object
 * @param hcatSchema the schema to use as the consolidated schema
 */
public static void setOutputSchema(Job job,HCatSchema hcatSchema) throws IOException;

/**
 * Get the HCatTable schema for the table specified in the HCatInputFormat.setInput
 * call on the specified job context. This information is available only after
 * HCatInputFormat.setInput has been called for a JobContext.
 * @param context the context
 * @return the table schema
 * @throws IOException if HCatInputFormat.setInput has not been called
 *                     for the current context
 */
public static HCatSchema getTableSchema(JobContext context) throws IOException;
|
HCatOutputFormat
HCatOutputFormat is used with MapReduce jobs to write data to HCatalog-managed tables.
...
HCatalog Data Type | Java Class in MapReduce | Values | ||
---|---|---|---|---|
TINYINT | java.lang.Byte | -128 to 127 | ||
SMALLINT | java.lang.Short | -(2^15) to (2^15)-1, which is -32,768 to 32,767 | ||
INT | java.lang.Integer | -(2^31) to (2^31)-1, which is -2,147,483,648 to 2,147,483,647 | ||
BIGINT | java.lang.Long | -(2^63) to (2^63)-1, which is -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 | ||
BOOLEAN | java.lang.Boolean | true or false | ||
FLOAT | java.lang.Float | single-precision floating-point value | ||
DOUBLE | java.lang.Double | double-precision floating-point value | ||
DECIMAL | java.math.BigDecimal | exact numeric value with 38-digit precision | ||
BINARY | byte[] | binary data | ||
STRING | java.lang.String | character string | ||
STRUCT | java.util.List | structured data | ||
ARRAY | java.util.List | values of one data type | ||
MAP | java.util.Map | key-value pairs |
...
No Format |
---|
public class GroupByAge extends Configured implements Tool { public static class Map extends Mapper<WritableComparable, HCatRecord, IntWritable, IntWritable> { int age; @Override protected void map( WritableComparable key, HCatRecord value, org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord, IntWritable, IntWritable>.Context context) throws IOException, InterruptedException { age = (Integer) value.get(1); context.write(new IntWritable(age), new IntWritable(1)); } } public static class Reduce extends Reducer<IntWritable, IntWritable, WritableComparable, HCatRecord> { @Override protected void reduce( IntWritable key, java.lang.Iterable<IntWritable> values, org.apache.hadoop.mapreduce.Reducer<IntWritable, IntWritable, WritableComparable, HCatRecord>.Context context) throws IOException, InterruptedException { int sum = 0; Iterator<IntWritable> iter = values.iterator(); while (iter.hasNext()) { sum++; iter.next(); } HCatRecord record = new DefaultHCatRecord(2); record.set(0, key.get()); record.set(1, sum); context.write(null, record); } } public int run(String[] args) throws Exception { Configuration conf = getConf(); args = new GenericOptionsParser(conf, args).getRemainingArgs(); String inputTableName = args[0]; String outputTableName = args[1]; String dbName = null; Job job = new Job(conf, "GroupByAge"); HCatInputFormat.setInput(job, InputJobInfo.create(dbName, inputTableName, null)); // initialize HCatOutputFormat job.setInputFormatClass(HCatInputFormat.class); job.setJarByClass(GroupByAge.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(WritableComparable.class); job.setOutputValueClass(DefaultHCatRecord.class); HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null)); HCatSchema s = HCatOutputFormat.getTableSchema(job); System.err.println("INFO: output schema explicitly set for 
writing:" + s); HCatOutputFormat.setSchema(job, s); job.setOutputFormatClass(HCatOutputFormat.class); return (job.waitForCompletion(true) ? 0 : 1); } public static void main(String[] args) throws Exception { int exitCode = ToolRunner.run(new GroupByAge(), args); System.exit(exitCode); } } |
...
Panel | ||||||
---|---|---|---|---|---|---|
| ||||||
Previous: Load and Store Interfaces General: HCatalog Manual – WebHCat Manual – Hive Wiki Home – Hive Project Site |