org.apache.avro.mapreduce.AvroJob Java Examples
The following examples show how to use
org.apache.avro.mapreduce.AvroJob.
Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
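AvroJob is the helper for attaching Avro schemas to a Hadoop MapReduce Job: setInputKeySchema/setOutputKeySchema configure the schemas used to read input and write output, while setMapOutputKeySchema/setMapOutputValueSchema configure the intermediate map output. Before the real-world examples below, here is a minimal orientation sketch of a map-only driver; the inline schema literal and path arguments are placeholders, not taken from any of the projects cited here.

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvroJobSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance();
    // Placeholder schema for illustration only.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":"
            + "[{\"name\":\"id\",\"type\":\"long\"}]}");

    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, schema);   // schema the mapper reads with

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroJob.setOutputKeySchema(job, schema);  // schema the job writes with

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(0); // map-only for this sketch
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}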
Example #1
Source File: CombinedAvroKeyInputFormat.java, from datafu (Apache License 2.0)
@SuppressWarnings("unchecked")
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
    throws IOException {
  Schema readerSchema = AvroJob.getInputKeySchema(context.getConfiguration());
  if (null == readerSchema) {
    LOG.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
    LOG.info("Using a reader schema equal to the writer schema.");
  }

  // Cast through Object to satisfy the generic bounds expected by CombineFileRecordReader.
  Object c = CombinedAvroKeyRecordReader.class;
  return new CombineFileRecordReader<AvroKey<T>, NullWritable>((CombineFileSplit) inputSplit, context,
      (Class<? extends RecordReader<AvroKey<T>, NullWritable>>) c);
}
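This input format packs many small Avro files into combined splits. A driver would wire it in roughly as in the following sketch; the split-size value and input path are illustrative assumptions, not values taken from datafu.

// Hypothetical driver wiring for CombinedAvroKeyInputFormat (not from the datafu source).
Job job = Job.getInstance(getConf());
job.setInputFormatClass(CombinedAvroKeyInputFormat.class);
AvroJob.setInputKeySchema(job, readerSchema); // optional; the reader falls back to the writer schema
// Cap the combined split size so a single split does not swallow the whole input.
// This sets mapreduce.input.fileinputformat.split.maxsize, which CombineFileInputFormat honors.
FileInputFormat.setMaxInputSplitSize(job, 512L * 1024 * 1024);
FileInputFormat.addInputPath(job, new Path("/data/events")); // placeholder path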
Example #2
Source File: TestMapReduceHBase.java, from kite (Apache License 2.0)
@Test
@SuppressWarnings("deprecation")
public void testJobEmptyView() throws Exception {
  Job job = new Job(HBaseTestUtils.getConf());

  String datasetName = tableName + ".TestGenericEntity";

  Dataset<GenericRecord> inputDataset = repo.create("default", "in",
      new DatasetDescriptor.Builder().schemaLiteral(testGenericEntity).build());

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(testGenericEntity)
      .build();
  Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);

  DatasetKeyInputFormat.configure(job).readFrom(inputDataset);

  job.setMapperClass(AvroKeyWrapperMapper.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(NullWritable.class);
  AvroJob.setMapOutputKeySchema(job, new Schema.Parser().parse(testGenericEntity));

  job.setReducerClass(AvroKeyWrapperReducer.class);
  job.setOutputKeyClass(GenericData.Record.class);
  job.setOutputValueClass(Void.class);
  AvroJob.setOutputKeySchema(job, new Schema.Parser().parse(testGenericEntity));

  DatasetKeyOutputFormat.configure(job).writeTo(outputDataset);

  Assert.assertTrue(job.waitForCompletion(true));
}
Example #3
Source File: CQLMapper.java, from aegisthus (Apache License 2.0)
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  avroSchema = AvroJob.getOutputKeySchema(context.getConfiguration());
  cfMetaData = CFMetadataUtility.initializeCfMetaData(context.getConfiguration());
  cfDef = cfMetaData.getCfDef();
  initBuilder();

  /* This exporter assumes tables are composite, which should be true of all current schemas */
  if (!cfDef.isComposite) {
    throw new RuntimeException("Only can export composite CQL table schemas.");
  }
}
Example #4
Source File: SegmentPreprocessingMapper.java, from incubator-pinot (Apache License 2.0)
@Override
public void setup(final Context context) {
  Configuration configuration = context.getConfiguration();

  String tableName = configuration.get(JobConfigConstants.SEGMENT_TABLE_NAME);
  _isAppend = configuration.get(InternalConfigConstants.IS_APPEND).equalsIgnoreCase("true");

  if (_isAppend) {
    // Get time column name
    _timeColumn = configuration.get(InternalConfigConstants.TIME_COLUMN_CONFIG);

    // Get sample time column value
    String timeColumnValue = configuration.get(InternalConfigConstants.TIME_COLUMN_VALUE);
    String pushFrequency = configuration.get(InternalConfigConstants.SEGMENT_PUSH_FREQUENCY);

    String timeType = configuration.get(InternalConfigConstants.SEGMENT_TIME_TYPE);
    String timeFormat = configuration.get(InternalConfigConstants.SEGMENT_TIME_FORMAT);
    DateTimeFormatSpec dateTimeFormatSpec;
    if (timeFormat.equals(DateTimeFieldSpec.TimeFormat.EPOCH.toString())) {
      dateTimeFormatSpec = new DateTimeFormatSpec(1, timeType, timeFormat);
    } else {
      dateTimeFormatSpec = new DateTimeFormatSpec(1, timeType, timeFormat,
          configuration.get(InternalConfigConstants.SEGMENT_TIME_SDF_PATTERN));
    }

    _normalizedDateSegmentNameGenerator =
        new NormalizedDateSegmentNameGenerator(tableName, null, false, "APPEND", pushFrequency, dateTimeFormatSpec);
    _sampleNormalizedTimeColumnValue = _normalizedDateSegmentNameGenerator.getNormalizedDate(timeColumnValue);
  }

  String sortedColumn = configuration.get(InternalConfigConstants.SORTED_COLUMN_CONFIG);
  // Logging the configs for the mapper
  LOGGER.info("Sorted Column: " + sortedColumn);
  if (sortedColumn != null) {
    _sortedColumn = sortedColumn;
  }

  _outputKeySchema = AvroJob.getMapOutputKeySchema(configuration);
  _outputSchema = AvroJob.getMapOutputValueSchema(configuration);
  _enablePartitioning =
      Boolean.parseBoolean(configuration.get(InternalConfigConstants.ENABLE_PARTITIONING, "false"));
}
Example #5
Source File: AvroKeyWithMetadataOutputFormat.java, from datafu (Apache License 2.0)
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<AvroKey<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
  // Get the writer schema.
  Schema writerSchema = AvroJob.getOutputKeySchema(context.getConfiguration());
  if (null == writerSchema) {
    throw new IOException(
        "AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema().");
  }

  return mRecordWriterFactory.create(
      writerSchema, getCompressionCodec(context), getAvroFileOutputStream(context), context.getConfiguration());
}
Example #6
Source File: AvroHdfsFileSink.java, from components (Apache License 2.0)
@Override
protected void configure(Job job, KV<AvroKey<IndexedRecord>, NullWritable> sample) {
  super.configure(job, sample);
  AvroKey<IndexedRecord> k = sample.getKey();
  AvroJob.setOutputKeySchema(job, k.datum().getSchema());
  FileOutputFormat.setCompressOutput(job, true);
  job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
}
Example #7
Source File: CompactionAvroJobConfigurator.java, from incubator-gobblin (Apache License 2.0)
@Override
protected void configureSchema(Job job) throws IOException {
  Schema newestSchema = MRCompactorAvroKeyDedupJobRunner.getNewestSchemaFromSource(job, this.fs);
  if (newestSchema != null) {
    if (this.state.getPropAsBoolean(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA, true)) {
      AvroJob.setInputKeySchema(job, newestSchema);
    }
    AvroJob.setMapOutputKeySchema(job, this.shouldDeduplicate ? getDedupKeySchema(newestSchema) : newestSchema);
    AvroJob.setMapOutputValueSchema(job, newestSchema);
    AvroJob.setOutputKeySchema(job, newestSchema);
  }
}
Example #8
Source File: AvroKeyCombineFileRecordReader.java, from incubator-gobblin (Apache License 2.0)
private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx) throws IOException {
  Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
  if (schema != null) {
    return schema;
  }

  // No reader schema configured: fall back to the writer schema embedded in the data file.
  Path path = split.getPath(idx);
  FileSystem fs = path.getFileSystem(cx.getConfiguration());
  return AvroUtils.getSchemaFromDataFile(path, fs);
}
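AvroUtils.getSchemaFromDataFile is a Gobblin helper. With only the core Avro library, the same fallback, reading the writer schema out of the container file's header, could be sketched as below; the method name readWriterSchema is illustrative, not part of any library.

// Sketch: read the writer schema from an Avro data file using plain Avro APIs.
private static Schema readWriterSchema(Path path, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  try (InputStream in = fs.open(path);
       DataFileStream<GenericRecord> reader =
           new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
    return reader.getSchema(); // the schema is stored in the container file's metadata
  }
}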
Example #9
Source File: MRCompactorAvroKeyDedupJobRunner.java, from incubator-gobblin (Apache License 2.0)
private void configureSchema(Job job) throws IOException {
  Schema newestSchema = getNewestSchemaFromSource(job, this.fs);
  if (this.useSingleInputSchema) {
    AvroJob.setInputKeySchema(job, newestSchema);
  }
  AvroJob.setMapOutputKeySchema(job, this.shouldDeduplicate ? getKeySchema(job, newestSchema) : newestSchema);
  AvroJob.setMapOutputValueSchema(job, newestSchema);
  AvroJob.setOutputKeySchema(job, newestSchema);
}
Example #10
Source File: AvroKeyMapper.java, from incubator-gobblin (Apache License 2.0)
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  this.keySchema = AvroJob.getMapOutputKeySchema(context.getConfiguration());
  this.outKey = new AvroKey<>();
  this.outKey.datum(new GenericData.Record(this.keySchema));
  this.outValue = new AvroValue<>();
}
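The pre-allocated outKey/outValue pattern avoids creating new wrapper objects per input record. A map() built on this setup might look like the following sketch; the field-projection logic is a hypothetical illustration of the pattern, not Gobblin's actual implementation.

// Hypothetical map() using the pre-allocated key/value from setup() above.
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {
  GenericRecord record = key.datum();
  GenericRecord keyRecord = (GenericRecord) this.outKey.datum();
  // Project only the fields named in the map-output key schema.
  for (Schema.Field field : this.keySchema.getFields()) {
    keyRecord.put(field.name(), record.get(field.name()));
  }
  this.outValue.datum(record);
  context.write(this.outKey, this.outValue);
}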
Example #11
Source File: ExportHBaseTableToAvro.java, from HBase-ToHDFS (Apache License 2.0)
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} "
        + "{compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToAvro.class);
  job.setJobName("ExportHBaseTableToAvro ");

  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(
      table,          // input HBase table name
      scan,           // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null,           // mapper output key
      null,           // mapper output value
      job);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();
  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example #12
Source File: CombinedAvroKeyInputFormat.java, from datafu (Apache License 2.0)
public CombinedAvroKeyRecordReader(CombineFileSplit inputSplit, TaskAttemptContext context, Integer idx) {
  super(AvroJob.getInputKeySchema(context.getConfiguration()));
  this.inputSplit = inputSplit;
  this.idx = idx;
}
Example #13
Source File: AggregationPhaseJob.java, from incubator-pinot (Apache License 2.0)
public Job run() throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName(name);
  job.setJarByClass(AggregationPhaseJob.class);

  FileSystem fs = FileSystem.get(getConf());
  Configuration configuration = job.getConfiguration();

  // Properties
  LOGGER.info("Properties {}", props);

  // Input Path
  String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
  LOGGER.info("Input path dir: " + inputPathDir);
  for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
    LOGGER.info("Adding input:" + inputPath);
    Path input = new Path(inputPath);
    FileInputFormat.addInputPath(job, input);
  }

  // Output path
  Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
  LOGGER.info("Output path dir: " + outputPath.toString());
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(job, outputPath);

  // Schema
  Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
  LOGGER.info("Schema : {}", avroSchema.toString(true));
  job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());

  // ThirdEyeConfig
  String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
  String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
  ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
  LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
  job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(),
      OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

  // Map config
  job.setMapperClass(AggregationMapper.class);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(BytesWritable.class);

  // Reduce config
  job.setReducerClass(AggregationReducer.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
  AvroJob.setOutputKeySchema(job, avroSchema);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);

  String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
  LOGGER.info("Num Reducers : {}", numReducers);
  if (StringUtils.isNotBlank(numReducers)) {
    job.setNumReduceTasks(Integer.valueOf(numReducers));
    LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
  }

  job.waitForCompletion(true);

  Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
  LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
  if (counter.getValue() == 0) {
    throw new IllegalStateException("No input records in " + inputPathDir);
  }
  counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
  LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());

  for (String metric : thirdeyeConfig.getMetricNames()) {
    counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
  }

  return job;
}
Example #14
Source File: DerivedColumnTransformationPhaseJob.java, from incubator-pinot (Apache License 2.0)
public Job run() throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName(name);
  job.setJarByClass(DerivedColumnTransformationPhaseJob.class);

  Configuration configuration = job.getConfiguration();
  FileSystem fs = FileSystem.get(configuration);

  // Input Path
  String inputPathDir = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH);
  LOGGER.info("Input path dir: " + inputPathDir);
  for (String inputPath : inputPathDir.split(",")) {
    LOGGER.info("Adding input:" + inputPath);
    Path input = new Path(inputPath);
    FileInputFormat.addInputPath(job, input);
  }

  // Topk path
  String topkPath = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH);
  LOGGER.info("Topk path : " + topkPath);

  // Output path
  Path outputPath = new Path(getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH));
  LOGGER.info("Output path dir: " + outputPath.toString());
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(job, outputPath);

  // Schema
  Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
  LOGGER.info("Schema : {}", avroSchema.toString(true));

  // ThirdEyeConfig
  String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
  String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
      props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
  ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
  job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(),
      OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
  LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());

  // New schema
  Schema outputSchema = newSchema(thirdeyeConfig);
  job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(), outputSchema.toString());

  // Map config
  job.setMapperClass(DerivedColumnTransformationPhaseMapper.class);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(NullWritable.class);

  AvroJob.setOutputKeySchema(job, outputSchema);
  LazyOutputFormat.setOutputFormatClass(job, AvroKeyOutputFormat.class);
  AvroMultipleOutputs.addNamedOutput(job, "avro", AvroKeyOutputFormat.class, outputSchema);

  job.setNumReduceTasks(0);

  job.waitForCompletion(true);

  return job;
}
Example #15
Source File: OSMRunner.java, from geowave (Apache License 2.0)
@Override
public int run(final String[] args) throws Exception {
  final Configuration conf = getConf();
  conf.set("tableName", ingestOptions.getQualifiedTableName());
  conf.set("osmVisibility", ingestOptions.getVisibilityOptions().getVisibility());

  // job settings
  final Job job = Job.getInstance(conf, ingestOptions.getJobName());
  job.setJarByClass(OSMRunner.class);
  switch (ingestOptions.getMapperType()) {
    case "NODE": {
      configureSchema(AvroNode.getClassSchema());
      inputAvroFile = ingestOptions.getNodesBasePath();
      job.setMapperClass(OSMNodeMapper.class);
      break;
    }
    case "WAY": {
      configureSchema(AvroWay.getClassSchema());
      inputAvroFile = ingestOptions.getWaysBasePath();
      job.setMapperClass(OSMWayMapper.class);
      break;
    }
    case "RELATION": {
      configureSchema(AvroRelation.getClassSchema());
      inputAvroFile = ingestOptions.getRelationsBasePath();
      job.setMapperClass(OSMRelationMapper.class);
      break;
    }
    default:
      break;
  }
  if ((avroSchema == null) || (inputAvroFile == null)) {
    throw new MissingArgumentException(
        "argument for mapper type must be one of: NODE, WAY, or RELATION");
  }

  enableLocalityGroups(ingestOptions);

  // input format
  job.setInputFormatClass(AvroKeyInputFormat.class);
  FileInputFormat.setInputPaths(job, inputAvroFile);
  AvroJob.setInputKeySchema(job, avroSchema);

  // mapper
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);
  job.setOutputFormatClass(AccumuloOutputFormat.class);
  AccumuloOutputFormat.setConnectorInfo(
      job, accumuloOptions.getUser(), new PasswordToken(accumuloOptions.getPassword()));
  AccumuloOutputFormat.setCreateTables(job, true);
  AccumuloOutputFormat.setDefaultTableName(job, ingestOptions.getQualifiedTableName());
  AccumuloOutputFormat.setZooKeeperInstance(
      job, new ClientConfiguration().withInstance(accumuloOptions.getInstance()).withZkHosts(
          accumuloOptions.getZookeeper()));

  // reducer
  job.setNumReduceTasks(0);

  return job.waitForCompletion(true) ? 0 : -1;
}
Example #16
Source File: SSTableExport.java, from aegisthus (Apache License 2.0)
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJarByClass(SSTableExport.class);
  CommandLine cl = getOptions(args);
  if (cl == null) {
    return 1;
  }

  // Check all of the paths and load the sstable version from the input filenames
  List<Path> paths = Lists.newArrayList();
  if (cl.hasOption(Feature.CMD_ARG_INPUT_FILE)) {
    for (String input : cl.getOptionValues(Feature.CMD_ARG_INPUT_FILE)) {
      checkVersionFromFilename(input);
      paths.add(new Path(input));
    }
  }
  if (cl.hasOption(Feature.CMD_ARG_INPUT_DIR)) {
    paths.addAll(getDataFiles(job.getConfiguration(), cl.getOptionValue(Feature.CMD_ARG_INPUT_DIR)));
  }

  String avroSchemaString = getAvroSchema(cl.getOptionValue(Feature.CMD_ARG_AVRO_SCHEMA_FILE), job.getConfiguration());
  Schema avroSchema = new Schema.Parser().parse(avroSchemaString);

  // At this point we have the version of sstable that we can use for this run
  job.getConfiguration().set(Aegisthus.Feature.CONF_SSTABLE_VERSION, version.toString());

  if (job.getConfiguration().get(Aegisthus.Feature.CONF_CQL_SCHEMA) != null) {
    setConfigurationFromCql(job.getConfiguration());
  }

  job.setInputFormatClass(AegisthusInputFormat.class);
  job.setMapperClass(CQLMapper.class);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroJob.setOutputKeySchema(job, avroSchema);

  // Map-only job
  job.setNumReduceTasks(0);

  TextInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
  FileOutputFormat.setOutputPath(job, new Path(cl.getOptionValue(Feature.CMD_ARG_OUTPUT_DIR)));

  job.submit();
  System.out.println(job.getJobID());
  System.out.println(job.getTrackingURL());
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
Example #17
Source File: BloomFilterCreator.java, from hiped2 (Apache License 2.0)
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(ReplicatedJoin.UserOptions.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path usersPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.USERS));
  Path outputPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(BloomFilterCreator.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  AvroJob.setOutputKeySchema(job, AvroBytesRecord.SCHEMA);
  job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, usersPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setNumReduceTasks(1);

  return job.waitForCompletion(true) ? 0 : 1;
}