Java Code Examples for org.apache.avro.mapred.AvroJob#setInputSchema()

The following examples show how to use org.apache.avro.mapred.AvroJob#setInputSchema(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage in the sidebar.
Example 1
Source File: AbstractAvroJob.java    From ml-ease with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a map-only JobConf whose input schema is supplied explicitly
 * rather than inferred from the input paths.
 *
 * @param mapperClass AvroMapper subclass implementing the map phase
 * @param inputSchema Schema of the input data.
 * @param outputSchema Schema of the mapper output
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass, 
                             Schema inputSchema, 
                             Schema outputSchema) throws IOException, URISyntaxException
{
  JobConf jobConf = createJobConf();

  // Map-only job: zero reduce tasks, so the registered identity
  // AvroReducer never actually runs.
  jobConf.setNumReduceTasks(0);

  AvroJob.setMapperClass(jobConf, mapperClass);
  AvroJob.setReducerClass(jobConf, AvroReducer.class);
  AvroJob.setInputSchema(jobConf, inputSchema);
  AvroJob.setOutputSchema(jobConf, outputSchema);

  return jobConf;
}
 
Example 2
Source File: AbstractAvroJob.java    From ml-ease with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a map-reduce JobConf whose input schema is supplied explicitly
 * rather than inferred from the input paths.
 *
 * @param mapperClass AvroMapper subclass for the mapper.
 * @param reducerClass AvroReducer subclass for the reducer.
 * @param inputSchema Schema of the input data.
 * @param mapperOutputSchema Mapper output schema. Must be an instance of org.apache.avro.mapred.Pair
 * @param outputSchema Reducer output schema
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Class<? extends AvroReducer> reducerClass,
                             Schema inputSchema,
                             Schema mapperOutputSchema,
                             Schema outputSchema) throws IOException, URISyntaxException
{
  JobConf jobConf = createJobConf();

  // Register the user-supplied map and reduce phases.
  AvroJob.setMapperClass(jobConf, mapperClass);
  AvroJob.setReducerClass(jobConf, reducerClass);

  // Wire the three schemas: job input, intermediate map output
  // (a Pair schema), and final reducer output.
  AvroJob.setInputSchema(jobConf, inputSchema);
  AvroJob.setMapOutputSchema(jobConf, mapperOutputSchema);
  AvroJob.setOutputSchema(jobConf, outputSchema);

  return jobConf;
}
 
Example 3
Source File: AbstractAvroJob.java    From ml-ease with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a map-reduce JobConf with a combiner and an explicitly supplied
 * input schema (rather than one inferred from the input paths).
 *
 * @param mapperClass AvroMapper subclass for the mapper.
 * @param reducerClass AvroReducer subclass for the reducer.
 * @param combinerClass AvroReducer subclass for the combiner.
 * @param inputSchema Schema of the input data.
 * @param mapperOutputSchema Mapper output schema. Must be an instance of org.apache.avro.mapred.Pair
 * @param outputSchema Reducer output schema
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Class<? extends AvroReducer> reducerClass,
                             Class<? extends AvroReducer> combinerClass,
                             Schema inputSchema,
                             Schema mapperOutputSchema,
                             Schema outputSchema) throws IOException, URISyntaxException
{
  JobConf jobConf = createJobConf();

  // Register all three processing phases, combiner included.
  AvroJob.setMapperClass(jobConf, mapperClass);
  AvroJob.setReducerClass(jobConf, reducerClass);
  AvroJob.setCombinerClass(jobConf, combinerClass);

  // Wire the three schemas: job input, intermediate map output
  // (a Pair schema), and final reducer output.
  AvroJob.setInputSchema(jobConf, inputSchema);
  AvroJob.setMapOutputSchema(jobConf, mapperOutputSchema);
  AvroJob.setOutputSchema(jobConf, outputSchema);

  return jobConf;
}
 
Example 4
Source File: AvroFileAccessor.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares this accessor for reading: registers the Avro schema on the job
 * configuration so AvroInputFormat can decode records, allocates the wrapper
 * object that will hold records during iteration, then delegates to the
 * superclass to open the underlying resource.
 *
 * @return the result of super.openForRead() — presumably whether the source
 *         was opened successfully; TODO confirm against the base class
 * @throws Exception propagated from the superclass open
 */
@Override
public boolean openForRead() throws Exception {
    // Pass the schema to the AvroInputFormat
    AvroJob.setInputSchema(jobConf, schema);

    // The avroWrapper required for the iteration
    // (reused as the container each record is deserialized into)
    avroWrapper = new AvroWrapper<>();

    return super.openForRead();
}
 
Example 5
Source File: AbstractAvroJob.java    From ml-ease with Apache License 2.0 4 votes vote down vote up
/**
 * Sets up various standard settings in the JobConf. You probably don't want to mess with this.
 * 
 * Builds the base configuration shared by all job variants: Avro I/O formats,
 * optional UGI override, local-mode settings, child JVM options, input/output
 * paths (with schema inference from the inputs), and a final pass that copies
 * every "hadoop."-prefixed config key through to the JobConf.
 *
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException 
 */
protected  JobConf createJobConf() throws IOException, URISyntaxException
{
  JobConf conf = new JobConf();
  
  conf.setJobName(getJobId());
  conf.setInputFormat(AvroInputFormat.class);
  conf.setOutputFormat(AvroOutputFormat.class);
  
  // Deflate level 9 = maximum compression for the Avro output files.
  AvroOutputFormat.setDeflateLevel(conf, 9);
  
  // Optionally run the job as a different Hadoop user/group identity.
  String hadoop_ugi = _config.getString("hadoop.job.ugi", null);
  if (hadoop_ugi != null)
  {
      conf.set("hadoop.job.ugi", hadoop_ugi);
  }
  // Local mode: in-process job tracker, local filesystem, and a fixed
  // scratch directory — used for testing without a cluster.
  if (_config.getBoolean("is.local", false))
  {
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.local.dir", "/tmp/map-red");

    _log.info("Running locally, no hadoop jar set.");
  }
  
  // set JVM options if present
  if (_config.containsKey("mapred.child.java.opts"))
  {
    conf.set("mapred.child.java.opts", _config.getString("mapred.child.java.opts"));
    _log.info("mapred.child.java.opts set to " + _config.getString("mapred.child.java.opts"));
  }

  // Register every input path (including subpaths), then infer the input
  // schema from the data found there. Explicit-schema overloads of
  // createJobConf may overwrite this afterwards.
  if (_config.containsKey(INPUT_PATHS))
  {
    List<String> inputPathnames = _config.getStringList(INPUT_PATHS);
    for (String pathname : inputPathnames)
    {
      AvroUtils.addAllSubPaths(conf, new Path(pathname));
    }
    AvroJob.setInputSchema(conf, AvroUtils.getAvroInputSchema(conf));
  }

  if (_config.containsKey(OUTPUT_PATH))
  {
    Path path = new Path(_config.get(OUTPUT_PATH));
    AvroOutputFormat.setOutputPath(conf, path);

    // Recursively delete any existing output so the job can rerun.
    if (_config.getBoolean("force.output.overwrite", false))
    {
      FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
      fs.delete(FileOutputFormat.getOutputPath(conf), true);
    }
  }
  // set all hadoop configs
  // Keys like "hadoop.foo.bar" are copied into the JobConf as "foo.bar";
  // the prefix match is case-insensitive but the original key casing is
  // preserved in the stripped key.
  for (String key : _config.keySet()) 
  {
    String lowerCase = key.toLowerCase();
    if ( lowerCase.startsWith(HADOOP_PREFIX)) 
    {
        String newKey = key.substring(HADOOP_PREFIX.length());
        conf.set(newKey, _config.get(key));
    }
  }
  return conf;
}