org.apache.hadoop.mapred.Mapper Java Examples

The following examples show how to use org.apache.hadoop.mapred.Mapper, the mapper interface of Hadoop's older "mapred" MapReduce API. Each example is an excerpt from an open-source project; the source file, project, and license are noted above each snippet.
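
For orientation, here is a minimal sketch of what implementing this interface looks like. The class below is illustrative and not taken from any of the projects that follow; note that in the old "mapred" API, Mapper is an interface rather than a base class, and MapReduceBase contributes empty configure() and close() methods.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Illustrative word-count mapper: emits (token, 1) for each token in a line.
public class WordCountMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {

  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();

  public void map(LongWritable key, Text value,
      OutputCollector<Text, IntWritable> output, Reporter reporter)
      throws IOException {
    for (String token : value.toString().split("\\s+")) {
      if (!token.isEmpty()) {
        word.set(token);
        output.collect(word, ONE);
      }
    }
  }
}
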
Example #1
Source File: TestMultipleInputs.java    From hadoop-gpu with Apache License 2.0 (the same test appears verbatim in the hadoop, big-c, and RDFS projects)
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
 
Example #2
Source File: TestDFSIO.java    From big-c with Apache License 2.0 (the same method appears verbatim in the hadoop project)
private void runIOTest(
        Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, 
        Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example #3
Source File: PipeReducer.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
    super.configure(job);
    Class<?> c = job.getClass("stream.reduce.posthook", null, Mapper.class);
    if(c != null) {
        postMapper = (Mapper)ReflectionUtils.newInstance(c, job);
        LOG.info("PostHook="+c.getName());
    }

    c = job.getClass("stream.reduce.prehook", null, Reducer.class);
    if(c != null) {
        preReducer = (Reducer)ReflectionUtils.newInstance(c, job);
        oc = new InmemBufferingOutputCollector();
        LOG.info("PreHook="+c.getName());
    }
    this.ignoreKey = job.getBoolean("stream.reduce.ignoreKey", false);
}
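
The hooks above are resolved from plain configuration keys, so enabling them is just a matter of setting class names on the JobConf. A hedged sketch (MyPostMapper and MyPreReducer are hypothetical classes, and the before/after semantics are inferred from the hook names):

JobConf job = new JobConf();
// Hypothetical Mapper; by its name, applied after the streaming reducer runs.
job.set("stream.reduce.posthook", MyPostMapper.class.getName());
// Hypothetical Reducer; by its name, applied before the streaming reducer.
job.set("stream.reduce.prehook", MyPreReducer.class.getName());
// Matches the ignoreKey flag read in configure() above.
job.setBoolean("stream.reduce.ignoreKey", true);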
 
Example #4
Source File: PersonVersion.java    From blog with MIT License
private static void runJobPv(String inputDir, String outputDir, String jobName, Class<? extends Mapper> mapClass,
                             Class<? extends Reducer> reduceClass) throws Exception {
    JobConf conf = new JobConf(PersonVersion.class);
    conf.setJobName(jobName);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(mapClass);
    conf.setCombinerClass(reduceClass);
    conf.setReducerClass(reduceClass);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, new Path(outputDir));

    JobClient.runJob(conf);
}
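
A call site for this helper might look like the following sketch (the paths, job name, and the PvMapper/PvReducer classes are hypothetical). Because runJobPv registers reduceClass as both combiner and reducer, the reduce function must be commutative and associative, which holds for the summing typical of a page-view count:

// Hypothetical invocation; PvMapper would emit (Text, IntWritable) pairs and
// PvReducer would sum the IntWritable values per key.
runJobPv("/data/person/input", "/data/person/out-pv", "person-pv",
    PvMapper.class, PvReducer.class);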
 
Example #5
Source File: TaggedInputSplit.java    From RDFS with Apache License 2.0 (the same constructor appears verbatim in the hadoop, big-c, and hadoop-gpu projects)
/**
 * Creates a new TaggedInputSplit.
 * 
 * @param inputSplit The InputSplit to be tagged
 * @param conf The configuration to use
 * @param inputFormatClass The InputFormat class to use for this job
 * @param mapperClass The Mapper class to use for this job
 */
public TaggedInputSplit(InputSplit inputSplit, Configuration conf,
    Class<? extends InputFormat> inputFormatClass,
    Class<? extends Mapper> mapperClass) {
  this.inputSplitClass = inputSplit.getClass();
  this.inputSplit = inputSplit;
  this.conf = conf;
  this.inputFormatClass = inputFormatClass;
  this.mapperClass = mapperClass;
}
 
Example #6
Source File: MultipleInputs.java    From big-c with Apache License 2.0
/**
 * Add a {@link Path} with a custom {@link InputFormat} and
 * {@link Mapper} to the list of inputs for the map-reduce job.
 * 
 * @param conf The configuration of the job
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this path
 * @param mapperClass {@link Mapper} class to use for this path
 */
public static void addInputPath(JobConf conf, Path path,
    Class<? extends InputFormat> inputFormatClass,
    Class<? extends Mapper> mapperClass) {

  addInputPath(conf, path, inputFormatClass);

  String mapperMapping = path.toString() + ";" + mapperClass.getName();
  String mappers = conf.get("mapreduce.input.multipleinputs.dir.mappers");
  conf.set("mapreduce.input.multipleinputs.dir.mappers", mappers == null ? mapperMapping
     : mappers + "," + mapperMapping);

  conf.setMapperClass(DelegatingMapper.class);
}
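
A hedged sketch of a job that uses this method to read two differently formatted directories (the paths, the driver class, and the mapper classes are illustrative):

JobConf conf = new JobConf(MyJob.class); // MyJob is a placeholder driver class
// Plain text, keyed by byte offset; RawLogMapper is hypothetical.
MultipleInputs.addInputPath(conf, new Path("/logs/raw"),
    TextInputFormat.class, RawLogMapper.class);
// Tab-separated key/value text; KvLogMapper is hypothetical.
MultipleInputs.addInputPath(conf, new Path("/logs/kv"),
    KeyValueTextInputFormat.class, KvLogMapper.class);
// As the source above shows, the job's real mapper becomes DelegatingMapper,
// which instantiates the per-path mapper recorded in each TaggedInputSplit
// (see Examples #5 and #11).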
 
Example #7
Source File: HadoopMapFunction.java    From flink with Apache License 2.0 (the same constructor appears verbatim in the Flink-CEPplus project)
/**
 * Maps a Hadoop Mapper (mapred API) to a Flink FlatMapFunction.
 * The Hadoop Mapper is configured with the provided JobConf.
 *
 * @param hadoopMapper The Hadoop Mapper to wrap.
 * @param conf The JobConf that is used to configure the Hadoop Mapper.
 */
public HadoopMapFunction(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopMapper, JobConf conf) {
	if (hadoopMapper == null) {
		throw new NullPointerException("Mapper may not be null.");
	}
	if (conf == null) {
		throw new NullPointerException("JobConf may not be null.");
	}

	this.mapper = hadoopMapper;
	this.jobConf = conf;
}
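
A hedged usage sketch: given an existing DataSet of Tuple2<LongWritable, Text> records and a mapred-API TokenizerMapper (both assumed, as is the readInput() helper), the wrapper plugs straight into flatMap:

// "readInput()" and TokenizerMapper are assumptions for illustration.
DataSet<Tuple2<LongWritable, Text>> input = readInput();
DataSet<Tuple2<Text, LongWritable>> result = input.flatMap(
    new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(
        new TokenizerMapper(), new JobConf()));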
 
Example #8
Source File: HadoopMapFunction.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public TypeInformation<Tuple2<KEYOUT, VALUEOUT>> getProducedType() {
	Class<KEYOUT> outKeyClass = (Class<KEYOUT>) TypeExtractor.getParameterType(Mapper.class, mapper.getClass(), 2);
	Class<VALUEOUT> outValClass = (Class<VALUEOUT>) TypeExtractor.getParameterType(Mapper.class, mapper.getClass(), 3);

	final TypeInformation<KEYOUT> keyTypeInfo = TypeExtractor.getForClass((Class<KEYOUT>) outKeyClass);
	final TypeInformation<VALUEOUT> valueTypeInfo = TypeExtractor.getForClass((Class<VALUEOUT>) outValClass);
	return new TupleTypeInfo<Tuple2<KEYOUT, VALUEOUT>>(keyTypeInfo, valueTypeInfo);
}
 
Example #9
Source File: HadoopMapFunction.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException {
	Class<Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>> mapperClass =
			(Class<Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>>) in.readObject();
	mapper = InstantiationUtil.instantiate(mapperClass);

	jobConf = new JobConf();
	jobConf.readFields(in);
}
 
Example #10
Source File: PipelineTest.java    From hiped2 with Apache License 2.0
@Before
public void setUp() {
  mapper1 = new IdentityMapper<Text, Text>();
  reducer1 = new IdentityReducer<Text, Text>();
  mapper2 = new IdentityMapper<Text, Text>();
  reducer2 = new IdentityReducer<Text, Text>();
  driver = new PipelineMapReduceDriver<Text, Text, Text, Text>();
  driver.addMapReduce(new Pair<Mapper, Reducer>(mapper1, reducer1));
  driver.addMapReduce(new Pair<Mapper, Reducer>(mapper2, reducer2));
}
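
A test method can then feed the pipeline and assert on its final output. Since both stages are identity mapper/reducer pairs here, the output should equal the input; the sketch below assumes MRUnit's fluent withInput/withOutput/runTest methods:

@Test
public void testIdentityPipeline() throws IOException {
  // The pair flows through mapper1/reducer1, then mapper2/reducer2, unchanged.
  driver.withInput(new Text("k1"), new Text("v1"))
        .withOutput(new Text("k1"), new Text("v1"))
        .runTest();
}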
 
Example #11
Source File: DelegatingMapper.java    From RDFS with Apache License 2.0 (the same method appears verbatim in the hadoop and hadoop-gpu projects)
@SuppressWarnings("unchecked")
public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector,
    Reporter reporter) throws IOException {

  if (mapper == null) {
    // Find the Mapper from the TaggedInputSplit.
    TaggedInputSplit inputSplit = (TaggedInputSplit) reporter.getInputSplit();
    mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit
       .getMapperClass(), conf);
  }
  mapper.map(key, value, outputCollector, reporter);
}
 
Example #12
Source File: MultipleInputs.java    From hadoop-gpu with Apache License 2.0 (the same method appears verbatim in the RDFS project; note it stores the mapping under the older mapred.input.dir.mappers key, where Example #6 uses mapreduce.input.multipleinputs.dir.mappers)
/**
 * Add a {@link Path} with a custom {@link InputFormat} and
 * {@link Mapper} to the list of inputs for the map-reduce job.
 * 
 * @param conf The configuration of the job
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this path
 * @param mapperClass {@link Mapper} class to use for this path
 */
public static void addInputPath(JobConf conf, Path path,
    Class<? extends InputFormat> inputFormatClass,
    Class<? extends Mapper> mapperClass) {

  addInputPath(conf, path, inputFormatClass);

  String mapperMapping = path.toString() + ";" + mapperClass.getName();
  String mappers = conf.get("mapred.input.dir.mappers");
  conf.set("mapred.input.dir.mappers", mappers == null ? mapperMapping
     : mappers + "," + mapperMapping);

  conf.setMapperClass(DelegatingMapper.class);
}
 
Example #13
Source File: MapperWrapper.java    From ApprovalTests.Java with Apache License 2.0
public MapperWrapper(Mapper<KeyIn, ValueIn, KeyOut, ValueOut> mapper, Class<KeyIn> keyIn,
    Class<ValueIn> valueIn, Class<KeyOut> keyOut, Class<ValueOut> valueOut)
{
  this.mapper = mapper;
  this.keyIn = keyIn;
  this.valueIn = valueIn;
  this.keyOut = keyOut;
  this.valueOut = valueOut;
}
 
Example #14
Source File: TaggedInputSplit.java    From RDFS with Apache License 2.0 (the same method appears verbatim in the hadoop and hadoop-gpu projects)
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
  inputSplitClass = (Class<? extends InputSplit>) readClass(in);
  inputSplit = (InputSplit) ReflectionUtils
     .newInstance(inputSplitClass, conf);
  inputSplit.readFields(in);
  inputFormatClass = (Class<? extends InputFormat>) readClass(in);
  mapperClass = (Class<? extends Mapper>) readClass(in);
}
 
Example #15
Source File: ChainMapper.java    From RDFS with Apache License 2.0 (the same method appears verbatim in the hadoop-gpu project)
/**
 * Chains the <code>map(...)</code> methods of the Mappers in the chain.
 */
@SuppressWarnings({"unchecked"})
public void map(Object key, Object value, OutputCollector output,
                Reporter reporter) throws IOException {
  Mapper mapper = chain.getFirstMap();
  if (mapper != null) {
    mapper.map(key, value, chain.getMapperCollector(0, output, reporter),
               reporter);
  }
}
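
The chain consulted by getFirstMap() is assembled on the JobConf before job submission via ChainMapper.addMapper. A hedged sketch with two hypothetical mapper classes (AMap mapping LongWritable/Text to Text/Text, BMap mapping Text/Text to Text/IntWritable):

JobConf job = new JobConf();
// byValue=true copies key/value objects between stages, so a stage may
// safely reuse the objects it receives.
ChainMapper.addMapper(job, AMap.class,
    LongWritable.class, Text.class, Text.class, Text.class,
    true, new JobConf(false));
ChainMapper.addMapper(job, BMap.class,
    Text.class, Text.class, Text.class, IntWritable.class,
    true, new JobConf(false));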
 
Example #16
Source File: PipeMapper.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
  this.ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()) ||
      job.getBoolean("stream.map.ignoreKey", false);

  this.skipNewline = job.getBoolean("stream.map.skipNewline", false);

  Class<?> c = job.getClass("stream.map.posthook", null, Mapper.class);
  if(c != null) {
      postMapper = (Mapper)ReflectionUtils.newInstance(c, job);
      LOG.info("PostHook="+c.getName());
  }
}
 