org.apache.hadoop.mapred.Reducer Java Examples

The following examples show how to use org.apache.hadoop.mapred.Reducer. Each example is drawn from an open-source project; the Source File line above it names the file and the project it was taken from.
Example #1
Source File: HadoopReduceCombineFunction.java    From Flink-CEPplus with Apache License 2.0
/**
 * Maps two Hadoop Reducers (mapred API) to a combinable Flink GroupReduceFunction.
 *
 * @param hadoopReducer The Hadoop Reducer that is mapped to a GroupReduceFunction.
 * @param hadoopCombiner The Hadoop Reducer that is mapped to the combiner function.
 * @param conf The JobConf that is used to configure both Hadoop Reducers.
 */
public HadoopReduceCombineFunction(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopReducer,
							Reducer<KEYIN, VALUEIN, KEYIN, VALUEIN> hadoopCombiner, JobConf conf) {
	if (hadoopReducer == null) {
		throw new NullPointerException("Reducer may not be null.");
	}
	if (hadoopCombiner == null) {
		throw new NullPointerException("Combiner may not be null.");
	}
	if (conf == null) {
		throw new NullPointerException("JobConf may not be null.");
	}

	this.reducer = hadoopReducer;
	this.combiner = hadoopCombiner;
	this.jobConf = conf;
}
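
A minimal usage sketch (not from the original source) of this wrapper in a Flink DataSet job, assuming a hypothetical mapred-API SumReducer whose output key/value types match its input types, which is what the combiner slot requires:

DataSet<Tuple2<Text, LongWritable>> counts = input
	.groupBy(0)
	.reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(
		new SumReducer(), new SumReducer(), new JobConf()));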
 
Example #2
Source File: PipeReducer.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
    super.configure(job);
    Class<?> c = job.getClass("stream.reduce.posthook", null, Mapper.class);
    if(c != null) {
        postMapper = (Mapper)ReflectionUtils.newInstance(c, job);
        LOG.info("PostHook="+c.getName());
    }

    c = job.getClass("stream.reduce.prehook", null, Reducer.class);
    if(c != null) {
        preReducer = (Reducer)ReflectionUtils.newInstance(c, job);
        oc = new InmemBufferingOutputCollector();
        LOG.info("PreHook="+c.getName());
    }
    this.ignoreKey = job.getBoolean("stream.reduce.ignoreKey", false);
}
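
The hook classes are plugged in through plain configuration keys that configure() reads back via job.getClass(...). A hedged sketch of the producing side, with hypothetical MyPreReducer (a mapred Reducer) and MyPostMapper (a mapred Mapper):

JobConf job = new JobConf();
// These keys are read back by PipeReducer.configure() above.
job.setClass("stream.reduce.prehook", MyPreReducer.class, Reducer.class);
job.setClass("stream.reduce.posthook", MyPostMapper.class, Mapper.class);
job.setBoolean("stream.reduce.ignoreKey", true);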
 
Example #3
Source File: HadoopReduceCombineFunction.java    From flink with Apache License 2.0
/**
 * Maps two Hadoop Reducers (mapred API) to a combinable Flink GroupReduceFunction.
 *
 * @param hadoopReducer The Hadoop Reducer that is mapped to a GroupReduceFunction.
 * @param hadoopCombiner The Hadoop Reducer that is mapped to the combiner function.
 * @param conf The JobConf that is used to configure both Hadoop Reducers.
 */
public HadoopReduceCombineFunction(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopReducer,
							Reducer<KEYIN, VALUEIN, KEYIN, VALUEIN> hadoopCombiner, JobConf conf) {
	if (hadoopReducer == null) {
		throw new NullPointerException("Reducer may not be null.");
	}
	if (hadoopCombiner == null) {
		throw new NullPointerException("Combiner may not be null.");
	}
	if (conf == null) {
		throw new NullPointerException("JobConf may not be null.");
	}

	this.reducer = hadoopReducer;
	this.combiner = hadoopCombiner;
	this.jobConf = conf;
}
 
Example #4
Source File: MRCombiner.java    From incubator-tez with Apache License 2.0
private void runOldCombiner(final TezRawKeyValueIterator rawIter, final Writer writer) throws IOException {
  Class<? extends Reducer> reducerClazz = (Class<? extends Reducer>) conf.getClass("mapred.combiner.class", null, Reducer.class);
  
  Reducer combiner = ReflectionUtils.newInstance(reducerClazz, conf);
  
  OutputCollector collector = new OutputCollector() {
    @Override
    public void collect(Object key, Object value) throws IOException {
      writer.append(key, value);
    }
  };
  
  CombinerValuesIterator values = new CombinerValuesIterator(rawIter, keyClass, valClass, comparator);
  
  while (values.moveToNext()) {
    combiner.reduce(values.getKey(), values.getValues().iterator(), collector, reporter);
  }
}
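
The class looked up under "mapred.combiner.class" is an old-API (mapred) Reducer. A hedged sketch of a combiner that would satisfy this path, summing LongWritable values per key:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class SumCombiner extends MapReduceBase
		implements Reducer<Text, LongWritable, Text, LongWritable> {

	@Override
	public void reduce(Text key, Iterator<LongWritable> values,
			OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException {
		// Pre-aggregate on the map side: emit one summed record per key.
		long sum = 0;
		while (values.hasNext()) {
			sum += values.next().get();
		}
		output.collect(key, new LongWritable(sum));
	}
}

It would typically be registered with conf.setCombinerClass(SumCombiner.class), which stores the class name under that configuration key.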
 
Example #5
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0
private void combineAndSpill(
    RawKeyValueIterator kvIter,
    Counters.Counter inCounter) throws IOException {
  JobConf job = jobConf;
  Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
  RawComparator<K> comparator = 
    (RawComparator<K>)job.getCombinerKeyGroupingComparator();
  try {
    CombineValuesIterator values = new CombineValuesIterator(
        kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
        inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector,
                      Reporter.NULL);
      values.nextKey();
    }
  } finally {
    combiner.close();
  }
}
 
Example #6
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0
private void combineAndSpill(
    RawKeyValueIterator kvIter,
    Counters.Counter inCounter) throws IOException {
  JobConf job = jobConf;
  Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
  RawComparator<K> comparator = 
    (RawComparator<K>)job.getCombinerKeyGroupingComparator();
  try {
    CombineValuesIterator values = new CombineValuesIterator(
        kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
        inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector,
                      Reporter.NULL);
      values.nextKey();
    }
  } finally {
    combiner.close();
  }
}
 
Example #7
Source File: PersonVersion.java    From blog with MIT License
private static void runJobPv(String inputDir, String outputDir, String jobName, Class<? extends Mapper> mapClass,
                             Class<? extends Reducer> reduceClass) throws Exception {
    JobConf conf = new JobConf(PersonVersion.class);
    conf.setJobName(jobName);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(mapClass);
    conf.setCombinerClass(reduceClass);
    conf.setReducerClass(reduceClass);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, new Path(outputDir));

    JobClient.runJob(conf);
}
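
Because the same class is registered as both combiner and reducer here, this helper only suits reducers whose output key/value types equal their input types, such as a count-summing reducer. A hypothetical invocation (class and path names are illustrative):

runJobPv("/data/person/in", "/data/person/out", "person-pv",
		PersonPvMapper.class, PersonPvReducer.class);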
 
Example #8
Source File: MRCombiner.java    From tez with Apache License 2.0
private void runOldCombiner(final TezRawKeyValueIterator rawIter, final Writer writer) throws IOException {
  Class<? extends Reducer> reducerClazz = (Class<? extends Reducer>) conf.getClass("mapred.combiner.class", null, Reducer.class);
  
  Reducer combiner = ReflectionUtils.newInstance(reducerClazz, conf);
  
  OutputCollector collector = new OutputCollector() {
    @Override
    public void collect(Object key, Object value) throws IOException {
      writer.append(key, value);
      combineOutputRecordsCounter.increment(1);
    }
  };
  
  CombinerValuesIterator values = new CombinerValuesIterator(rawIter, keyClass, valClass, comparator);
  
  while (values.moveToNext()) {
    combiner.reduce(values.getKey(), values.getValues().iterator(), collector, reporter);
  }
}
 
Example #9
Source File: HadoopReduceCombineFunction.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public TypeInformation<Tuple2<KEYOUT, VALUEOUT>> getProducedType() {
	// Type parameters 2 and 3 of Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> are the output key and value types.
	Class<KEYOUT> outKeyClass = (Class<KEYOUT>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 2);
	Class<VALUEOUT> outValClass = (Class<VALUEOUT>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 3);

	final TypeInformation<KEYOUT> keyTypeInfo = TypeExtractor.getForClass(outKeyClass);
	final TypeInformation<VALUEOUT> valueTypeInfo = TypeExtractor.getForClass(outValClass);
	return new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo);
}
 
Example #10
Source File: PipelineTest.java    From hiped2 with Apache License 2.0
@Before
public void setUp() {
  mapper1 = new IdentityMapper<Text, Text>();
  reducer1 = new IdentityReducer<Text, Text>();
  mapper2 = new IdentityMapper<Text, Text>();
  reducer2 = new IdentityReducer<Text, Text>();
  driver = new PipelineMapReduceDriver<Text, Text, Text, Text>();
  driver.addMapReduce(new Pair<Mapper, Reducer>(mapper1, reducer1));
  driver.addMapReduce(new Pair<Mapper, Reducer>(mapper2, reducer2));
}
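
With two identity stages wired up, a test would feed a key/value pair through the whole pipeline and expect it back unchanged. A hedged sketch using MRUnit's fluent API:

@Test
public void testIdentityPipeline() throws IOException {
	driver.withInput(new Text("foo"), new Text("bar"))
			.withOutput(new Text("foo"), new Text("bar"))
			.runTest();
}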
 
Example #11
Source File: HadoopReduceFunction.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException {

	Class<Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>> reducerClass =
			(Class<Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>>) in.readObject();
	reducer = InstantiationUtil.instantiate(reducerClass);

	jobConf = new JobConf();
	jobConf.readFields(in);
}
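
The matching serialization side presumably writes the reducer class followed by the JobConf in its own Writable form; a sketch of that counterpart would be:

private void writeObject(final ObjectOutputStream out) throws IOException {
	out.writeObject(reducer.getClass());
	jobConf.write(out);
}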
 
Example #12
Source File: HadoopReduceFunction.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public TypeInformation<Tuple2<KEYOUT, VALUEOUT>> getProducedType() {
	Class<KEYOUT> outKeyClass = (Class<KEYOUT>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 2);
	Class<VALUEOUT> outValClass = (Class<VALUEOUT>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 3);

	final TypeInformation<KEYOUT> keyTypeInfo = TypeExtractor.getForClass(outKeyClass);
	final TypeInformation<VALUEOUT> valueTypeInfo = TypeExtractor.getForClass(outValClass);
	return new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo);
}
 
Example #13
Source File: HadoopReduceFunction.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void open(Configuration parameters) throws Exception {
	super.open(parameters);
	this.reducer.configure(jobConf);

	this.reporter = new HadoopDummyReporter();
	this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>();
	Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0);
	TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig());
	this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer);
}
 
Example #14
Source File: HadoopReduceFunction.java    From flink with Apache License 2.0
/**
 * Maps a Hadoop Reducer (mapred API) to a non-combinable Flink GroupReduceFunction.
 *
 * @param hadoopReducer The Hadoop Reducer to wrap.
 * @param conf The JobConf that is used to configure the Hadoop Reducer.
 */
public HadoopReduceFunction(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopReducer, JobConf conf) {
	if (hadoopReducer == null) {
		throw new NullPointerException("Reducer may not be null.");
	}
	if (conf == null) {
		throw new NullPointerException("JobConf may not be null.");
	}

	this.reducer = hadoopReducer;
	this.jobConf = conf;
}
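
Usage mirrors the combinable variant, minus the combiner argument; a sketch with a hypothetical mapred-API MaxReducer:

DataSet<Tuple2<Text, LongWritable>> maxima = input
	.groupBy(0)
	.reduceGroup(new HadoopReduceFunction<Text, LongWritable, Text, LongWritable>(
		new MaxReducer(), new JobConf()));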
 
Example #15
Source File: HadoopReduceCombineFunction.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException {

	Class<Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>> reducerClass =
			(Class<Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>>) in.readObject();
	reducer = InstantiationUtil.instantiate(reducerClass);

	Class<Reducer<KEYIN, VALUEIN, KEYIN, VALUEIN>> combinerClass =
			(Class<Reducer<KEYIN, VALUEIN, KEYIN, VALUEIN>>) in.readObject();
	combiner = InstantiationUtil.instantiate(combinerClass);

	jobConf = new JobConf();
	jobConf.readFields(in);
}
 
Example #16
Source File: HadoopReduceCombineFunction.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public TypeInformation<Tuple2<KEYOUT, VALUEOUT>> getProducedType() {
	Class<KEYOUT> outKeyClass = (Class<KEYOUT>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 2);
	Class<VALUEOUT> outValClass = (Class<VALUEOUT>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 3);

	final TypeInformation<KEYOUT> keyTypeInfo = TypeExtractor.getForClass(outKeyClass);
	final TypeInformation<VALUEOUT> valueTypeInfo = TypeExtractor.getForClass(outValClass);
	return new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo);
}
 
Example #17
Source File: HadoopReduceCombineFunction.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void open(Configuration parameters) throws Exception {
	super.open(parameters);
	this.reducer.configure(jobConf);
	this.combiner.configure(jobConf);

	this.reporter = new HadoopDummyReporter();
	Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0);
	TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig());
	this.valueIterator = new HadoopTupleUnwrappingIterator<>(keySerializer);
	this.combineCollector = new HadoopOutputCollector<>();
	this.reduceCollector = new HadoopOutputCollector<>();
}
 
Example #18
Source File: HadoopReduceCombineFunction.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void open(Configuration parameters) throws Exception {
	super.open(parameters);
	this.reducer.configure(jobConf);
	this.combiner.configure(jobConf);

	this.reporter = new HadoopDummyReporter();
	Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0);
	TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig());
	this.valueIterator = new HadoopTupleUnwrappingIterator<>(keySerializer);
	this.combineCollector = new HadoopOutputCollector<>();
	this.reduceCollector = new HadoopOutputCollector<>();
}
 
Example #19
Source File: ReduceOperator.java    From attic-apex-malhar with Apache License 2.0
@Override
public void setup(OperatorContext context)
{
  reporter = new ReporterImpl(ReporterImpl.ReporterType.Reducer, new Counters());
  if (context != null) {
    operatorId = context.getId();
  }
  cacheObject = new HashMap<K1, List<V1>>();
  outputCollector = new OutputCollectorImpl<K2, V2>();
  if (reduceClass != null) {
    try {
      reduceObj = reduceClass.newInstance();
    } catch (Exception e) {
      logger.info("can't instantiate object {}", e.getMessage());
      throw new RuntimeException(e);
    }
    Configuration conf = new Configuration();
    InputStream stream = null;
    if (configFile != null && configFile.length() > 0) {
      logger.info("system /{}", configFile);
      stream = ClassLoader.getSystemResourceAsStream("/" + configFile);
      if (stream == null) {
        logger.info("system {}", configFile);
        stream = ClassLoader.getSystemResourceAsStream(configFile);
      }
    }
    if (stream != null) {
      logger.info("found our stream... so adding it");
      conf.addResource(stream);
    }
    reduceObj.configure(new JobConf(conf));
  }
}
 
Example #20
Source File: HadoopReduceFunction.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("unchecked")
private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException {

	Class<Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>> reducerClass =
			(Class<Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>>) in.readObject();
	reducer = InstantiationUtil.instantiate(reducerClass);

	jobConf = new JobConf();
	jobConf.readFields(in);
}
 
Example #21
Source File: MRCombiner.java    From incubator-tez with Apache License 2.0
private void runNewCombiner(final TezRawKeyValueIterator rawIter, final Writer writer) throws InterruptedException, IOException {
  
  RecordWriter recordWriter = new RecordWriter() {

    @Override
    public void write(Object key, Object value) throws IOException,
        InterruptedException {
      writer.append(key, value);
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException,
        InterruptedException {
      // Will be closed by whoever invokes the combiner.
    }
  };
  
  Class<? extends org.apache.hadoop.mapreduce.Reducer> reducerClazz = (Class<? extends org.apache.hadoop.mapreduce.Reducer>) conf
      .getClass(MRJobConfig.COMBINE_CLASS_ATTR, null,
          org.apache.hadoop.mapreduce.Reducer.class);
  org.apache.hadoop.mapreduce.Reducer reducer = ReflectionUtils.newInstance(reducerClazz, conf);
  
  org.apache.hadoop.mapreduce.Reducer.Context reducerContext =
      createReduceContext(
          conf,
          mrTaskAttemptID,
          rawIter,
          new MRCounters.MRCounter(combineInputKeyCounter),
          new MRCounters.MRCounter(combineInputValueCounter),
          recordWriter,
          reporter,
          (RawComparator)comparator,
          keyClass,
          valClass);
  
  reducer.run(reducerContext);
  recordWriter.close(reducerContext);
}
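
This path expects a new-API (mapreduce) Reducer registered under MRJobConfig.COMBINE_CLASS_ATTR, i.e. via Job#setCombinerClass. A hedged sketch of such a combiner:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class SumCombiner
		extends org.apache.hadoop.mapreduce.Reducer<Text, LongWritable, Text, LongWritable> {

	@Override
	protected void reduce(Text key, Iterable<LongWritable> values, Context context)
			throws IOException, InterruptedException {
		// Pre-aggregate each key group before it reaches the combiner's RecordWriter.
		long sum = 0;
		for (LongWritable value : values) {
			sum += value.get();
		}
		context.write(key, new LongWritable(sum));
	}
}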
 
Example #22
Source File: HadoopReduceCombineFunction.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("unchecked")
private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException {

	Class<Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>> reducerClass =
			(Class<Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>>) in.readObject();
	reducer = InstantiationUtil.instantiate(reducerClass);

	Class<Reducer<KEYIN, VALUEIN, KEYIN, VALUEIN>> combinerClass =
			(Class<Reducer<KEYIN, VALUEIN, KEYIN, VALUEIN>>) in.readObject();
	combiner = InstantiationUtil.instantiate(combinerClass);

	jobConf = new JobConf();
	jobConf.readFields(in);
}
 
Example #23
Source File: MRCombiner.java    From tez with Apache License 2.0
private void runNewCombiner(final TezRawKeyValueIterator rawIter, final Writer writer) throws InterruptedException, IOException {
  
  RecordWriter recordWriter = new RecordWriter() {

    @Override
    public void write(Object key, Object value) throws IOException,
        InterruptedException {
      writer.append(key, value);
      combineOutputRecordsCounter.increment(1);
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException,
        InterruptedException {
      // Will be closed by whoever invokes the combiner.
    }
  };
  
  Class<? extends org.apache.hadoop.mapreduce.Reducer> reducerClazz = (Class<? extends org.apache.hadoop.mapreduce.Reducer>) conf
      .getClass(MRJobConfig.COMBINE_CLASS_ATTR, null,
          org.apache.hadoop.mapreduce.Reducer.class);
  org.apache.hadoop.mapreduce.Reducer reducer = ReflectionUtils.newInstance(reducerClazz, conf);
  
  org.apache.hadoop.mapreduce.Reducer.Context reducerContext =
      createReduceContext(
          conf,
          mrTaskAttemptID,
          rawIter,
          new MRCounters.MRCounter(combineInputRecordsCounter),
          new MRCounters.MRCounter(combineOutputRecordsCounter),
          recordWriter,
          reporter,
          (RawComparator)comparator,
          keyClass,
          valClass);
  
  reducer.run(reducerContext);
  recordWriter.close(reducerContext);
}
 
Example #24
Source File: HadoopReduceFunction.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public TypeInformation<Tuple2<KEYOUT, VALUEOUT>> getProducedType() {
	Class<KEYOUT> outKeyClass = (Class<KEYOUT>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 2);
	Class<VALUEOUT> outValClass = (Class<VALUEOUT>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 3);

	final TypeInformation<KEYOUT> keyTypeInfo = TypeExtractor.getForClass(outKeyClass);
	final TypeInformation<VALUEOUT> valueTypeInfo = TypeExtractor.getForClass(outValClass);
	return new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo);
}
 
Example #25
Source File: HadoopReduceFunction.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void open(Configuration parameters) throws Exception {
	super.open(parameters);
	this.reducer.configure(jobConf);

	this.reporter = new HadoopDummyReporter();
	this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>();
	Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0);
	TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig());
	this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer);
}
 
Example #26
Source File: HadoopReduceFunction.java    From flink with Apache License 2.0
/**
 * Maps a Hadoop Reducer (mapred API) to a non-combinable Flink GroupReduceFunction.
 *
 * @param hadoopReducer The Hadoop Reducer to wrap.
 * @param conf The JobConf that is used to configure the Hadoop Reducer.
 */
public HadoopReduceFunction(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopReducer, JobConf conf) {
	if (hadoopReducer == null) {
		throw new NullPointerException("Reducer may not be null.");
	}
	if (conf == null) {
		throw new NullPointerException("JobConf may not be null.");
	}

	this.reducer = hadoopReducer;
	this.jobConf = conf;
}