org.apache.hadoop.io.IntWritable Java Examples
The following examples show how to use
org.apache.hadoop.io.IntWritable.
Each example is taken from an open-source project; the project, source file, and license are noted above the code.
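Before the project examples, here is a minimal standalone sketch (the IntWritableBasics class name and printed values are illustrative, not taken from any project below) showing the core IntWritable API: boxing an int, comparing two values, and a serialization round trip through DataOutputBuffer and DataInputBuffer.

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

import java.io.IOException;

public class IntWritableBasics {
    public static void main(String[] args) throws IOException {
        // Wrap primitive ints in Writable boxes.
        IntWritable a = new IntWritable(42);
        IntWritable b = new IntWritable();
        b.set(7);

        // IntWritable is a WritableComparable, so instances compare directly.
        System.out.println(a.compareTo(b));    // positive, since 42 > 7
        System.out.println(a.get() + b.get()); // 49

        // Serialization round trip: write to a buffer, read back into a new instance.
        DataOutputBuffer out = new DataOutputBuffer();
        a.write(out);

        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());

        IntWritable copy = new IntWritable();
        copy.readFields(in);
        System.out.println(copy.get()); // 42
    }
}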
Example #1
Source File: HadoopMapFunctionITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test
public void testConfigurableMapper() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.filterPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
    DataSet<Tuple2<IntWritable, Text>> hellos = ds
            .flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    hellos.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();

    String expected = "(2,Hello)\n" +
            "(3,Hello world)\n" +
            "(4,Hello world, how are you?)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
Example #2
Source File: TestTezMerger.java From tez with Apache License 2.0 | 6 votes |
private List<TezMerger.Segment> createInMemorySegments(int segmentCount, int keysPerSegment)
        throws IOException {
    List<TezMerger.Segment> segmentList = Lists.newLinkedList();
    Random rnd = new Random();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (int i = 0; i < segmentCount; i++) {
        BoundedByteArrayOutputStream stream = new BoundedByteArrayOutputStream(10000);
        InMemoryWriter writer = new InMemoryWriter(stream);

        for (int j = 0; j < keysPerSegment; j++) {
            populateData(new IntWritable(rnd.nextInt()), new LongWritable(rnd.nextLong()), key, value);
            writer.append(key, value);
        }
        writer.close();
        InMemoryReader reader =
                new InMemoryReader(merger, null, stream.getBuffer(), 0, stream.getLimit());
        segmentList.add(new TezMerger.Segment(reader, null));
    }
    return segmentList;
}
Example #3
Source File: TestJoinTupleWritable.java From hadoop with Apache License 2.0 | 6 votes |
public void testNestedIterable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    assertTrue("Bad count", writs.length == verifIter(writs, sTuple, 0));
}
Example #4
Source File: SleepJob.java From hadoop-book with Apache License 2.0 | 6 votes |
public void map(IntWritable key, IntWritable value,
        OutputCollector<IntWritable, NullWritable> output, Reporter reporter)
        throws IOException {

    // It is expected that every map processes mapSleepCount number of records.
    try {
        reporter.setStatus("Sleeping... ("
                + (mapSleepDuration * (mapSleepCount - count)) + ") ms left");
        Thread.sleep(mapSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    ++count;

    // Output reduceSleepCount * numReduce number of random values, so that
    // each reducer will get reduceSleepCount number of keys.
    int k = key.get();
    for (int i = 0; i < value.get(); ++i) {
        output.collect(new IntWritable(k + i), NullWritable.get());
    }
}
Example #5
Source File: ActiveUserCollector.java From BigDataPlatform with GNU General Public License v3.0 | 6 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement preparedStatement, IDimensionConverter converter)
        throws SQLException, IOException {
    StatsUserDimension statsUser = (StatsUserDimension) key;
    MapWritableValue mapWritableValue = (MapWritableValue) value;
    IntWritable activeUserValue =
            (IntWritable) mapWritableValue.getValue().get(new IntWritable(-1));

    int i = 0;
    preparedStatement.setInt(++i,
            converter.getDimensionIdByValue(statsUser.getStatsCommon().getPlatform()));
    preparedStatement.setInt(++i,
            converter.getDimensionIdByValue(statsUser.getStatsCommon().getDate()));
    preparedStatement.setInt(++i, activeUserValue.get());
    preparedStatement.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMS));
    preparedStatement.setInt(++i, activeUserValue.get());
    preparedStatement.addBatch();
}
Example #6
Source File: StatsUserNewInstallUserCollector.java From BigDataArchitect with Apache License 2.0 | 6 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement pstmt, IDimensionConverter converter)
        throws SQLException, IOException {
    StatsUserDimension statsUserDimension = (StatsUserDimension) key;
    MapWritableValue mapWritableValue = (MapWritableValue) value;
    IntWritable newInstallUsers =
            (IntWritable) mapWritableValue.getValue().get(new IntWritable(-1));

    int i = 0;
    pstmt.setInt(++i,
            converter.getDimensionIdByValue(statsUserDimension.getStatsCommon().getPlatform()));
    pstmt.setInt(++i,
            converter.getDimensionIdByValue(statsUserDimension.getStatsCommon().getDate()));
    pstmt.setInt(++i, newInstallUsers.get());
    pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
    pstmt.setInt(++i, newInstallUsers.get());
    pstmt.addBatch(); // add the row to the batch
}
Example #7
Source File: TestIFile.java From tez with Apache License 2.0 | 6 votes |
@Test(timeout = 5000)
// Test empty file case
public void testEmptyFileBackedInMemIFileWriter() throws IOException {
    List<KVPair> data = new ArrayList<>();
    TezTaskOutputFiles tezTaskOutput = new TezTaskOutputFiles(defaultConf, "uniqueId", 1);

    IFile.FileBackedInMemIFileWriter writer = new IFile.FileBackedInMemIFileWriter(
            defaultConf, localFs, tezTaskOutput, Text.class, IntWritable.class,
            codec, null, null, 100);

    // empty ifile
    writer.close();

    byte[] bytes = new byte[(int) writer.getRawLength()];

    IFile.Reader.readToMemory(bytes,
            new ByteArrayInputStream(ByteString.copyFrom(writer.getData()).toByteArray()),
            (int) writer.getCompressedLength(), codec, false, -1);

    readUsingInMemoryReader(bytes, data);
}
Example #8
Source File: TestIFile.java From incubator-tez with Apache License 2.0 | 6 votes |
@Test
// Test appendValue with DataInputBuffer
public void testAppendValueWithDataInputBuffer() throws IOException {
    List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100));
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath,
            Text.class, IntWritable.class, codec, null, null);

    final DataInputBuffer previousKey = new DataInputBuffer();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (KVPair kvp : data) {
        populateData(kvp, key, value);

        if ((previousKey != null && BufferUtils.compare(key, previousKey) == 0)) {
            writer.appendValue(value);
        } else {
            writer.append(key, value);
        }
        // Remember the current key so duplicate keys can reuse appendValue().
        previousKey.reset(key.getData(), 0, key.getLength());
    }

    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}
Example #9
Source File: TestJobCounters.java From big-c with Apache License 2.0 | 6 votes |
public static Job createJob() throws IOException {
    final Configuration conf = new Configuration();
    final Job baseJob = Job.getInstance(conf);
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(NewMapTokenizer.class);
    baseJob.setCombinerClass(NewSummer.class);
    baseJob.setReducerClass(NewSummer.class);
    baseJob.setNumReduceTasks(1);
    baseJob.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);
    baseJob.getConfiguration().set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
    baseJob.getConfiguration().setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMinInputSplitSize(
            baseJob, Long.MAX_VALUE);
    return baseJob;
}
Example #10
Source File: TaskTracker.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Queries the job tracker for a set of outputs ready to be copied.
 * @param fromEventId the first event ID we want to start from, this is
 *        modified by the call to this method
 * @param jobClient the job tracker
 * @return a set of locations to copy outputs from
 * @throws IOException
 */
private List<TaskCompletionEvent> queryJobTracker(IntWritable fromEventId,
        JobID jobId, InterTrackerProtocol jobClient) throws IOException {

    TaskCompletionEvent[] t = jobClient.getTaskCompletionEvents(
            jobId, fromEventId.get(), probe_sample_size);

    // We are interested in map task completion events only, so store
    // only those.
    List<TaskCompletionEvent> recentMapEvents = new ArrayList<TaskCompletionEvent>();
    for (int i = 0; i < t.length; i++) {
        if (t[i].isMap) {
            recentMapEvents.add(t[i]);
        }
    }
    fromEventId.set(fromEventId.get() + t.length);
    return recentMapEvents;
}
Example #11
Source File: UpdateCFJob.java From recsys-offline with Apache License 2.0 | 6 votes |
public void run() throws Exception {
    long startTime = System.currentTimeMillis();
    Configuration conf = new Configuration();
    conf.set(TableOutputFormat.OUTPUT_TABLE, Constants.hbase_user_item_pref_table);

    Job job = Job.getInstance(conf, "hbasewriter" + System.currentTimeMillis());
    job.setJarByClass(UpdateCFJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(HBaseWriteReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TableOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(input));

    boolean isFinish = job.waitForCompletion(true);
    // Take the end time after the job completes so the logged duration covers the run.
    long endTime = System.currentTimeMillis();
    if (isFinish) {
        logger.info("UpdateCFJob job [" + job.getJobName() + "] run finish. It costs "
                + (endTime - startTime) / 1000 + "s.");
    } else {
        logger.error("UpdateCFJob job [" + job.getJobName() + "] run failed.");
    }
}
Example #12
Source File: TestEsriJsonSerDe.java From spatial-framework-for-hadoop with Apache License 2.0 | 6 votes |
@Test
public void TestIntParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new EsriJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "num");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "int");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector) jserde.getObjectInspector();

    //value.set("{\"attributes\":{\"num\":7},\"geometry\":null}");
    value.set("{\"attributes\":{\"num\":7}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("num");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(7, ((IntWritable) fieldData).get());

    value.set("{\"attributes\":{\"num\":9}}");
    row = jserde.deserialize(value);
    f0 = rowOI.getStructFieldRef("num");
    fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(9, ((IntWritable) fieldData).get());
}
Example #13
Source File: Step32.java From recsys-offline with Apache License 2.0 | 6 votes |
public static void main(String[] args)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf1 = new Configuration();

    Job job1 = new Job(conf1, "wiki job one");
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setNumReduceTasks(1);
    job1.setJarByClass(Step32.class);
    job1.setMapperClass(WikiMapper32.class);
    job1.setMapOutputKeyClass(IntWritable.class);
    job1.setMapOutputValueClass(VectorOrPrefWritable.class);
    job1.setReducerClass(WiKiReducer32.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(VectorOrPrefWritable.class);

    // The WiKiDriver's output is this job's input.
    SequenceFileInputFormat.addInputPath(job1, new Path(INPUT_PATH));
    SequenceFileOutputFormat.setOutputPath(job1, new Path(OUTPUT_PATH));
    if (!job1.waitForCompletion(true)) {
        System.exit(1); // exit on job failure
    }
}
Example #14
Source File: ActiveUserBrowserCollector.java From BigDataArchitect with Apache License 2.0 | 6 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement pstmt, IDimensionConverter converter)
        throws SQLException, IOException {
    // Cast the key/value to their concrete types and pull out the count.
    StatsUserDimension statsUser = (StatsUserDimension) key;
    IntWritable activeUserValue =
            (IntWritable) ((MapWritableValue) value).getValue().get(new IntWritable(-1));

    // Bind the statement parameters.
    int i = 0;
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getPlatform()));
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getDate()));
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getBrowser()));
    pstmt.setInt(++i, activeUserValue.get());
    pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
    pstmt.setInt(++i, activeUserValue.get());

    // Add the row to the batch.
    pstmt.addBatch();
}
Example #15
Source File: BroadcastLoadGen.java From tez with Apache License 2.0 | 6 votes |
private DAG createDAG(int numGenTasks, int totalSourceDataSize, int numFetcherTasks) {
    int bytesPerSource = totalSourceDataSize / numGenTasks;
    LOG.info("DataPerSourceTask(bytes)=" + bytesPerSource);
    ByteBuffer payload = ByteBuffer.allocate(4);
    payload.putInt(0, bytesPerSource);

    Vertex broadcastVertex = Vertex.create("DataGen",
            ProcessorDescriptor.create(InputGenProcessor.class.getName())
                    .setUserPayload(UserPayload.create(payload)), numGenTasks);
    Vertex fetchVertex = Vertex.create("FetchVertex",
            ProcessorDescriptor.create(InputFetchProcessor.class.getName()), numFetcherTasks);
    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
            .newBuilder(NullWritable.class.getName(), IntWritable.class.getName())
            .setCompression(false, null, null).build();

    DAG dag = DAG.create("BroadcastLoadGen");
    dag.addVertex(broadcastVertex).addVertex(fetchVertex).addEdge(
            Edge.create(broadcastVertex, fetchVertex, edgeConf.createDefaultBroadcastEdgeProperty()));
    return dag;
}
Example #16
Source File: PVMinMax.java From MapReduce-Demo with MIT License | 5 votes |
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
        sum += val.get();
    }
    context.write(key, new IntWritable(sum));
}
Example #17
Source File: SleepJob.java From RDFS with Apache License 2.0 | 5 votes |
public JobConf setupJobConf(int numMapper, int numReducer,
        long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount,
        boolean doSpeculation, List<String> slowMaps,
        List<String> slowReduces, int slowRatio,
        int countersPerTask, List<String> hosts,
        int hostsPerSplit, boolean setup) {
    JobConf job = new JobConf(getConf(), SleepJob.class);
    job.setNumMapTasks(numMapper);
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepJob.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepJob.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setJobSetupCleanupNeeded(setup);
    job.setInputFormat(SleepInputFormat.class);
    job.setPartitionerClass(SleepJob.class);
    job.setJobName("Sleep job");
    FileInputFormat.addInputPath(job, new Path("ignored"));
    job.setLong("sleep.job.map.sleep.time", mapSleepTime);
    job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
    job.setInt("sleep.job.map.sleep.count", mapSleepCount);
    job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    job.setSpeculativeExecution(doSpeculation);
    job.setInt(SLOW_RATIO, slowRatio);
    job.setStrings(SLOW_MAPS, slowMaps.toArray(new String[slowMaps.size()]));
    job.setStrings(SLOW_REDUCES, slowReduces.toArray(new String[slowReduces.size()]));
    job.setInt("sleep.job.counters.per.task", countersPerTask);
    job.setStrings(HOSTS_FOR_LOCALITY, hosts.toArray(new String[hosts.size()]));
    job.setInt(HOSTS_PER_SPLIT, hostsPerSplit);
    return job;
}
Example #18
Source File: WordMedian.java From pravega-samples with Apache License 2.0 | 5 votes |
@Override
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        System.err.println("Usage: wordmedian <dummy_hdfs> <uri> <scope> <stream> <out>");
        return 0;
    }

    setConf(new Configuration());
    Configuration conf = getConf();
    conf.setStrings("input.pravega.uri", args[1]);
    conf.setStrings("input.pravega.scope", args[2]);
    conf.setStrings("input.pravega.stream", args[3]);
    conf.setStrings("input.pravega.deserializer", TextSerializer.class.getName());

    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(PravegaInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[4]));
    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters()
            .getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil(totalWords / 2.0);
    int medianIndex2 = (int) Math.floor(totalWords / 2.0);

    median = readAndFindMedian(args[4], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}
Example #19
Source File: MultiFileWordCount.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 1;
    }

    JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
    job.setJobName("MultiFileWordCount");

    // set the InputFormat of the job to our InputFormat
    job.setInputFormat(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);

    return 0;
}
Example #20
Source File: TestMiniCoronaTaskFail.java From RDFS with Apache License 2.0 | 5 votes |
public void map(LongWritable key, Text value,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    System.err.println(taskLog);
    if (taskid.endsWith("_0")) {
        throw new IOException();
    } else if (taskid.endsWith("_1")) {
        System.exit(-1);
    } else if (taskid.endsWith("_2")) {
        throw new Error();
    }
}
Example #21
Source File: TestIFile.java From tez with Apache License 2.0 | 5 votes |
@Test(timeout = 5000)
// Test appendValues feature
public void testAppendValues() throws IOException {
    List<KVPair> data = new ArrayList<KVPair>();
    List<IntWritable> values = new ArrayList<IntWritable>();

    Text key = new Text("key");
    IntWritable val = new IntWritable(1);
    for (int i = 0; i < 5; i++) {
        data.add(new KVPair(key, val));
        values.add(val);
    }

    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath,
            Text.class, IntWritable.class, codec, null, null);
    writer.append(data.get(0).getKey(), data.get(0).getvalue()); // write first KV pair
    writer.appendValues(values.subList(1, values.size()).iterator()); // add the rest here

    Text lastKey = new Text("key3");
    IntWritable lastVal = new IntWritable(10);
    data.add(new KVPair(lastKey, lastVal));
    writer.append(lastKey, lastVal);
    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}
Example #22
Source File: TestComparators.java From hadoop with Apache License 2.0 | 5 votes |
public void reduce(IntWritable key, Iterator<IntWritable> values,
        OutputCollector<IntWritable, Text> out, Reporter reporter)
        throws IOException {
    // check key order
    int currentKey = key.get();
    if (currentKey < lastKey) {
        fail("Keys not in sorted ascending order");
    }
    lastKey = currentKey;

    // check order of values
    IntWritable previous = new IntWritable(Integer.MIN_VALUE);
    int valueCount = 0;
    while (values.hasNext()) {
        IntWritable current = values.next();
        // Check that the values are sorted
        if (current.compareTo(previous) < 0) {
            fail("Values generated by Mapper not in order");
        }
        previous = current;
        ++valueCount;
    }
    if (valueCount != 5) {
        fail("Values not grouped by primary key");
    }
    out.collect(key, new Text("success"));
}
Example #23
Source File: SubarrayEndWithUDF.java From incubator-hivemall with Apache License 2.0 | 5 votes |
public List<IntWritable> evaluate(List<IntWritable> original, IntWritable key) {
    if (original == null) {
        return null;
    }
    int toIndex = original.lastIndexOf(key);
    if (toIndex == -1) {
        return null;
    }
    return original.subList(0, toIndex + 1);
}
Example #24
Source File: EthereumGetSendAddressUDF.java From hadoopcryptoledger with Apache License 2.0 | 5 votes |
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if ((arguments == null) || (arguments.length != 2)) {
        return null;
    }
    EthereumTransaction eTrans =
            this.ethereumUDFUtil.getEthereumTransactionFromObject(arguments[0].get());
    byte[] sendAddress = EthereumUtil.getSendAddress(eTrans,
            ((IntWritable) arguments[1].get()).get());
    if (sendAddress == null) {
        return null;
    }
    return new BytesWritable(sendAddress);
}
Example #25
Source File: MapGetSumUDF.java From incubator-hivemall with Apache License 2.0 | 5 votes |
public DoubleWritable evaluate(Map<IntWritable, FloatWritable> map, List<IntWritable> keys) {
    double sum = 0d;
    for (IntWritable k : keys) {
        FloatWritable v = map.get(k);
        if (v != null) {
            sum += (double) v.get();
        }
    }
    return val(sum);
}
Example #26
Source File: TestLocalModeWithNewApis.java From big-c with Apache License 2.0 | 5 votes |
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
        sum += val.get();
    }
    result.set(sum);
    context.write(key, result);
}
Example #27
Source File: WordCount.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }

    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
            new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
            new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    // set the value for both, since this test relies on it
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}
Example #28
Source File: HadoopReduceCombineFunctionITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v) throws Exception {
    outT.f0 = v.f0;
    outT.f1 = new IntWritable(1);
    return outT;
}
Example #29
Source File: MapredWordCount.java From tez with Apache License 2.0 | 5 votes |
public void reduce(Text key, Iterator<IntWritable> values,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    int sum = 0;
    while (values.hasNext()) {
        sum += values.next().get();
    }
    output.collect(key, new IntWritable(sum));
}
Example #30
Source File: HadoopMapFunctionITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
public void map(final IntWritable k, final Text v,
        final OutputCollector<IntWritable, Text> out, final Reporter r)
        throws IOException {
    if (v.toString().contains("bananas")) {
        out.collect(k, v);
    }
}