Java Code Examples for org.apache.hadoop.io.IntWritable
The following examples show how to use
org.apache.hadoop.io.IntWritable.
These examples are extracted from open source projects.
The project, author, source file, and license are noted above each example.
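Before the project examples, here is a minimal standalone sketch of the core IntWritable API: construction, mutation, comparison, and a serialization round trip. The class name and values are illustrative; the method calls (get, set, write, readFields, compareTo) are the standard Hadoop Writable/WritableComparable API.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.io.IntWritable;

public class IntWritableBasics {
    public static void main(String[] args) throws Exception {
        IntWritable a = new IntWritable(42); // mutable boxed int
        a.set(7);                            // Writables are typically reused, not reallocated
        int raw = a.get();                   // unwrap the primitive

        // Serialization round trip via the Writable interface.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        a.write(new DataOutputStream(bos));
        IntWritable b = new IntWritable();
        b.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));

        System.out.println(raw == b.get()); // true: value survives the round trip
        System.out.println(a.compareTo(b)); // 0: WritableComparable ordering by value
    }
}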
Example #1
Source Project: BigDataPlatform Author: KangU4 File: ActiveUserCollector.java License: GNU General Public License v3.0 | 6 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement preparedStatement, IDimensionConverter converter)
        throws SQLException, IOException {
    StatsUserDimension statsUser = (StatsUserDimension) key;
    MapWritableValue mapWritableValue = (MapWritableValue) value;
    IntWritable activeUserValue = (IntWritable) mapWritableValue.getValue().get(new IntWritable(-1));

    int i = 0;
    preparedStatement.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getPlatform()));
    preparedStatement.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getDate()));
    preparedStatement.setInt(++i, activeUserValue.get());
    preparedStatement.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMS));
    preparedStatement.setInt(++i, activeUserValue.get());
    preparedStatement.addBatch();
}
Example #2
Source Project: hadoop-gpu Author: koichi626 File: TaskTracker.java License: Apache License 2.0 | 6 votes |
/**
 * Queries the job tracker for a set of outputs ready to be copied
 * @param fromEventId the first event ID we want to start from, this is
 * modified by the call to this method
 * @param jobClient the job tracker
 * @return a set of locations to copy outputs from
 * @throws IOException
 */
private List<TaskCompletionEvent> queryJobTracker(IntWritable fromEventId,
        JobID jobId, InterTrackerProtocol jobClient) throws IOException {
    TaskCompletionEvent t[] = jobClient.getTaskCompletionEvents(
        jobId, fromEventId.get(), probe_sample_size);
    // we are interested in map task completion events only. So store
    // only those
    List<TaskCompletionEvent> recentMapEvents = new ArrayList<TaskCompletionEvent>();
    for (int i = 0; i < t.length; i++) {
        if (t[i].isMap) {
            recentMapEvents.add(t[i]);
        }
    }
    fromEventId.set(fromEventId.get() + t.length);
    return recentMapEvents;
}
Example #3
Source Project: Flink-CEPplus Author: ljygz File: HadoopMapFunctionITCase.java License: Apache License 2.0 | 6 votes |
@Test
public void testConfigurableMapper() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.filterPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
    DataSet<Tuple2<IntWritable, Text>> hellos = ds
        .flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();
    hellos.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();

    String expected = "(2,Hello)\n" + "(3,Hello world)\n" + "(4,Hello world, how are you?)\n";
    compareResultsByLinesInMemory(expected, resultPath);
}
Example #4
Source Project: BigDataArchitect Author: bjmashibing File: ActiveUserBrowserCollector.java License: Apache License 2.0 | 6 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement pstmt, IDimensionConverter converter)
        throws SQLException, IOException {
    // cast and extract the corresponding values
    StatsUserDimension statsUser = (StatsUserDimension) key;
    IntWritable activeUserValue = (IntWritable) ((MapWritableValue) value).getValue().get(new IntWritable(-1));

    // set the statement parameters
    int i = 0;
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getPlatform()));
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getDate()));
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getBrowser()));
    pstmt.setInt(++i, activeUserValue.get());
    pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
    pstmt.setInt(++i, activeUserValue.get());

    // add to the batch
    pstmt.addBatch();
}
Example #5
Source Project: recsys-offline Author: bytegriffin File: Step32.java License: Apache License 2.0 | 6 votes |
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf1 = new Configuration();
    Job job1 = new Job(conf1, "wiki job one");
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setNumReduceTasks(1);
    job1.setJarByClass(Step32.class);
    job1.setMapperClass(WikiMapper32.class);
    job1.setMapOutputKeyClass(IntWritable.class);
    job1.setMapOutputValueClass(VectorOrPrefWritable.class);
    job1.setReducerClass(WiKiReducer32.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(VectorOrPrefWritable.class);
    // the WiKiDriver's output is this job's input
    SequenceFileInputFormat.addInputPath(job1, new Path(INPUT_PATH));
    SequenceFileOutputFormat.setOutputPath(job1, new Path(OUTPUT_PATH));
    if (!job1.waitForCompletion(true)) {
        System.exit(1); // exit on job failure
    }
}
Example #6
Source Project: recsys-offline Author: bytegriffin File: UpdateCFJob.java License: Apache License 2.0 | 6 votes |
public void run() throws Exception {
    long startTime = System.currentTimeMillis();
    Configuration conf = new Configuration();
    conf.set(TableOutputFormat.OUTPUT_TABLE, Constants.hbase_user_item_pref_table);
    Job job = Job.getInstance(conf, "hbasewriter" + System.currentTimeMillis());
    job.setJarByClass(UpdateCFJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(HBaseWriteReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TableOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(input));
    boolean isFinish = job.waitForCompletion(true);
    // take the end time after the job has actually completed
    long endTime = System.currentTimeMillis();
    if (isFinish) {
        logger.info("UpdateCFJob job [" + job.getJobName() + "] run finished. It costs "
            + (endTime - startTime) / 1000 + "s.");
    } else {
        logger.error("UpdateCFJob job [" + job.getJobName() + "] run failed.");
    }
}
Example #7
Source Project: big-c Author: yncxcw File: TestJobCounters.java License: Apache License 2.0 | 6 votes |
public static Job createJob() throws IOException {
    final Configuration conf = new Configuration();
    final Job baseJob = Job.getInstance(conf);
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(NewMapTokenizer.class);
    baseJob.setCombinerClass(NewSummer.class);
    baseJob.setReducerClass(NewSummer.class);
    baseJob.setNumReduceTasks(1);
    baseJob.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);
    baseJob.getConfiguration().set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
    baseJob.getConfiguration().setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMinInputSplitSize(
        baseJob, Long.MAX_VALUE);
    return baseJob;
}
Example #8
Source Project: spatial-framework-for-hadoop Author: Esri File: TestEsriJsonSerDe.java License: Apache License 2.0 | 6 votes |
@Test
public void TestIntParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new EsriJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "num");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "int");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector) jserde.getObjectInspector();

    //value.set("{\"attributes\":{\"num\":7},\"geometry\":null}");
    value.set("{\"attributes\":{\"num\":7}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("num");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(7, ((IntWritable) fieldData).get());

    value.set("{\"attributes\":{\"num\":9}}");
    row = jserde.deserialize(value);
    f0 = rowOI.getStructFieldRef("num");
    fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(9, ((IntWritable) fieldData).get());
}
Example #9
Source Project: incubator-tez Author: apache File: TestIFile.java License: Apache License 2.0 | 6 votes |
@Test
// Test appendValue with DataInputBuffer
public void testAppendValueWithDataInputBuffer() throws IOException {
    List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100));
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath,
        Text.class, IntWritable.class, codec, null, null);

    final DataInputBuffer previousKey = new DataInputBuffer();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (KVPair kvp : data) {
        populateData(kvp, key, value);
        if ((previousKey != null && BufferUtils.compare(key, previousKey) == 0)) {
            // same key as the previous record: append only the value
            writer.appendValue(value);
        } else {
            writer.append(key, value);
        }
        previousKey.reset(key.getData(), 0, key.getLength());
    }
    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}
Example #10
Source Project: tez Author: apache File: BroadcastLoadGen.java License: Apache License 2.0 | 6 votes |
private DAG createDAG(int numGenTasks, int totalSourceDataSize, int numFetcherTasks) {
    int bytesPerSource = totalSourceDataSize / numGenTasks;
    LOG.info("DataPerSourceTask(bytes)=" + bytesPerSource);
    ByteBuffer payload = ByteBuffer.allocate(4);
    payload.putInt(0, bytesPerSource);

    Vertex broadcastVertex = Vertex.create("DataGen",
        ProcessorDescriptor.create(InputGenProcessor.class.getName())
            .setUserPayload(UserPayload.create(payload)), numGenTasks);
    Vertex fetchVertex = Vertex.create("FetchVertex",
        ProcessorDescriptor.create(InputFetchProcessor.class.getName()), numFetcherTasks);
    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
        .newBuilder(NullWritable.class.getName(), IntWritable.class.getName())
        .setCompression(false, null, null).build();

    DAG dag = DAG.create("BroadcastLoadGen");
    dag.addVertex(broadcastVertex).addVertex(fetchVertex).addEdge(
        Edge.create(broadcastVertex, fetchVertex, edgeConf.createDefaultBroadcastEdgeProperty()));
    return dag;
}
Example #11
Source Project: tez Author: apache File: TestIFile.java License: Apache License 2.0 | 6 votes |
@Test(timeout = 5000)
// Test empty file case
public void testEmptyFileBackedInMemIFileWriter() throws IOException {
    List<KVPair> data = new ArrayList<>();
    TezTaskOutputFiles tezTaskOutput = new TezTaskOutputFiles(defaultConf, "uniqueId", 1);

    IFile.FileBackedInMemIFileWriter writer = new IFile.FileBackedInMemIFileWriter(
        defaultConf, localFs, tezTaskOutput, Text.class, IntWritable.class,
        codec, null, null, 100);

    // empty ifile
    writer.close();

    byte[] bytes = new byte[(int) writer.getRawLength()];

    IFile.Reader.readToMemory(bytes,
        new ByteArrayInputStream(ByteString.copyFrom(writer.getData()).toByteArray()),
        (int) writer.getCompressedLength(), codec, false, -1);

    readUsingInMemoryReader(bytes, data);
}
Example #12
Source Project: BigDataArchitect Author: bjmashibing File: StatsUserNewInstallUserCollector.java License: Apache License 2.0 | 6 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement pstmt, IDimensionConverter converter)
        throws SQLException, IOException {
    StatsUserDimension statsUserDimension = (StatsUserDimension) key;
    MapWritableValue mapWritableValue = (MapWritableValue) value;
    IntWritable newInstallUsers = (IntWritable) mapWritableValue.getValue().get(new IntWritable(-1));

    int i = 0;
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUserDimension.getStatsCommon().getPlatform()));
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUserDimension.getStatsCommon().getDate()));
    pstmt.setInt(++i, newInstallUsers.get());
    pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
    pstmt.setInt(++i, newInstallUsers.get());
    pstmt.addBatch(); // queue the row in the batch
}
Example #13
Source Project: tez Author: apache File: TestTezMerger.java License: Apache License 2.0 | 6 votes |
private List<TezMerger.Segment> createInMemorySegments(int segmentCount, int keysPerSegment)
        throws IOException {
    List<TezMerger.Segment> segmentList = Lists.newLinkedList();
    Random rnd = new Random();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (int i = 0; i < segmentCount; i++) {
        BoundedByteArrayOutputStream stream = new BoundedByteArrayOutputStream(10000);
        InMemoryWriter writer = new InMemoryWriter(stream);

        for (int j = 0; j < keysPerSegment; j++) {
            populateData(new IntWritable(rnd.nextInt()), new LongWritable(rnd.nextLong()),
                key, value);
            writer.append(key, value);
        }
        writer.close();
        InMemoryReader reader = new InMemoryReader(merger, null, stream.getBuffer(), 0,
            stream.getLimit());
        segmentList.add(new TezMerger.Segment(reader, null));
    }
    return segmentList;
}
Example #14
Source Project: hadoop Author: naver File: TestJoinTupleWritable.java License: Apache License 2.0 | 6 votes |
public void testNestedIterable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    assertTrue("Bad count", writs.length == verifIter(writs, sTuple, 0));
}
Example #15
Source Project: hadoop-book Author: elephantscale File: SleepJob.java License: Apache License 2.0 | 6 votes |
public void map(IntWritable key, IntWritable value,
        OutputCollector<IntWritable, NullWritable> output, Reporter reporter)
        throws IOException {
    // it is expected that every map processes mapSleepCount number of records.
    try {
        reporter.setStatus("Sleeping... (" +
            (mapSleepDuration * (mapSleepCount - count)) + ") ms left");
        Thread.sleep(mapSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    ++count;

    // output reduceSleepCount * numReduce number of random values, so that
    // each reducer will get reduceSleepCount number of keys.
    int k = key.get();
    for (int i = 0; i < value.get(); ++i) {
        output.collect(new IntWritable(k + i), NullWritable.get());
    }
}
Example #16
Source Project: incubator-hivemall Author: apache File: MapGetSumUDF.java License: Apache License 2.0 | 5 votes |
public DoubleWritable evaluate(Map<IntWritable, FloatWritable> map, List<IntWritable> keys) {
    double sum = 0d;
    for (IntWritable k : keys) {
        FloatWritable v = map.get(k);
        if (v != null) {
            sum += (double) v.get();
        }
    }
    return val(sum);
}
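Lookups like map.get(k) above work because IntWritable overrides equals() and hashCode() in terms of the wrapped int, so distinct instances holding the same value collide as intended in hash-based collections. A minimal standalone sketch (hypothetical class name and values) illustrating this:

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.IntWritable;

public class IntWritableKeyDemo {
    public static void main(String[] args) {
        Map<IntWritable, String> m = new HashMap<>();
        m.put(new IntWritable(7), "seven");
        // A different instance with the same value finds the entry,
        // because IntWritable's equals()/hashCode() delegate to the int value.
        System.out.println(m.get(new IntWritable(7))); // prints "seven"
    }
}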
Example #17
Source Project: ignite Author: apache File: HadoopClientProtocolSelfTest.java License: Apache License 2.0 | 5 votes |
/** {@inheritDoc} */
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context ctx)
        throws IOException, InterruptedException {
    while (reduceLockFile.exists())
        Thread.sleep(50);

    int wordCnt = 0;
    for (IntWritable value : values)
        wordCnt += value.get();

    totalWordCnt.set(wordCnt);
    ctx.write(key, totalWordCnt);
}
Example #18
Source Project: big-c Author: yncxcw File: LoadGeneratorMR.java License: Apache License 2.0 | 5 votes |
@Override
public void reduce(Text key, Iterator<IntWritable> values,
        OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    int sum = 0;
    while (values.hasNext()) {
        sum += values.next().get();
    }
    if (key.equals(OPEN_EXECTIME)) {
        executionTime[OPEN] = sum;
    } else if (key.equals(NUMOPS_OPEN)) {
        numOfOps[OPEN] = sum;
    } else if (key.equals(LIST_EXECTIME)) {
        executionTime[LIST] = sum;
    } else if (key.equals(NUMOPS_LIST)) {
        numOfOps[LIST] = sum;
    } else if (key.equals(DELETE_EXECTIME)) {
        executionTime[DELETE] = sum;
    } else if (key.equals(NUMOPS_DELETE)) {
        numOfOps[DELETE] = sum;
    } else if (key.equals(CREATE_EXECTIME)) {
        executionTime[CREATE] = sum;
    } else if (key.equals(NUMOPS_CREATE)) {
        numOfOps[CREATE] = sum;
    } else if (key.equals(WRITE_CLOSE_EXECTIME)) {
        System.out.println(WRITE_CLOSE_EXECTIME + " = " + sum);
        executionTime[WRITE_CLOSE] = sum;
    } else if (key.equals(NUMOPS_WRITE_CLOSE)) {
        numOfOps[WRITE_CLOSE] = sum;
    } else if (key.equals(TOTALOPS)) {
        totalOps = sum;
    } else if (key.equals(ELAPSED_TIME)) {
        totalTime = sum;
    }
    result.set(sum);
    output.collect(key, result);
    // System.out.println("Key = " + key + " Sum is =" + sum);
    // printResults(System.out);
}
Example #19
Source Project: BigDataArchitect Author: bjmashibing File: NewMemberCollector.java License: Apache License 2.0 | 5 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement pstmt, IDimensionConverter converter)
        throws SQLException, IOException {
    StatsUserDimension statsUser = (StatsUserDimension) key;
    MapWritableValue mapWritableValue = (MapWritableValue) value;

    int i = 0;
    // set parameters according to the kpi being collected
    switch (mapWritableValue.getKpi()) {
        case NEW_MEMBER: // new member kpi
            IntWritable v1 = (IntWritable) mapWritableValue.getValue().get(new IntWritable(-1));
            pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getPlatform()));
            pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getDate()));
            pstmt.setInt(++i, v1.get());
            pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
            pstmt.setInt(++i, v1.get());
            break;
        case BROWSER_NEW_MEMBER: // browser new member kpi
            IntWritable v2 = (IntWritable) mapWritableValue.getValue().get(new IntWritable(-1));
            pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getPlatform()));
            pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getDate()));
            pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getBrowser()));
            pstmt.setInt(++i, v2.get());
            pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
            pstmt.setInt(++i, v2.get());
            break;
        case INSERT_MEMBER_INFO: // insert member info
            Text v3 = (Text) mapWritableValue.getValue().get(new IntWritable(-1));
            pstmt.setString(++i, v3.toString());
            pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
            pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
            pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
            break;
        default:
            throw new RuntimeException("Unsupported kpi output operation: " + mapWritableValue.getKpi());
    }
    // add to the batch
    pstmt.addBatch();
}
Example #20
Source Project: big-c Author: yncxcw File: TestMapRed.java License: Apache License 2.0 | 5 votes |
public void reduce(IntWritable key, Iterator<IntWritable> it,
        OutputCollector<IntWritable, IntWritable> out, Reporter reporter)
        throws IOException {
    while (it.hasNext()) {
        out.collect(it.next(), null);
    }
}
Example #21
Source Project: gemfirexd-oss Author: gemxd File: BusyAirports.java License: Apache License 2.0 | 5 votes |
public int run(String[] args) throws Exception {
    GfxdDataSerializable.initTypes();

    JobConf conf = new JobConf(getConf());
    conf.setJobName("Busy Airport Count");

    Path outputPath = new Path(args[0]);
    String hdfsHomeDir = args[1];
    String tableName = args[2];

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

    conf.setInputFormat(RowInputFormat.class);
    conf.setMapperClass(SampleMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    conf.setReducerClass(SampleReducer.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);
    return 0;
}
Example #22
Source Project: Arabesque Author: qcri File: IntSumReduction.java License: Apache License 2.0 | 5 votes |
@Override
public IntWritable reduce(IntWritable k1, IntWritable k2) {
    if (k1 != null && k2 != null) {
        k1.set(k1.get() + k2.get()); // accumulate in place, reusing k1
    }
    return k1;
}
Example #23
Source Project: BigDataPlatform Author: KangU4 File: SessionsReducer.java License: GNU General Public License v3.0 | 5 votes |
private void handleBrowserSessions(StatsUserDimension key, Iterable<TimeOutputValue> values,
        Context context) throws IOException, InterruptedException {
    // count the distinct memberIds
    for (TimeOutputValue value : values) {
        // this.unique.add(value.getId());
        TimeChain chain = this.timeChainMap.get(value.getId());
        if (chain == null) {
            chain = new TimeChain(value.getTime());
            this.timeChainMap.put(value.getId(), chain); // cache the chain
        }
        chain.addTime(value.getTime());
    }

    // compute the session length in seconds
    int sessionsLength = 0;
    // 1. sum the intervals in milliseconds
    for (Map.Entry<String, TimeChain> entry : this.timeChainMap.entrySet()) {
        long tmp = entry.getValue().getTimeOfMillis();
        if (tmp < 0 || tmp > GlobalConstants.DAY_OF_MILLISECONDS) {
            // filter out values below 0 or above one day's worth of milliseconds
            continue;
        }
        sessionsLength += tmp;
    }
    // 2. convert to seconds, rounding up
    if (sessionsLength % 1000 == 0) {
        sessionsLength = sessionsLength / 1000;
    } else {
        sessionsLength = sessionsLength / 1000 + 1;
    }

    // set the value
    this.map.put(new IntWritable(-1), new IntWritable(this.timeChainMap.size()));
    this.map.put(new IntWritable(-2), new IntWritable(sessionsLength));
    outputValue.setValue(this.map);
    // set the kpi
    outputValue.setKpi(KpiType.BROWSER_SESSIONS);
    context.write(key, outputValue);
}
Example #24
Source Project: incubator-hivemall Author: apache File: BPRMatrixFactorizationUDTFTest.java License: Apache License 2.0 | 5 votes |
private static void parseLine(@Nonnull String line, @Nonnull IntWritable user,
        @Nonnull IntWritable posItem, @Nonnull IntWritable negItem) {
    String[] cols = StringUtils.split(line, ' ');
    Assert.assertEquals(3, cols.length);
    user.set(Integer.parseInt(cols[0]));
    posItem.set(Integer.parseInt(cols[1]));
    negItem.set(Integer.parseInt(cols[2]));
}
Example #25
Source Project: RDFS Author: iVCE File: CombinerJobCreator.java License: Apache License 2.0 | 5 votes |
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {
    String line = value.toString();
    StringTokenizer itr = new StringTokenizer(line);
    while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, one);
    }
}
Example #26
Source Project: big-c Author: yncxcw File: SortValidator.java License: Apache License 2.0 | 5 votes |
static private IntWritable deduceInputFile(JobConf job) {
    Path[] inputPaths = FileInputFormat.getInputPaths(job);
    Path inputFile = new Path(job.get(JobContext.MAP_INPUT_FILE));

    // value == one for sort-input; value == two for sort-output
    return (inputFile.getParent().equals(inputPaths[0])) ? sortInput : sortOutput;
}
Example #27
Source Project: bigdata-tutorial Author: micmiu File: XflowStatic.java License: Apache License 2.0 | 5 votes |
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
        sum += val.get();
    }
    result.set(sum);
    context.write(key, result);
}
Example #28
Source Project: coming Author: SpoonLabs File: 1000021_CDbwReducer_s.java License: MIT License | 5 votes |
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    for (Map.Entry<Integer, List<VectorWritable>> entry : referencePoints.entrySet()) {
        IntWritable iw = new IntWritable(entry.getKey());
        for (VectorWritable vw : entry.getValue()) {
            context.write(iw, vw);
        }
    }
    super.cleanup(context);
}
Example #29
Source Project: Kylin Author: KylinOLAP File: ColumnCardinalityMapperTest.java License: Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") @Test public void testMapperOnComma() throws IOException { mapDriver.clearInput(); LongWritable inputKey1 = new LongWritable(1); LongWritable inputKey2 = new LongWritable(2); LongWritable inputKey3 = new LongWritable(3); LongWritable inputKey4 = new LongWritable(4); LongWritable inputKey5 = new LongWritable(5); LongWritable inputKey6 = new LongWritable(6); LongWritable inputKey7 = new LongWritable(7); mapDriver.addInput(inputKey1, new Text()); mapDriver.addInput(inputKey2, new Text(strArr)); mapDriver.addInput(inputKey3, new Text(strArr)); mapDriver.addInput(inputKey4, new Text(strArr)); mapDriver.addInput(inputKey5, new Text(strArr)); mapDriver.addInput(inputKey6, new Text(strArr)); mapDriver.addInput(inputKey7, new Text(strArr)); List<Pair<IntWritable, BytesWritable>> result = mapDriver.run(); assertEquals(9, result.size()); int key1 = result.get(0).getFirst().get(); BytesWritable value1 = result.get(0).getSecond(); byte[] bytes = value1.getBytes(); HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(); hllc.readRegisters(ByteBuffer.wrap(bytes)); System.out.println("ab\177ab".length()); assertTrue(key1 > 0); assertEquals(1, hllc.getCountEstimate()); }
Example #30
Source Project: recsys-offline Author: bytegriffin File: Step32.java License: Apache License 2.0 | 5 votes |
public void reduce(IntWritable key, Iterable<VectorOrPrefWritable> values, Context context)
        throws IOException, InterruptedException {
    for (VectorOrPrefWritable va : values) {
        context.write(key, va);
        System.err.println("key" + key.toString() + ",value" + va);
    }
}