org.apache.hadoop.io.IntWritable Java Examples
The following examples show how to use
org.apache.hadoop.io.IntWritable.
Each example is taken from an open-source project; the project, source file, and license are noted above the code.
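Before the project examples, here is a minimal standalone sketch (the IntWritableBasics class name and printed values are illustrative, not taken from any project below) showing the core IntWritable API: boxing an int, comparing two values, and a serialization round trip through DataOutputBuffer and DataInputBuffer.

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

import java.io.IOException;

public class IntWritableBasics {
    public static void main(String[] args) throws IOException {
        // Wrap primitive ints in Writable boxes.
        IntWritable a = new IntWritable(42);
        IntWritable b = new IntWritable();
        b.set(7);

        // IntWritable is a WritableComparable, so instances compare directly.
        System.out.println(a.compareTo(b));    // positive, since 42 > 7
        System.out.println(a.get() + b.get()); // 49

        // Serialization round trip: write to a buffer, read back into a new instance.
        DataOutputBuffer out = new DataOutputBuffer();
        a.write(out);

        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());

        IntWritable copy = new IntWritable();
        copy.readFields(in);
        System.out.println(copy.get()); // 42
    }
}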
Example #1
Source File: HadoopMapFunctionITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test
public void testConfigurableMapper() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.filterPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
    DataSet<Tuple2<IntWritable, Text>> hellos = ds
            .flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    hellos.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();

    String expected = "(2,Hello)\n" +
            "(3,Hello world)\n" +
            "(4,Hello world, how are you?)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
Example #2
Source File: TestTezMerger.java From tez with Apache License 2.0 | 6 votes |
private List<TezMerger.Segment> createInMemorySegments(int segmentCount, int keysPerSegment)
        throws IOException {
    List<TezMerger.Segment> segmentList = Lists.newLinkedList();
    Random rnd = new Random();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (int i = 0; i < segmentCount; i++) {
        BoundedByteArrayOutputStream stream = new BoundedByteArrayOutputStream(10000);
        InMemoryWriter writer = new InMemoryWriter(stream);

        for (int j = 0; j < keysPerSegment; j++) {
            populateData(new IntWritable(rnd.nextInt()), new LongWritable(rnd.nextLong()), key, value);
            writer.append(key, value);
        }
        writer.close();
        InMemoryReader reader =
                new InMemoryReader(merger, null, stream.getBuffer(), 0, stream.getLimit());
        segmentList.add(new TezMerger.Segment(reader, null));
    }
    return segmentList;
}
Example #3
Source File: TestJoinTupleWritable.java From hadoop with Apache License 2.0 | 6 votes |
public void testNestedIterable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    assertTrue("Bad count", writs.length == verifIter(writs, sTuple, 0));
}
Example #4
Source File: SleepJob.java From hadoop-book with Apache License 2.0 | 6 votes |
public void map(IntWritable key, IntWritable value,
        OutputCollector<IntWritable, NullWritable> output, Reporter reporter)
        throws IOException {

    // It is expected that every map processes mapSleepCount number of records.
    try {
        reporter.setStatus("Sleeping... ("
                + (mapSleepDuration * (mapSleepCount - count)) + ") ms left");
        Thread.sleep(mapSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    ++count;

    // Output reduceSleepCount * numReduce number of random values, so that
    // each reducer will get reduceSleepCount number of keys.
    int k = key.get();
    for (int i = 0; i < value.get(); ++i) {
        output.collect(new IntWritable(k + i), NullWritable.get());
    }
}
Example #5
Source File: ActiveUserCollector.java From BigDataPlatform with GNU General Public License v3.0 | 6 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement preparedStatement, IDimensionConverter converter)
        throws SQLException, IOException {
    StatsUserDimension statsUser = (StatsUserDimension) key;
    MapWritableValue mapWritableValue = (MapWritableValue) value;
    IntWritable activeUserValue =
            (IntWritable) mapWritableValue.getValue().get(new IntWritable(-1));

    int i = 0;
    preparedStatement.setInt(++i,
            converter.getDimensionIdByValue(statsUser.getStatsCommon().getPlatform()));
    preparedStatement.setInt(++i,
            converter.getDimensionIdByValue(statsUser.getStatsCommon().getDate()));
    preparedStatement.setInt(++i, activeUserValue.get());
    preparedStatement.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMS));
    preparedStatement.setInt(++i, activeUserValue.get());
    preparedStatement.addBatch();
}
Example #6
Source File: StatsUserNewInstallUserCollector.java From BigDataArchitect with Apache License 2.0 | 6 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement pstmt, IDimensionConverter converter)
        throws SQLException, IOException {
    StatsUserDimension statsUserDimension = (StatsUserDimension) key;
    MapWritableValue mapWritableValue = (MapWritableValue) value;
    IntWritable newInstallUsers =
            (IntWritable) mapWritableValue.getValue().get(new IntWritable(-1));

    int i = 0;
    pstmt.setInt(++i,
            converter.getDimensionIdByValue(statsUserDimension.getStatsCommon().getPlatform()));
    pstmt.setInt(++i,
            converter.getDimensionIdByValue(statsUserDimension.getStatsCommon().getDate()));
    pstmt.setInt(++i, newInstallUsers.get());
    pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
    pstmt.setInt(++i, newInstallUsers.get());
    pstmt.addBatch(); // add the row to the batch
}
Example #7
Source File: TestIFile.java From tez with Apache License 2.0 | 6 votes |
@Test(timeout = 5000)
// Test empty file case
public void testEmptyFileBackedInMemIFileWriter() throws IOException {
    List<KVPair> data = new ArrayList<>();
    TezTaskOutputFiles tezTaskOutput = new TezTaskOutputFiles(defaultConf, "uniqueId", 1);

    IFile.FileBackedInMemIFileWriter writer = new IFile.FileBackedInMemIFileWriter(
            defaultConf, localFs, tezTaskOutput, Text.class, IntWritable.class,
            codec, null, null, 100);

    // empty ifile
    writer.close();

    byte[] bytes = new byte[(int) writer.getRawLength()];

    IFile.Reader.readToMemory(bytes,
            new ByteArrayInputStream(ByteString.copyFrom(writer.getData()).toByteArray()),
            (int) writer.getCompressedLength(), codec, false, -1);

    readUsingInMemoryReader(bytes, data);
}
Example #8
Source File: TestIFile.java From incubator-tez with Apache License 2.0 | 6 votes |
@Test
// Test appendValue with DataInputBuffer
public void testAppendValueWithDataInputBuffer() throws IOException {
    List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100));
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath,
            Text.class, IntWritable.class, codec, null, null);

    final DataInputBuffer previousKey = new DataInputBuffer();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (KVPair kvp : data) {
        populateData(kvp, key, value);

        if ((previousKey != null && BufferUtils.compare(key, previousKey) == 0)) {
            writer.appendValue(value);
        } else {
            writer.append(key, value);
        }
        // Remember the current key so duplicate keys can reuse appendValue().
        previousKey.reset(key.getData(), 0, key.getLength());
    }

    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}
Example #9
Source File: TestJobCounters.java From big-c with Apache License 2.0 | 6 votes |
public static Job createJob() throws IOException {
    final Configuration conf = new Configuration();
    final Job baseJob = Job.getInstance(conf);
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(NewMapTokenizer.class);
    baseJob.setCombinerClass(NewSummer.class);
    baseJob.setReducerClass(NewSummer.class);
    baseJob.setNumReduceTasks(1);
    baseJob.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);
    baseJob.getConfiguration().set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
    baseJob.getConfiguration().setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMinInputSplitSize(
            baseJob, Long.MAX_VALUE);
    return baseJob;
}
Example #10
Source File: TaskTracker.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Queries the job tracker for a set of outputs ready to be copied.
 * @param fromEventId the first event ID we want to start from, this is
 *        modified by the call to this method
 * @param jobClient the job tracker
 * @return a set of locations to copy outputs from
 * @throws IOException
 */
private List<TaskCompletionEvent> queryJobTracker(IntWritable fromEventId,
        JobID jobId, InterTrackerProtocol jobClient) throws IOException {

    TaskCompletionEvent[] t = jobClient.getTaskCompletionEvents(
            jobId, fromEventId.get(), probe_sample_size);

    // We are interested in map task completion events only, so store
    // only those.
    List<TaskCompletionEvent> recentMapEvents = new ArrayList<TaskCompletionEvent>();
    for (int i = 0; i < t.length; i++) {
        if (t[i].isMap) {
            recentMapEvents.add(t[i]);
        }
    }
    fromEventId.set(fromEventId.get() + t.length);
    return recentMapEvents;
}
Example #11
Source File: UpdateCFJob.java From recsys-offline with Apache License 2.0 | 6 votes |
public void run() throws Exception {
    long startTime = System.currentTimeMillis();
    Configuration conf = new Configuration();
    conf.set(TableOutputFormat.OUTPUT_TABLE, Constants.hbase_user_item_pref_table);

    Job job = Job.getInstance(conf, "hbasewriter" + System.currentTimeMillis());
    job.setJarByClass(UpdateCFJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(HBaseWriteReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TableOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(input));

    boolean isFinish = job.waitForCompletion(true);
    // Take the end time after the job completes so the logged duration covers the run.
    long endTime = System.currentTimeMillis();
    if (isFinish) {
        logger.info("UpdateCFJob job [" + job.getJobName() + "] run finish. It costs "
                + (endTime - startTime) / 1000 + "s.");
    } else {
        logger.error("UpdateCFJob job [" + job.getJobName() + "] run failed.");
    }
}
Example #12
Source File: TestEsriJsonSerDe.java From spatial-framework-for-hadoop with Apache License 2.0 | 6 votes |
@Test
public void TestIntParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new EsriJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "num");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "int");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector) jserde.getObjectInspector();

    //value.set("{\"attributes\":{\"num\":7},\"geometry\":null}");
    value.set("{\"attributes\":{\"num\":7}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("num");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(7, ((IntWritable) fieldData).get());

    value.set("{\"attributes\":{\"num\":9}}");
    row = jserde.deserialize(value);
    f0 = rowOI.getStructFieldRef("num");
    fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(9, ((IntWritable) fieldData).get());
}
Example #13
Source File: Step32.java From recsys-offline with Apache License 2.0 | 6 votes |
public static void main(String[] args)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf1 = new Configuration();

    Job job1 = new Job(conf1, "wiki job one");
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setNumReduceTasks(1);
    job1.setJarByClass(Step32.class);
    job1.setMapperClass(WikiMapper32.class);
    job1.setMapOutputKeyClass(IntWritable.class);
    job1.setMapOutputValueClass(VectorOrPrefWritable.class);
    job1.setReducerClass(WiKiReducer32.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(VectorOrPrefWritable.class);

    // The WiKiDriver's output is this job's input.
    SequenceFileInputFormat.addInputPath(job1, new Path(INPUT_PATH));
    SequenceFileOutputFormat.setOutputPath(job1, new Path(OUTPUT_PATH));
    if (!job1.waitForCompletion(true)) {
        System.exit(1); // exit on job failure
    }
}
Example #14
Source File: ActiveUserBrowserCollector.java From BigDataArchitect with Apache License 2.0 | 6 votes |
@Override
public void collect(Configuration conf, BaseDimension key, BaseStatsValueWritable value,
        PreparedStatement pstmt, IDimensionConverter converter)
        throws SQLException, IOException {
    // Cast the key/value to their concrete types and pull out the count.
    StatsUserDimension statsUser = (StatsUserDimension) key;
    IntWritable activeUserValue =
            (IntWritable) ((MapWritableValue) value).getValue().get(new IntWritable(-1));

    // Bind the statement parameters.
    int i = 0;
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getPlatform()));
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getStatsCommon().getDate()));
    pstmt.setInt(++i, converter.getDimensionIdByValue(statsUser.getBrowser()));
    pstmt.setInt(++i, activeUserValue.get());
    pstmt.setString(++i, conf.get(GlobalConstants.RUNNING_DATE_PARAMES));
    pstmt.setInt(++i, activeUserValue.get());

    // Add the row to the batch.
    pstmt.addBatch();
}
Example #15
Source File: BroadcastLoadGen.java From tez with Apache License 2.0 | 6 votes |
private DAG createDAG(int numGenTasks, int totalSourceDataSize, int numFetcherTasks) {
    int bytesPerSource = totalSourceDataSize / numGenTasks;
    LOG.info("DataPerSourceTask(bytes)=" + bytesPerSource);
    ByteBuffer payload = ByteBuffer.allocate(4);
    payload.putInt(0, bytesPerSource);

    Vertex broadcastVertex = Vertex.create("DataGen",
            ProcessorDescriptor.create(InputGenProcessor.class.getName())
                    .setUserPayload(UserPayload.create(payload)), numGenTasks);
    Vertex fetchVertex = Vertex.create("FetchVertex",
            ProcessorDescriptor.create(InputFetchProcessor.class.getName()), numFetcherTasks);
    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
            .newBuilder(NullWritable.class.getName(), IntWritable.class.getName())
            .setCompression(false, null, null).build();

    DAG dag = DAG.create("BroadcastLoadGen");
    dag.addVertex(broadcastVertex).addVertex(fetchVertex).addEdge(
            Edge.create(broadcastVertex, fetchVertex, edgeConf.createDefaultBroadcastEdgeProperty()));
    return dag;
}
Example #16
Source File: PVMinMax.java From MapReduce-Demo with MIT License | 5 votes |
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
        sum += val.get();
    }
    context.write(key, new IntWritable(sum));
}
Example #17
Source File: SleepJob.java From RDFS with Apache License 2.0 | 5 votes |
public JobConf setupJobConf(int numMapper, int numReducer,
        long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount,
        boolean doSpeculation, List<String> slowMaps,
        List<String> slowReduces, int slowRatio,
        int countersPerTask, List<String> hosts,
        int hostsPerSplit, boolean setup) {
    JobConf job = new JobConf(getConf(), SleepJob.class);
    job.setNumMapTasks(numMapper);
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepJob.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepJob.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setJobSetupCleanupNeeded(setup);
    job.setInputFormat(SleepInputFormat.class);
    job.setPartitionerClass(SleepJob.class);
    job.setJobName("Sleep job");
    FileInputFormat.addInputPath(job, new Path("ignored"));
    job.setLong("sleep.job.map.sleep.time", mapSleepTime);
    job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
    job.setInt("sleep.job.map.sleep.count", mapSleepCount);
    job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    job.setSpeculativeExecution(doSpeculation);
    job.setInt(SLOW_RATIO, slowRatio);
    job.setStrings(SLOW_MAPS, slowMaps.toArray(new String[slowMaps.size()]));
    job.setStrings(SLOW_REDUCES, slowReduces.toArray(new String[slowReduces.size()]));
    job.setInt("sleep.job.counters.per.task", countersPerTask);
    job.setStrings(HOSTS_FOR_LOCALITY, hosts.toArray(new String[hosts.size()]));
    job.setInt(HOSTS_PER_SPLIT, hostsPerSplit);
    return job;
}
Example #18
Source File: WordMedian.java From pravega-samples with Apache License 2.0 | 5 votes |
@Override
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        System.err.println("Usage: wordmedian <dummy_hdfs> <uri> <scope> <stream> <out>");
        return 0;
    }

    setConf(new Configuration());
    Configuration conf = getConf();
    conf.setStrings("input.pravega.uri", args[1]);
    conf.setStrings("input.pravega.scope", args[2]);
    conf.setStrings("input.pravega.stream", args[3]);
    conf.setStrings("input.pravega.deserializer", TextSerializer.class.getName());

    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(PravegaInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[4]));
    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters()
            .getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil(totalWords / 2.0);
    int medianIndex2 = (int) Math.floor(totalWords / 2.0);

    median = readAndFindMedian(args[4], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}
Example #19
Source File: MultiFileWordCount.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 1;
    }

    JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
    job.setJobName("MultiFileWordCount");

    // set the InputFormat of the job to our InputFormat
    job.setInputFormat(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);

    return 0;
}
Example #20
Source File: TestMiniCoronaTaskFail.java From RDFS with Apache License 2.0 | 5 votes |
public void map(LongWritable key, Text value,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    System.err.println(taskLog);
    if (taskid.endsWith("_0")) {
        throw new IOException();
    } else if (taskid.endsWith("_1")) {
        System.exit(-1);
    } else if (taskid.endsWith("_2")) {
        throw new Error();
    }
}
Example #21
Source File: TestIFile.java From tez with Apache License 2.0 | 5 votes |
@Test(timeout = 5000)
// Test appendValues feature
public void testAppendValues() throws IOException {
    List<KVPair> data = new ArrayList<KVPair>();
    List<IntWritable> values = new ArrayList<IntWritable>();

    Text key = new Text("key");
    IntWritable val = new IntWritable(1);
    for (int i = 0; i < 5; i++) {
        data.add(new KVPair(key, val));
        values.add(val);
    }

    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath,
            Text.class, IntWritable.class, codec, null, null);
    writer.append(data.get(0).getKey(), data.get(0).getvalue()); // write first KV pair
    writer.appendValues(values.subList(1, values.size()).iterator()); // add the rest here

    Text lastKey = new Text("key3");
    IntWritable lastVal = new IntWritable(10);
    data.add(new KVPair(lastKey, lastVal));
    writer.append(lastKey, lastVal);
    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}
Example #22
Source File: TestComparators.java From hadoop with Apache License 2.0 | 5 votes |
public void reduce(IntWritable key, Iterator<IntWritable> values,
        OutputCollector<IntWritable, Text> out, Reporter reporter)
        throws IOException {
    // check key order
    int currentKey = key.get();
    if (currentKey < lastKey) {
        fail("Keys not in sorted ascending order");
    }
    lastKey = currentKey;

    // check order of values
    IntWritable previous = new IntWritable(Integer.MIN_VALUE);
    int valueCount = 0;
    while (values.hasNext()) {
        IntWritable current = values.next();
        // Check that the values are sorted
        if (current.compareTo(previous) < 0) {
            fail("Values generated by Mapper not in order");
        }
        previous = current;
        ++valueCount;
    }
    if (valueCount != 5) {
        fail("Values not grouped by primary key");
    }
    out.collect(key, new Text("success"));
}
Example #23
Source File: SubarrayEndWithUDF.java From incubator-hivemall with Apache License 2.0 | 5 votes |
public List<IntWritable> evaluate(List<IntWritable> original, IntWritable key) {
    if (original == null) {
        return null;
    }
    int toIndex = original.lastIndexOf(key);
    if (toIndex == -1) {
        return null;
    }
    return original.subList(0, toIndex + 1);
}
Example #24
Source File: EthereumGetSendAddressUDF.java From hadoopcryptoledger with Apache License 2.0 | 5 votes |
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if ((arguments == null) || (arguments.length != 2)) {
        return null;
    }
    EthereumTransaction eTrans =
            this.ethereumUDFUtil.getEthereumTransactionFromObject(arguments[0].get());
    byte[] sendAddress = EthereumUtil.getSendAddress(eTrans,
            ((IntWritable) arguments[1].get()).get());
    if (sendAddress == null) {
        return null;
    }
    return new BytesWritable(sendAddress);
}
Example #25
Source File: MapGetSumUDF.java From incubator-hivemall with Apache License 2.0 | 5 votes |
public DoubleWritable evaluate(Map<IntWritable, FloatWritable> map, List<IntWritable> keys) {
    double sum = 0d;
    for (IntWritable k : keys) {
        FloatWritable v = map.get(k);
        if (v != null) {
            sum += (double) v.get();
        }
    }
    return val(sum);
}
Example #26
Source File: TestLocalModeWithNewApis.java From big-c with Apache License 2.0 | 5 votes |
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
        sum += val.get();
    }
    result.set(sum);
    context.write(key, result);
}
Example #27
Source File: WordCount.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }

    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
            new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
            new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    // set the value for both, since this test relies on it
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}
Example #28
Source File: HadoopReduceCombineFunctionITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v) throws Exception {
    outT.f0 = v.f0;
    outT.f1 = new IntWritable(1);
    return outT;
}
Example #29
Source File: MapredWordCount.java From tez with Apache License 2.0 | 5 votes |
public void reduce(Text key, Iterator<IntWritable> values,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    int sum = 0;
    while (values.hasNext()) {
        sum += values.next().get();
    }
    output.collect(key, new IntWritable(sum));
}
Example #30
Source File: HadoopMapFunctionITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
public void map(final IntWritable k, final Text v,
        final OutputCollector<IntWritable, Text> out, final Reporter r)
        throws IOException {
    if (v.toString().contains("bananas")) {
        out.collect(k, v);
    }
}