org.apache.hadoop.io.LongWritable Java Examples
The following examples show how to use org.apache.hadoop.io.LongWritable.
Each example is drawn from an open-source project; the source file, project, and license are listed above each snippet.
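As background before the project examples, here is a minimal, self-contained sketch (not taken from any of the listed projects) of the core LongWritable API: construction, set/get, comparison, and the Writable read/write round trip.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.io.LongWritable;

public class LongWritableBasics {
  public static void main(String[] args) throws Exception {
    // LongWritable is a mutable, reusable box around a primitive long
    LongWritable a = new LongWritable(42L);
    a.set(7L);
    System.out.println(a.get()); // 7

    // It implements WritableComparable: compareTo orders by the wrapped value
    LongWritable b = new LongWritable(10L);
    System.out.println(a.compareTo(b) < 0); // true

    // Writable round trip: serialize to bytes, then read back into a new instance
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    a.write(new DataOutputStream(bytes));
    LongWritable copy = new LongWritable();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(a.equals(copy)); // true
  }
}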
Example #1
Source File: merge_results_mapper.java From MLHadoop with Apache License 2.0
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  String[] parts = value.toString().split("\\t");
  // Processing Upper Triangular Matrix's rows
  if (this.upper && !parts[0].contains(",")) {
    context.write(new TextPair(parts[0], ""), new Text(parts[1]));
  }
  // Processing Lower Triangular Matrix's rows
  if (!this.upper && parts[0].contains(",")) {
    String[] rowCol = parts[0].split(",");
    String row = rowCol[0];
    // Sending first row of Lower Triangular Matrix to the reducer
    if (Integer.valueOf(row) - 1 == 0) {
      for (int i = 0; i < this.total_records; i++) {
        context.write(new TextPair("0", String.valueOf(i)), new Text(i + "," + ((i == 0) ? 1 : 0)));
      }
    }
    String column = rowCol[1];
    String element = parts[1];
    context.write(new TextPair(row, column), new Text(column + "," + element));
  }
}
Example #2
Source File: MapOutputCorrectness.java From RDFS with Apache License 2.0
@Override
public void configure(JobConf job) {
  this.conf = job;
  taskPartition = conf.getInt("mapred.task.partition", -1);
  int startingSeed = conf.getInt(SEED, -1) + taskPartition;
  random = new Random(startingSeed);
  LOG.info("Starting with seed " + startingSeed + " on partition " + taskPartition);
  numKeysPerMapper = conf.getInt(NUM_KEYS_PER_MAPPER, -1);
  numValuesPerKey = conf.getInt(NUM_VALUES_PER_KEY, -1);
  numMappers = conf.getNumMapTasks();
  numReducers = conf.getInt("mapred.reduce.tasks", -1);
  maxKeySpace = conf.getInt(MAX_KEY_SPACE, DEFAULT_MAX_KEY_SPACE);
  chanceFailure = conf.getFloat(CHANCE_FAILURE, 0.0f);
  if (numKeysPerMapper == -1 || numValuesPerKey == -1 ||
      numReducers == -1 || maxKeySpace == -1) {
    throw new IllegalArgumentException(
        "Illegal values " + numKeysPerMapper + " " + numValuesPerKey +
        " " + numReducers + " " + maxKeySpace);
  }
  for (int i = 0; i < numMappers; ++i) {
    mapperSumList.add(new LongWritable(0));
    expectedMapperSumList.add(new LongWritable(-1));
  }
}
Example #3
Source File: AUCUDAF.java From incubator-hivemall with Apache License 2.0
@Override
public Object terminatePartial(AggregationBuffer agg) throws HiveException {
  ClassificationAUCAggregationBuffer myAggr = (ClassificationAUCAggregationBuffer) agg;

  Object[] partialResult = new Object[11];
  partialResult[0] = new DoubleWritable(myAggr.indexScore);
  partialResult[1] = new DoubleWritable(myAggr.area);
  partialResult[2] = new LongWritable(myAggr.fp);
  partialResult[3] = new LongWritable(myAggr.tp);
  partialResult[4] = new LongWritable(myAggr.fpPrev);
  partialResult[5] = new LongWritable(myAggr.tpPrev);
  partialResult[6] = myAggr.areaPartialMap;
  partialResult[7] = myAggr.fpPartialMap;
  partialResult[8] = myAggr.tpPartialMap;
  partialResult[9] = myAggr.fpPrevPartialMap;
  partialResult[10] = myAggr.tpPrevPartialMap;
  return partialResult;
}
Example #4
Source File: LindenMapredTest.java From linden with Apache License 2.0
@Test
public void TestMapper() throws IOException {
  try {
    String propertiesFilePath =
        LindenMapredTest.class.getClassLoader().getResource("linden.properties").getFile();
    Files.copy(new File(propertiesFilePath).toPath(), Paths.get("lindenProperties"),
        StandardCopyOption.REPLACE_EXISTING);
    String schemaFilePath =
        LindenMapredTest.class.getClassLoader().getResource("schema.xml").getFile();
    Files.copy(new File(schemaFilePath).toPath(), Paths.get("lindenSchema"),
        StandardCopyOption.REPLACE_EXISTING);
    String json = "{\"id\":0,\"groupid\":\"0\",\"tags\":\"hybrid,leather,moon-roof,reliable\",\"category\":\"compact\",\"mileage\":14900,\"price\":7500,\"contents\":\"yellow compact hybrid leather moon-roof reliable u.s.a. florida tampa asian acura 1.6el \",\"color\":\"yellow\",\"year\":1994,\"makemodel\":\"asian/acura/1.6el\",\"city\":\"u.s.a./florida/tampa\"}";
    mDriver.withInput(new LongWritable(1L), new Text(json.getBytes()));
    mDriver.run();
  } catch (Exception e) {
    e.printStackTrace();
    Assert.assertTrue(false);
  } finally {
    FileUtils.deleteQuietly(Paths.get("lindenProperties").toFile());
    FileUtils.deleteQuietly(Paths.get("lindenSchema").toFile());
  }
}
Example #5
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0
public static void writeTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #6
Source File: TestChainMapReduce.java From RDFS with Apache License 2.0
public void reduce(LongWritable key, Iterator<Text> values,
    OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
  while (values.hasNext()) {
    Text value = values.next();
    writeFlag(conf, "reduce." + name + ".value." + value);
    key.set(10);
    output.collect(key, value);
    if (byValue) {
      assertEquals(10, key.get());
    } else {
      assertNotSame(10, key.get());
    }
    key.set(11);
  }
}
Example #7
Source File: TestIPC.java From big-c with Apache License 2.0
@Override
public void run() {
  for (int i = 0; i < count; i++) {
    try {
      LongWritable param = new LongWritable(RANDOM.nextLong());
      LongWritable value = (LongWritable) client.call(param, server, null, null, 0, conf);
      if (!param.equals(value)) {
        LOG.fatal("Call failed!");
        failed = true;
        break;
      }
    } catch (Exception e) {
      LOG.fatal("Caught: " + StringUtils.stringifyException(e));
      failed = true;
    }
  }
}
Example #8
Source File: LineRecordReader.java From hadoop-gpu with Apache License 2.0
/** Read a line. */
public synchronized boolean next(LongWritable key, Text value) throws IOException {
  while (pos < end) {
    key.set(pos);

    int newSize = in.readLine(value, maxLineLength,
        Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
    if (newSize == 0) {
      return false;
    }
    pos += newSize;
    if (newSize < maxLineLength) {
      return true;
    }

    // line too long. try again
    LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
  }
  return false;
}
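The key this reader fills in is the byte offset of each line within the file, which is why mappers over text input receive (LongWritable, Text) pairs. As a hypothetical illustration (not from hadoop-gpu), an old-API mapper consuming such records might look like this:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical mapper: re-keys each line by its length, ignoring the offset key
public class OffsetAwareMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, LongWritable, Text> {
  @Override
  public void map(LongWritable offset, Text line,
      OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
    // offset.get() is the byte position of this line within the input file
    output.collect(new LongWritable(line.getLength()), line);
  }
}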
Example #9
Source File: HalyardStats.java From Halyard with Apache License 2.0
private void report(Context output, IRI property, Value partitionId, long value)
    throws IOException, InterruptedException {
  if (value > 0 && (graphContext == null || graphContext.equals(graph))) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (DataOutputStream dos = new DataOutputStream(baos)) {
      dos.writeUTF(graph.stringValue());
      dos.writeUTF(property.stringValue());
      if (partitionId == null) {
        dos.writeInt(0);
      } else {
        byte[] b = HalyardTableUtils.writeBytes(partitionId);
        dos.writeInt(b.length);
        dos.write(b);
      }
    }
    output.write(new ImmutableBytesWritable(baos.toByteArray()), new LongWritable(value));
  }
}
Example #10
Source File: NNBench.java From RDFS with Apache License 2.0
/**
 * Create control files before a test run.
 * Number of files created is equal to the number of maps specified
 *
 * @throws IOException on error
 */
private static void createControlFiles(Configuration config) throws IOException {
  FileSystem tempFS = FileSystem.get(config);
  LOG.info("Creating " + numberOfMaps + " control files");

  for (int i = 0; i < numberOfMaps; i++) {
    String strFileName = "NNBench_Controlfile_" + i;
    Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME), strFileName);

    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(tempFS, config, filePath,
          Text.class, LongWritable.class, CompressionType.NONE);
      writer.append(new Text(strFileName), new LongWritable(0L));
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
}
Example #11
Source File: IOMapperBase.java From big-c with Apache License 2.0
/**
 * Map file name and offset into statistical data.
 * <p>
 * The map task is to get the
 * <tt>key</tt>, which contains the file name, and the
 * <tt>value</tt>, which is the offset within the file.
 *
 * The parameters are passed to the abstract method
 * {@link #doIO(Reporter,String,long)}, which performs the io operation,
 * usually read or write data, and then
 * {@link #collectStats(OutputCollector,String,long,Object)}
 * is called to prepare stat data for a subsequent reducer.
 */
public void map(Text key, LongWritable value,
    OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  String name = key.toString();
  long longValue = value.get();

  reporter.setStatus("starting " + name + " ::host = " + hostName);

  this.stream = getIOStream(name);
  T statValue = null;
  long tStart = System.currentTimeMillis();
  try {
    statValue = doIO(reporter, name, longValue);
  } finally {
    if (stream != null) stream.close();
  }
  long tEnd = System.currentTimeMillis();
  long execTime = tEnd - tStart;
  collectStats(output, name, execTime, statValue);

  reporter.setStatus("finished " + name + " ::host = " + hostName);
}
Example #12
Source File: TestMultithreadedMapRunner.java From hadoop with Apache License 2.0
public void map(LongWritable key, Text value,
    OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
  if (ioEx) {
    throw new IOException();
  }
  if (rtEx) {
    throw new RuntimeException();
  }
  output.collect(key, value);
  try {
    Thread.sleep(100);
  } catch (InterruptedException ex) {
    throw new RuntimeException(ex);
  }
}
Example #13
Source File: IOUtilFunctions.java From systemds with Apache License 2.0
@Override
public Long call() throws Exception {
  RecordReader<LongWritable, Text> reader =
      _inputFormat.getRecordReader(_split, _jobConf, Reporter.NULL);
  LongWritable key = new LongWritable();
  Text value = new Text();
  long nrows = 0;

  try {
    // count rows from the first non-header row
    if (_hasHeader)
      reader.next(key, value);
    while (reader.next(key, value))
      nrows++;
  } finally {
    IOUtilFunctions.closeSilently(reader);
  }
  return nrows;
}
Example #14
Source File: TestIPC.java From RDFS with Apache License 2.0
public void testStandAloneClient() throws Exception {
  testParallel(10, false, 2, 4, 2, 4, 100);
  Client client = new Client(LongWritable.class, conf);
  InetSocketAddress address = new InetSocketAddress("127.0.0.1", 10);
  try {
    client.call(new LongWritable(RANDOM.nextLong()), address, null, null, 0);
    fail("Expected an exception to have been thrown");
  } catch (IOException e) {
    String message = e.getMessage();
    String addressText = address.toString();
    assertTrue("Did not find " + addressText + " in " + message,
        message.contains(addressText));
    Throwable cause = e.getCause();
    assertNotNull("No nested exception in " + e, cause);
    String causeText = cause.getMessage();
    assertTrue("Did not find " + causeText + " in " + message,
        message.contains(causeText));
  }
}
Example #15
Source File: TestFixedLengthInputFormat.java From hadoop with Apache License 2.0
/**
 * Test with record length set to 0
 */
@Test(timeout = 5000)
public void testZeroRecordLength() throws IOException {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Set the fixed length record length config property
  JobConf job = new JobConf(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.setRecordLength(job, 0);
  format.configure(job);
  InputSplit[] splits = format.getSplits(job, 1);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      RecordReader<LongWritable, BytesWritable> reader =
          format.getRecordReader(split, job, voidReporter);
    } catch (IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for zero record length:", exceptionThrown);
}
Example #16
Source File: AvroBigQueryInputFormat.java From hadoop-connectors with Apache License 2.0
@Override
public RecordReader<LongWritable, GenericData.Record> createDelegateRecordReader(
    InputSplit split, Configuration configuration) throws IOException, InterruptedException {
  Preconditions.checkState(
      split instanceof FileSplit, "AvroBigQueryInputFormat requires FileSplit input splits");
  return new AvroRecordReader();
}
Example #17
Source File: TestTableMapReduceUtil.java From hbase with Apache License 2.0
@Test
public void testInitTableMapperJob2() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
      Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
Example #18
Source File: FixedLengthRecordReader.java From hadoop with Apache License 2.0
@Override
public synchronized boolean next(LongWritable key, BytesWritable value) throws IOException {
  boolean dataRead = reader.nextKeyValue();
  if (dataRead) {
    LongWritable newKey = reader.getCurrentKey();
    BytesWritable newValue = reader.getCurrentValue();
    key.set(newKey.get());
    value.set(newValue);
  }
  return dataRead;
}
Example #19
Source File: PcapJob.java From metron with Apache License 2.0
/**
 * Creates, but does not submit the job. This is the core MapReduce mrJob. Empty input path
 * results in a null to be returned instead of creating the job.
 */
public Job createJob(Optional<String> jobName,
    Path basePath,
    Path jobOutputPath,
    long beginNS,
    long endNS,
    int numReducers,
    T fields,
    Configuration conf,
    FileSystem fs,
    PcapFilterConfigurator<T> filterImpl) throws IOException {
  Iterable<String> filteredPaths =
      FileFilterUtil.getPathsInTimeRange(beginNS, endNS, listFiles(fs, basePath));
  String inputPaths = Joiner.on(',').join(filteredPaths);
  if (StringUtils.isEmpty(inputPaths)) {
    return null;
  }
  conf.set(START_TS_CONF, Long.toUnsignedString(beginNS));
  conf.set(END_TS_CONF, Long.toUnsignedString(endNS));
  conf.set(WIDTH_CONF, "" + findWidth(beginNS, endNS, numReducers));
  filterImpl.addToConfig(fields, conf);
  Job job = Job.getInstance(conf);
  jobName.ifPresent(job::setJobName);
  job.setJarByClass(PcapJob.class);
  job.setMapperClass(PcapJob.PcapMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(BytesWritable.class);
  job.setNumReduceTasks(numReducers);
  job.setReducerClass(PcapReducer.class);
  job.setPartitionerClass(PcapPartitioner.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  SequenceFileInputFormat.addInputPaths(job, inputPaths);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, jobOutputPath);
  return job;
}
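Since createJob only builds the Job (and returns null when the time range matches no input files), the caller submits it separately. A hypothetical usage sketch, with variable names mirroring the parameters above:

// Hypothetical caller of the createJob method shown above
Job job = pcapJob.createJob(Optional.of("pcap-query"), basePath, jobOutputPath,
    beginNS, endNS, numReducers, fields, conf, fs, filterImpl);
if (job != null && !job.waitForCompletion(true)) {
  throw new RuntimeException("pcap query job failed");
}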
Example #20
Source File: GenerateData.java From RDFS with Apache License 2.0
@Override
public void map(NullWritable key, LongWritable value, Context context)
    throws IOException, InterruptedException {
  for (long bytes = value.get(); bytes > 0; bytes -= val.getLength()) {
    r.nextBytes(val.getBytes());
    val.setSize((int) Math.min(val.getLength(), bytes));
    context.write(key, val);
  }
}
Example #21
Source File: LongSumReducer.java From hadoop with Apache License 2.0
public void reduce(KEY key, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
  long sum = 0;
  for (LongWritable val : values) {
    sum += val.get();
  }
  result.set(sum);
  context.write(key, result);
}
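Because this reduce is a pure per-key sum, and summation is associative, LongSumReducer is commonly registered as both combiner and reducer. The following is a minimal word-count-style driver sketch assuming the new (org.apache.hadoop.mapreduce) API; the TokenMapper class is hypothetical, not part of Hadoop:

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

public class LongWordCount {
  // Hypothetical mapper: emits (word, 1L) for every token in the line
  public static class TokenMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1L);
    private final Text word = new Text();
    @Override
    protected void map(LongWritable offset, Text line, Context context)
        throws IOException, InterruptedException {
      StringTokenizer it = new StringTokenizer(line.toString());
      while (it.hasMoreTokens()) {
        word.set(it.nextToken());
        context.write(word, ONE);
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "long word count");
    job.setJarByClass(LongWordCount.class);
    job.setMapperClass(TokenMapper.class);
    // the sum is associative, so LongSumReducer doubles as the combiner
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}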
Example #22
Source File: MapUtils.java From incubator-tez with Apache License 2.0
private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
    throws IOException {
  FileInputFormat.setInputPaths(job, workDir);

  LOG.info("Generating data at path: " + file);
  // create a file with length entries
  @SuppressWarnings("deprecation")
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
  try {
    Random r = new Random(System.currentTimeMillis());
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i = 10; i > 0; i--) {
      key.set(r.nextInt(1000));
      value.set(Integer.toString(i));
      writer.append(key, value);
      LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
    }
  } finally {
    writer.close();
  }

  SequenceFileInputFormat<LongWritable, Text> format =
      new SequenceFileInputFormat<LongWritable, Text>();
  InputSplit[] splits = format.getSplits(job, 1);
  System.err.println("#split = " + splits.length + " ; "
      + "#locs = " + splits[0].getLocations().length + "; "
      + "loc = " + splits[0].getLocations()[0] + "; "
      + "off = " + splits[0].getLength() + "; "
      + "file = " + ((FileSplit) splits[0]).getPath());
  return splits[0];
}
Example #23
Source File: CSVReblockSPInstruction.java From systemds with Apache License 2.0
@SuppressWarnings("unchecked") protected JavaPairRDD<Long,FrameBlock> processFrameCSVReblockInstruction(SparkExecutionContext sec, DataCharacteristics mcOut, ValueType[] schema) { //get input rdd (needs to be longwritable/text for consistency with meta data, in case of //serialization issues create longwritableser/textser as serializable wrappers JavaPairRDD<LongWritable, Text> in = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForFrameObject(sec.getFrameObject(input1), InputInfo.CSVInputInfo); //reblock csv to binary block return FrameRDDConverterUtils.csvToBinaryBlock(sec.getSparkContext(), in, mcOut, schema, _hasHeader, _delim, _fill, _fillValue); }
Example #24
Source File: DBRecordReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * @deprecated Use {@link #nextKeyValue()}
 */
@Deprecated
public boolean next(LongWritable k, T v) throws IOException {
  this.key = k;
  this.value = v;
  return nextKeyValue();
}
Example #25
Source File: CountPlan.java From rya with Apache License 2.0
@Override
public Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(
    final IntermediateProspect prospect, final Iterable<LongWritable> counts) {
  long sum = 0;
  for (final LongWritable count : counts) {
    sum += count.get();
  }
  return Collections.singleton(
      new CustomEntry<IntermediateProspect, LongWritable>(prospect, new LongWritable(sum)));
}
Example #26
Source File: WARCInputFormat.java From warc-hadoop with MIT License
@Override
public boolean next(LongWritable key, WARCWritable value) throws IOException {
  try {
    WARCRecord record = reader.read();
    key.set(reader.getRecordsRead());
    value.setRecord(record);
    return true;
  } catch (EOFException eof) {
    return false;
  }
}
Example #27
Source File: LoadToES.java From elasticsearch-hadoop with Apache License 2.0
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  LinkedMapWritable record = new LinkedMapWritable();
  String line = value.toString();
  Iterator<Text> fieldNameIter = fieldNames.iterator();
  for (StringTokenizer tokenizer = new StringTokenizer(line, "\t"); tokenizer.hasMoreTokens();) {
    if (fieldNameIter.hasNext()) {
      Text fieldName = fieldNameIter.next();
      String field = tokenizer.nextToken();
      record.put(fieldName, new Text(field));
    }
  }
  context.write(NullWritable.get(), record);
}
Example #28
Source File: ParseLogJob.java From 163-bigdate-note with GNU General Public License v3.0
public int run(String[] args) throws Exception {
  // create the job
  Configuration config = getConf();
  Job job = Job.getInstance(config);
  // set job parameters
  job.setJarByClass(ParseLogJob.class);
  job.setJobName("parselog");
  job.setMapperClass(LogMapper.class);
  // set the reducer class
  job.setReducerClass(LogReducer.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(LogWritable.class);
  job.setOutputValueClass(Text.class);

  // add input and output paths
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputPath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputPath);

  // set the compression codec
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, LzopCodec.class);

  FileSystem fs = FileSystem.get(config);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  // run the job
  if (!job.waitForCompletion(true)) {
    throw new RuntimeException(job.getJobName() + " failed!");
  }
  return 0;
}
Example #29
Source File: SketchEvaluator.java From incubator-datasketches-hive with Apache License 2.0
@Override
public Object terminatePartial(final @SuppressWarnings("deprecation") AggregationBuffer buf)
    throws HiveException {
  final State state = (State) buf;
  final CpcSketch intermediate = state.getResult();
  if (intermediate == null) {
    return null;
  }
  final byte[] bytes = intermediate.toByteArray();
  return Arrays.asList(
      new IntWritable(state.getLgK()),
      new LongWritable(state.getSeed()),
      new BytesWritable(bytes));
}
Example #30
Source File: MultiLineInputFormat.java From dkpro-c4corpus with Apache License 2.0
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit genericSplit,
    TaskAttemptContext context) {
  context.setStatus(genericSplit.toString());
  return new MultiLineRecordReader();
}