org.apache.hadoop.io.LongWritable Java Examples
The following examples show how to use org.apache.hadoop.io.LongWritable.
Each example is drawn from an open-source project; the source file, project, and license are listed above each snippet.
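As background before the project examples, here is a minimal, self-contained sketch (not taken from any of the listed projects) of the core LongWritable API: construction, set/get, comparison, and the Writable read/write round trip.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.io.LongWritable;

public class LongWritableBasics {
  public static void main(String[] args) throws Exception {
    // LongWritable is a mutable, reusable box around a primitive long
    LongWritable a = new LongWritable(42L);
    a.set(7L);
    System.out.println(a.get()); // 7

    // It implements WritableComparable: compareTo orders by the wrapped value
    LongWritable b = new LongWritable(10L);
    System.out.println(a.compareTo(b) < 0); // true

    // Writable round trip: serialize to bytes, then read back into a new instance
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    a.write(new DataOutputStream(bytes));
    LongWritable copy = new LongWritable();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(a.equals(copy)); // true
  }
}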
Example #1
Source File: merge_results_mapper.java From MLHadoop with Apache License 2.0
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  String[] parts = value.toString().split("\\t");
  // Processing Upper Triangular Matrix's rows
  if (this.upper && !parts[0].contains(",")) {
    context.write(new TextPair(parts[0], ""), new Text(parts[1]));
  }
  // Processing Lower Triangular Matrix's rows
  if (!this.upper && parts[0].contains(",")) {
    String[] rowCol = parts[0].split(",");
    String row = rowCol[0];
    // Sending first row of Lower Triangular Matrix to the reducer
    if (Integer.valueOf(row) - 1 == 0) {
      for (int i = 0; i < this.total_records; i++) {
        context.write(new TextPair("0", String.valueOf(i)), new Text(i + "," + ((i == 0) ? 1 : 0)));
      }
    }
    String column = rowCol[1];
    String element = parts[1];
    context.write(new TextPair(row, column), new Text(column + "," + element));
  }
}
Example #2
Source File: MapOutputCorrectness.java From RDFS with Apache License 2.0
@Override
public void configure(JobConf job) {
  this.conf = job;
  taskPartition = conf.getInt("mapred.task.partition", -1);
  int startingSeed = conf.getInt(SEED, -1) + taskPartition;
  random = new Random(startingSeed);
  LOG.info("Starting with seed " + startingSeed + " on partition " + taskPartition);
  numKeysPerMapper = conf.getInt(NUM_KEYS_PER_MAPPER, -1);
  numValuesPerKey = conf.getInt(NUM_VALUES_PER_KEY, -1);
  numMappers = conf.getNumMapTasks();
  numReducers = conf.getInt("mapred.reduce.tasks", -1);
  maxKeySpace = conf.getInt(MAX_KEY_SPACE, DEFAULT_MAX_KEY_SPACE);
  chanceFailure = conf.getFloat(CHANCE_FAILURE, 0.0f);
  if (numKeysPerMapper == -1 || numValuesPerKey == -1 ||
      numReducers == -1 || maxKeySpace == -1) {
    throw new IllegalArgumentException(
        "Illegal values " + numKeysPerMapper + " " + numValuesPerKey +
        " " + numReducers + " " + maxKeySpace);
  }
  for (int i = 0; i < numMappers; ++i) {
    mapperSumList.add(new LongWritable(0));
    expectedMapperSumList.add(new LongWritable(-1));
  }
}
Example #3
Source File: AUCUDAF.java From incubator-hivemall with Apache License 2.0
@Override
public Object terminatePartial(AggregationBuffer agg) throws HiveException {
  ClassificationAUCAggregationBuffer myAggr = (ClassificationAUCAggregationBuffer) agg;

  Object[] partialResult = new Object[11];
  partialResult[0] = new DoubleWritable(myAggr.indexScore);
  partialResult[1] = new DoubleWritable(myAggr.area);
  partialResult[2] = new LongWritable(myAggr.fp);
  partialResult[3] = new LongWritable(myAggr.tp);
  partialResult[4] = new LongWritable(myAggr.fpPrev);
  partialResult[5] = new LongWritable(myAggr.tpPrev);
  partialResult[6] = myAggr.areaPartialMap;
  partialResult[7] = myAggr.fpPartialMap;
  partialResult[8] = myAggr.tpPartialMap;
  partialResult[9] = myAggr.fpPrevPartialMap;
  partialResult[10] = myAggr.tpPrevPartialMap;
  return partialResult;
}
Example #4
Source File: LindenMapredTest.java From linden with Apache License 2.0
@Test
public void TestMapper() throws IOException {
  try {
    String propertiesFilePath =
        LindenMapredTest.class.getClassLoader().getResource("linden.properties").getFile();
    Files.copy(new File(propertiesFilePath).toPath(), Paths.get("lindenProperties"),
        StandardCopyOption.REPLACE_EXISTING);
    String schemaFilePath =
        LindenMapredTest.class.getClassLoader().getResource("schema.xml").getFile();
    Files.copy(new File(schemaFilePath).toPath(), Paths.get("lindenSchema"),
        StandardCopyOption.REPLACE_EXISTING);
    String json = "{\"id\":0,\"groupid\":\"0\",\"tags\":\"hybrid,leather,moon-roof,reliable\",\"category\":\"compact\",\"mileage\":14900,\"price\":7500,\"contents\":\"yellow compact hybrid leather moon-roof reliable u.s.a. florida tampa asian acura 1.6el \",\"color\":\"yellow\",\"year\":1994,\"makemodel\":\"asian/acura/1.6el\",\"city\":\"u.s.a./florida/tampa\"}";
    mDriver.withInput(new LongWritable(1L), new Text(json.getBytes()));
    mDriver.run();
  } catch (Exception e) {
    e.printStackTrace();
    Assert.assertTrue(false);
  } finally {
    FileUtils.deleteQuietly(Paths.get("lindenProperties").toFile());
    FileUtils.deleteQuietly(Paths.get("lindenSchema").toFile());
  }
}
Example #5
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0
public static void writeTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #6
Source File: TestChainMapReduce.java From RDFS with Apache License 2.0
public void reduce(LongWritable key, Iterator<Text> values,
    OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
  while (values.hasNext()) {
    Text value = values.next();
    writeFlag(conf, "reduce." + name + ".value." + value);
    key.set(10);
    output.collect(key, value);
    if (byValue) {
      assertEquals(10, key.get());
    } else {
      assertNotSame(10, key.get());
    }
    key.set(11);
  }
}
Example #7
Source File: TestIPC.java From big-c with Apache License 2.0
@Override
public void run() {
  for (int i = 0; i < count; i++) {
    try {
      LongWritable param = new LongWritable(RANDOM.nextLong());
      LongWritable value = (LongWritable) client.call(param, server, null, null, 0, conf);
      if (!param.equals(value)) {
        LOG.fatal("Call failed!");
        failed = true;
        break;
      }
    } catch (Exception e) {
      LOG.fatal("Caught: " + StringUtils.stringifyException(e));
      failed = true;
    }
  }
}
Example #8
Source File: LineRecordReader.java From hadoop-gpu with Apache License 2.0
/** Read a line. */
public synchronized boolean next(LongWritable key, Text value) throws IOException {
  while (pos < end) {
    key.set(pos);

    int newSize = in.readLine(value, maxLineLength,
        Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
    if (newSize == 0) {
      return false;
    }
    pos += newSize;
    if (newSize < maxLineLength) {
      return true;
    }

    // line too long. try again
    LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
  }
  return false;
}
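The key this reader fills in is the byte offset of each line within the file, which is why mappers over text input receive (LongWritable, Text) pairs. As a hypothetical illustration (not from hadoop-gpu), an old-API mapper consuming such records might look like this:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical mapper: re-keys each line by its length, ignoring the offset key
public class OffsetAwareMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, LongWritable, Text> {
  @Override
  public void map(LongWritable offset, Text line,
      OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
    // offset.get() is the byte position of this line within the input file
    output.collect(new LongWritable(line.getLength()), line);
  }
}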
Example #9
Source File: HalyardStats.java From Halyard with Apache License 2.0
private void report(Context output, IRI property, Value partitionId, long value)
    throws IOException, InterruptedException {
  if (value > 0 && (graphContext == null || graphContext.equals(graph))) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (DataOutputStream dos = new DataOutputStream(baos)) {
      dos.writeUTF(graph.stringValue());
      dos.writeUTF(property.stringValue());
      if (partitionId == null) {
        dos.writeInt(0);
      } else {
        byte[] b = HalyardTableUtils.writeBytes(partitionId);
        dos.writeInt(b.length);
        dos.write(b);
      }
    }
    output.write(new ImmutableBytesWritable(baos.toByteArray()), new LongWritable(value));
  }
}
Example #10
Source File: NNBench.java From RDFS with Apache License 2.0
/**
 * Create control files before a test run.
 * Number of files created is equal to the number of maps specified
 *
 * @throws IOException on error
 */
private static void createControlFiles(Configuration config) throws IOException {
  FileSystem tempFS = FileSystem.get(config);
  LOG.info("Creating " + numberOfMaps + " control files");

  for (int i = 0; i < numberOfMaps; i++) {
    String strFileName = "NNBench_Controlfile_" + i;
    Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME), strFileName);

    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(tempFS, config, filePath,
          Text.class, LongWritable.class, CompressionType.NONE);
      writer.append(new Text(strFileName), new LongWritable(0L));
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
}
Example #11
Source File: IOMapperBase.java From big-c with Apache License 2.0
/**
 * Map file name and offset into statistical data.
 * <p>
 * The map task is to get the
 * <tt>key</tt>, which contains the file name, and the
 * <tt>value</tt>, which is the offset within the file.
 *
 * The parameters are passed to the abstract method
 * {@link #doIO(Reporter,String,long)}, which performs the io operation,
 * usually read or write data, and then
 * {@link #collectStats(OutputCollector,String,long,Object)}
 * is called to prepare stat data for a subsequent reducer.
 */
public void map(Text key, LongWritable value,
    OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  String name = key.toString();
  long longValue = value.get();

  reporter.setStatus("starting " + name + " ::host = " + hostName);

  this.stream = getIOStream(name);
  T statValue = null;
  long tStart = System.currentTimeMillis();
  try {
    statValue = doIO(reporter, name, longValue);
  } finally {
    if (stream != null) stream.close();
  }
  long tEnd = System.currentTimeMillis();
  long execTime = tEnd - tStart;
  collectStats(output, name, execTime, statValue);

  reporter.setStatus("finished " + name + " ::host = " + hostName);
}
Example #12
Source File: TestMultithreadedMapRunner.java From hadoop with Apache License 2.0
public void map(LongWritable key, Text value,
    OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
  if (ioEx) {
    throw new IOException();
  }
  if (rtEx) {
    throw new RuntimeException();
  }
  output.collect(key, value);
  try {
    Thread.sleep(100);
  } catch (InterruptedException ex) {
    throw new RuntimeException(ex);
  }
}
Example #13
Source File: IOUtilFunctions.java From systemds with Apache License 2.0
@Override
public Long call() throws Exception {
  RecordReader<LongWritable, Text> reader =
      _inputFormat.getRecordReader(_split, _jobConf, Reporter.NULL);
  LongWritable key = new LongWritable();
  Text value = new Text();
  long nrows = 0;

  try {
    // count rows from the first non-header row
    if (_hasHeader)
      reader.next(key, value);
    while (reader.next(key, value))
      nrows++;
  } finally {
    IOUtilFunctions.closeSilently(reader);
  }
  return nrows;
}
Example #14
Source File: TestIPC.java From RDFS with Apache License 2.0
public void testStandAloneClient() throws Exception {
  testParallel(10, false, 2, 4, 2, 4, 100);
  Client client = new Client(LongWritable.class, conf);
  InetSocketAddress address = new InetSocketAddress("127.0.0.1", 10);
  try {
    client.call(new LongWritable(RANDOM.nextLong()), address, null, null, 0);
    fail("Expected an exception to have been thrown");
  } catch (IOException e) {
    String message = e.getMessage();
    String addressText = address.toString();
    assertTrue("Did not find " + addressText + " in " + message,
        message.contains(addressText));
    Throwable cause = e.getCause();
    assertNotNull("No nested exception in " + e, cause);
    String causeText = cause.getMessage();
    assertTrue("Did not find " + causeText + " in " + message,
        message.contains(causeText));
  }
}
Example #15
Source File: TestFixedLengthInputFormat.java From hadoop with Apache License 2.0
/**
 * Test with record length set to 0
 */
@Test(timeout = 5000)
public void testZeroRecordLength() throws IOException {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Set the fixed length record length config property
  JobConf job = new JobConf(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.setRecordLength(job, 0);
  format.configure(job);
  InputSplit[] splits = format.getSplits(job, 1);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      RecordReader<LongWritable, BytesWritable> reader =
          format.getRecordReader(split, job, voidReporter);
    } catch (IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for zero record length:", exceptionThrown);
}
Example #16
Source File: AvroBigQueryInputFormat.java From hadoop-connectors with Apache License 2.0
@Override
public RecordReader<LongWritable, GenericData.Record> createDelegateRecordReader(
    InputSplit split, Configuration configuration) throws IOException, InterruptedException {
  Preconditions.checkState(
      split instanceof FileSplit, "AvroBigQueryInputFormat requires FileSplit input splits");
  return new AvroRecordReader();
}
Example #17
Source File: TestTableMapReduceUtil.java From hbase with Apache License 2.0
@Test
public void testInitTableMapperJob2() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
      Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
Example #18
Source File: FixedLengthRecordReader.java From hadoop with Apache License 2.0
@Override
public synchronized boolean next(LongWritable key, BytesWritable value) throws IOException {
  boolean dataRead = reader.nextKeyValue();
  if (dataRead) {
    LongWritable newKey = reader.getCurrentKey();
    BytesWritable newValue = reader.getCurrentValue();
    key.set(newKey.get());
    value.set(newValue);
  }
  return dataRead;
}
Example #19
Source File: PcapJob.java From metron with Apache License 2.0
/**
 * Creates, but does not submit the job. This is the core MapReduce mrJob. Empty input path
 * results in a null to be returned instead of creating the job.
 */
public Job createJob(Optional<String> jobName,
    Path basePath,
    Path jobOutputPath,
    long beginNS,
    long endNS,
    int numReducers,
    T fields,
    Configuration conf,
    FileSystem fs,
    PcapFilterConfigurator<T> filterImpl) throws IOException {
  Iterable<String> filteredPaths =
      FileFilterUtil.getPathsInTimeRange(beginNS, endNS, listFiles(fs, basePath));
  String inputPaths = Joiner.on(',').join(filteredPaths);
  if (StringUtils.isEmpty(inputPaths)) {
    return null;
  }
  conf.set(START_TS_CONF, Long.toUnsignedString(beginNS));
  conf.set(END_TS_CONF, Long.toUnsignedString(endNS));
  conf.set(WIDTH_CONF, "" + findWidth(beginNS, endNS, numReducers));
  filterImpl.addToConfig(fields, conf);
  Job job = Job.getInstance(conf);
  jobName.ifPresent(job::setJobName);
  job.setJarByClass(PcapJob.class);
  job.setMapperClass(PcapJob.PcapMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(BytesWritable.class);
  job.setNumReduceTasks(numReducers);
  job.setReducerClass(PcapReducer.class);
  job.setPartitionerClass(PcapPartitioner.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  SequenceFileInputFormat.addInputPaths(job, inputPaths);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, jobOutputPath);
  return job;
}
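Since createJob only builds the Job (and returns null when the time range matches no input files), the caller submits it separately. A hypothetical usage sketch, with variable names mirroring the parameters above:

// Hypothetical caller of the createJob method shown above
Job job = pcapJob.createJob(Optional.of("pcap-query"), basePath, jobOutputPath,
    beginNS, endNS, numReducers, fields, conf, fs, filterImpl);
if (job != null && !job.waitForCompletion(true)) {
  throw new RuntimeException("pcap query job failed");
}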
Example #20
Source File: GenerateData.java From RDFS with Apache License 2.0
@Override
public void map(NullWritable key, LongWritable value, Context context)
    throws IOException, InterruptedException {
  for (long bytes = value.get(); bytes > 0; bytes -= val.getLength()) {
    r.nextBytes(val.getBytes());
    val.setSize((int) Math.min(val.getLength(), bytes));
    context.write(key, val);
  }
}
Example #21
Source File: LongSumReducer.java From hadoop with Apache License 2.0
public void reduce(KEY key, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
  long sum = 0;
  for (LongWritable val : values) {
    sum += val.get();
  }
  result.set(sum);
  context.write(key, result);
}
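Because this reduce is a pure per-key sum, and summation is associative, LongSumReducer is commonly registered as both combiner and reducer. The following is a minimal word-count-style driver sketch assuming the new (org.apache.hadoop.mapreduce) API; the TokenMapper class is hypothetical, not part of Hadoop:

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

public class LongWordCount {
  // Hypothetical mapper: emits (word, 1L) for every token in the line
  public static class TokenMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1L);
    private final Text word = new Text();
    @Override
    protected void map(LongWritable offset, Text line, Context context)
        throws IOException, InterruptedException {
      StringTokenizer it = new StringTokenizer(line.toString());
      while (it.hasMoreTokens()) {
        word.set(it.nextToken());
        context.write(word, ONE);
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "long word count");
    job.setJarByClass(LongWordCount.class);
    job.setMapperClass(TokenMapper.class);
    // the sum is associative, so LongSumReducer doubles as the combiner
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}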
Example #22
Source File: MapUtils.java From incubator-tez with Apache License 2.0
private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
    throws IOException {
  FileInputFormat.setInputPaths(job, workDir);

  LOG.info("Generating data at path: " + file);
  // create a file with length entries
  @SuppressWarnings("deprecation")
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
  try {
    Random r = new Random(System.currentTimeMillis());
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i = 10; i > 0; i--) {
      key.set(r.nextInt(1000));
      value.set(Integer.toString(i));
      writer.append(key, value);
      LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
    }
  } finally {
    writer.close();
  }

  SequenceFileInputFormat<LongWritable, Text> format =
      new SequenceFileInputFormat<LongWritable, Text>();
  InputSplit[] splits = format.getSplits(job, 1);
  System.err.println("#split = " + splits.length + " ; "
      + "#locs = " + splits[0].getLocations().length + "; "
      + "loc = " + splits[0].getLocations()[0] + "; "
      + "off = " + splits[0].getLength() + "; "
      + "file = " + ((FileSplit) splits[0]).getPath());
  return splits[0];
}
Example #23
Source File: CSVReblockSPInstruction.java From systemds with Apache License 2.0
@SuppressWarnings("unchecked") protected JavaPairRDD<Long,FrameBlock> processFrameCSVReblockInstruction(SparkExecutionContext sec, DataCharacteristics mcOut, ValueType[] schema) { //get input rdd (needs to be longwritable/text for consistency with meta data, in case of //serialization issues create longwritableser/textser as serializable wrappers JavaPairRDD<LongWritable, Text> in = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForFrameObject(sec.getFrameObject(input1), InputInfo.CSVInputInfo); //reblock csv to binary block return FrameRDDConverterUtils.csvToBinaryBlock(sec.getSparkContext(), in, mcOut, schema, _hasHeader, _delim, _fill, _fillValue); }
Example #24
Source File: DBRecordReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * @deprecated Use {@link #nextKeyValue()}
 */
@Deprecated
public boolean next(LongWritable k, T v) throws IOException {
  this.key = k;
  this.value = v;
  return nextKeyValue();
}
Example #25
Source File: CountPlan.java From rya with Apache License 2.0
@Override
public Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(
    final IntermediateProspect prospect, final Iterable<LongWritable> counts) {
  long sum = 0;
  for (final LongWritable count : counts) {
    sum += count.get();
  }
  return Collections.singleton(
      new CustomEntry<IntermediateProspect, LongWritable>(prospect, new LongWritable(sum)));
}
Example #26
Source File: WARCInputFormat.java From warc-hadoop with MIT License
@Override
public boolean next(LongWritable key, WARCWritable value) throws IOException {
  try {
    WARCRecord record = reader.read();
    key.set(reader.getRecordsRead());
    value.setRecord(record);
    return true;
  } catch (EOFException eof) {
    return false;
  }
}
Example #27
Source File: LoadToES.java From elasticsearch-hadoop with Apache License 2.0
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  LinkedMapWritable record = new LinkedMapWritable();
  String line = value.toString();
  Iterator<Text> fieldNameIter = fieldNames.iterator();
  for (StringTokenizer tokenizer = new StringTokenizer(line, "\t"); tokenizer.hasMoreTokens();) {
    if (fieldNameIter.hasNext()) {
      Text fieldName = fieldNameIter.next();
      String field = tokenizer.nextToken();
      record.put(fieldName, new Text(field));
    }
  }
  context.write(NullWritable.get(), record);
}
Example #28
Source File: ParseLogJob.java From 163-bigdate-note with GNU General Public License v3.0
public int run(String[] args) throws Exception {
  // create the job
  Configuration config = getConf();
  Job job = Job.getInstance(config);
  // set job parameters
  job.setJarByClass(ParseLogJob.class);
  job.setJobName("parselog");
  job.setMapperClass(LogMapper.class);
  // set the reducer class
  job.setReducerClass(LogReducer.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(LogWritable.class);
  job.setOutputValueClass(Text.class);

  // add input and output paths
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputPath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputPath);

  // set the compression codec
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, LzopCodec.class);

  FileSystem fs = FileSystem.get(config);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  // run the job
  if (!job.waitForCompletion(true)) {
    throw new RuntimeException(job.getJobName() + " failed!");
  }
  return 0;
}
Example #29
Source File: SketchEvaluator.java From incubator-datasketches-hive with Apache License 2.0
@Override
public Object terminatePartial(final @SuppressWarnings("deprecation") AggregationBuffer buf)
    throws HiveException {
  final State state = (State) buf;
  final CpcSketch intermediate = state.getResult();
  if (intermediate == null) {
    return null;
  }
  final byte[] bytes = intermediate.toByteArray();
  return Arrays.asList(
      new IntWritable(state.getLgK()),
      new LongWritable(state.getSeed()),
      new BytesWritable(bytes));
}
Example #30
Source File: MultiLineInputFormat.java From dkpro-c4corpus with Apache License 2.0
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit genericSplit,
    TaskAttemptContext context) {
  context.setStatus(genericSplit.toString());
  return new MultiLineRecordReader();
}