Java Code Examples for org.apache.hadoop.io.Text#set()

The following examples show how to use org.apache.hadoop.io.Text#set(). Each example notes the open-source project and source file it was taken from.
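
Text#set() is overloaded: set(String), set(byte[] utf8), set(byte[] utf8, int start, int len), and set(Text other). Every overload copies the supplied data into the Text's internal buffer, which is why the examples below can safely reuse a single Text instance across records. As a quick orientation, here is a minimal standalone sketch (the class name TextSetDemo is illustrative, not taken from any of the projects below):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextSetDemo {
  public static void main(String[] args) {
    Text t = new Text();

    // set(String): replaces the contents with the string's UTF-8 encoding
    t.set("hello");

    // set(byte[], int, int): copies a slice of an existing UTF-8 byte array
    byte[] utf8 = "hello world".getBytes(StandardCharsets.UTF_8);
    t.set(utf8, 6, 5); // t now holds "world"

    // set(Text): copies another Text's bytes
    t.set(new Text("copy me"));

    System.out.println(t); // prints: copy me
  }
}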
Example 1
Source File: SparkFactDistinct.java    From kylin-on-parquet-v2 with Apache License 2.0
private void addFieldValue(DataType type, Integer colIndex, String value,
        List<Tuple2<SelfDefineSortableKey, Text>> result) {
    int reducerIndex = reducerMapping.getReducerIdForCol(colIndex, value);
    tmpbuf.clear();
    byte[] valueBytes = Bytes.toBytes(value);
    int size = valueBytes.length + 1;
    if (size >= tmpbuf.capacity()) {
        tmpbuf = ByteBuffer.allocate(countNewSize(tmpbuf.capacity(), size));
    }
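    // prefix with the low byte of the reducer id (Bytes.toBytes(int) is big-endian, so index 3 is the least significant byte)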
    tmpbuf.put(Bytes.toBytes(reducerIndex)[3]);
    tmpbuf.put(valueBytes);

    Text outputKey = new Text();
    SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();

    outputKey.set(tmpbuf.array(), 0, tmpbuf.position());
    sortableKey.init(outputKey, type);

    result.add(new Tuple2<SelfDefineSortableKey, Text>(sortableKey, new Text()));

    // log a few rows for troubleshooting
    if (result.size() < 10) {
        logger.info("Sample output: {} '{}' => reducer {}", allCols.get(colIndex), value, reducerIndex);
    }
}
 
Example 2
Source File: CSVOutputUtils.java    From incubator-retired-pirk with Apache License 2.0
public static void extractCSVOutputIdentityStripFirstField(Text value, Text input)
{
  String csvOut = input.toString();
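  // split on commas that fall outside double-quoted fields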
  String[] tokens = csvOut.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)", -1);

  if (tokens.length > 4)
  {
    setCSVOutput(value, tokens[1], tokens[2], tokens[3], tokens[4]);
  }
  else if (tokens.length == 4)
  {
    setCSVOutput(value, tokens[1], tokens[2], tokens[3]);
  }
  else
  {
    logger.warn("tokens.length = " + tokens.length + " < 4 for input = " + csvOut);
    value.set(input.toString());
  }
}
 
Example 3
Source File: TestContainerLocalizer.java    From hadoop with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
static DataInputBuffer createFakeCredentials(Random r, int nTok)
      throws IOException {
    Credentials creds = new Credentials();
    byte[] password = new byte[20];
    Text kind = new Text();
    Text service = new Text();
    Text alias = new Text();
    for (int i = 0; i < nTok; ++i) {
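      // give each fake token a distinct identifier, kind, service, and alias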
      byte[] identifier = ("idef" + i).getBytes();
      r.nextBytes(password);
      kind.set("kind" + i);
      service.set("service" + i);
      alias.set("token" + i);
      Token token = new Token(identifier, password, kind, service);
      creds.addToken(alias, token);
    }
    DataOutputBuffer buf = new DataOutputBuffer();
    creds.writeTokenStorageToStream(buf);
    DataInputBuffer ret = new DataInputBuffer();
    ret.reset(buf.getData(), 0, buf.getLength());
    return ret;
}
 
Example 4
Source File: SelfDefineSortableKeyTest.java    From kylin with Apache License 2.0
private ArrayList<SelfDefineSortableKey> createKeyList(List<String> strNumList, byte typeFlag) {
    int partitionId = 0;
    ArrayList<SelfDefineSortableKey> keyList = new ArrayList<>();
    for (String str : strNumList) {
        ByteBuffer keyBuffer = ByteBuffer.allocate(4096);
        int offset = keyBuffer.position();
        // one-byte partition prefix, followed by the UTF-8 bytes of the value
        keyBuffer.put(Bytes.toBytes(partitionId)[3]);
        keyBuffer.put(Bytes.toBytes(str));
        Text outputKey = new Text();
        outputKey.set(keyBuffer.array(), offset, keyBuffer.position() - offset);
        SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();
        sortableKey.init(outputKey, typeFlag);
        keyList.add(sortableKey);
    }
    return keyList;
}
 
Example 5
Source File: OutputToTextMapper.java    From wikireverse with MIT License
public void map(Text lowerCaseKey, LinkArrayWritable value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
	
	try {
		int recordCount = 0;
		Text outputValue = new Text();

		String correctCase = value.getMostUsedArticleCasing();
		String correctKey = lowerCaseKey.toString().replace(correctCase.toLowerCase(), correctCase);
		
		if (correctKey.endsWith("//")) {
			// replaceAll takes a regex, so anchor it to strip only the trailing "//"
			correctKey = correctKey.replaceAll("//$", "");
		}
	
		Text key = new Text(correctKey);
		
		for (Writable rawValue : value.get()) {
			LinkWritable link = (LinkWritable)rawValue;
			outputValue.set(link.toString());

			output.collect(key, outputValue);
			recordCount++;
		}
		
		reporter.incrCounter(COUNTER_GROUP, RECORDS_FETCHED, 1);
		reporter.incrCounter(COUNTER_GROUP, RESULTS_OUTPUT, recordCount);
		
	} catch(Exception e) {
		reporter.incrCounter(COUNTER_GROUP, MAP_EXCEPTION, 1);
		LOG.error(StringUtils.stringifyException(e));
	}
}
 
Example 6
Source File: GenericMRLoadGenerator.java    From hadoop with Apache License 2.0
private int generateSentence(Text t, int noWords) {
  sentence.setLength(0);
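  // emit noWords words in total: all but the last get a trailing space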
  --noWords;
  for (int i = 0; i < noWords; ++i) {
    sentence.append(words[r.nextInt(words.length)]);
    sentence.append(" ");
  }
  if (noWords >= 0) sentence.append(words[r.nextInt(words.length)]);
  t.set(sentence.toString());
  return sentence.length();
}
 
Example 7
Source File: RandomWriter.java    From big-c with Apache License 2.0
public boolean nextKeyValue() {
  if (name != null) {
    key = new Text();
    key.set(name.getName());
    name = null;
    return true;
  }
  return false;
}
 
Example 8
Source File: TestMapRed.java    From RDFS with Apache License 2.0
public void testNullKeys() throws Exception {
  JobConf conf = new JobConf(TestMapRed.class);
  FileSystem fs = FileSystem.getLocal(conf);
  Path testdir = new Path(
      System.getProperty("test.build.data","/tmp")).makeQualified(fs);
  fs.delete(testdir, true);
  Path inFile = new Path(testdir, "nullin/blah");
  SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, inFile,
      NullWritable.class, Text.class, SequenceFile.CompressionType.NONE);
  Text t = new Text();
  t.set("AAAAAAAAAAAAAA"); w.append(NullWritable.get(), t);
  t.set("BBBBBBBBBBBBBB"); w.append(NullWritable.get(), t);
  t.set("CCCCCCCCCCCCCC"); w.append(NullWritable.get(), t);
  t.set("DDDDDDDDDDDDDD"); w.append(NullWritable.get(), t);
  t.set("EEEEEEEEEEEEEE"); w.append(NullWritable.get(), t);
  t.set("FFFFFFFFFFFFFF"); w.append(NullWritable.get(), t);
  t.set("GGGGGGGGGGGGGG"); w.append(NullWritable.get(), t);
  t.set("HHHHHHHHHHHHHH"); w.append(NullWritable.get(), t);
  w.close();
  FileInputFormat.setInputPaths(conf, inFile);
  FileOutputFormat.setOutputPath(conf, new Path(testdir, "nullout"));
  conf.setMapperClass(NullMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setNumReduceTasks(1);

  JobClient.runJob(conf);

  SequenceFile.Reader r = new SequenceFile.Reader(fs,
      new Path(testdir, "nullout/part-00000"), conf);
  String m = "AAAAAAAAAAAAAA";
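  // expect the records back in order: AAAA..., BBBB..., ..., HHHH...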
  for (int i = 1; r.next(NullWritable.get(), t); ++i) {
    assertTrue(t.toString() + " doesn't match " + m, m.equals(t.toString()));
    m = m.replace((char)('A' + i - 1), (char)('A' + i));
  }
}
 
Example 9
Source File: EsInputFormat.java    From elasticsearch-hadoop with Apache License 2.0
@Override
protected Text setCurrentKey(Text hadoopKey, Object object) {
    if (hadoopKey != null) {
        hadoopKey.set(object.toString());
    }
    return hadoopKey;
}
 
Example 10
Source File: FactDistinctColumnsReducerTest.java    From kylin with Apache License 2.0
@Test
public void testReducerStatistics() throws IOException {
    setConfigurations();
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_STATISTICS, reduceDriver.getConfiguration(),
            SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_PARTITION, reduceDriver.getConfiguration(), TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    // override the task id
    int dimColsSize = cubeDesc.getRowkey().getRowKeyColumns().length;
    int uhcSize = cubeDesc.getAllUHCColumns().size();
    final int targetTaskId = (dimColsSize - uhcSize) + uhcSize * cubeDesc.getConfig().getUHCReducerCount();

    setContextTaskId(targetTaskId);
    ByteBuffer tmpBuf = ByteBuffer.allocate(4096);
    tmpBuf.put((byte) FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER); // one byte
    tmpBuf.putLong(100);
    Text outputKey1 = new Text();
    outputKey1.set(tmpBuf.array(), 0, tmpBuf.position());
    SelfDefineSortableKey key1 = new SelfDefineSortableKey();
    key1.init(outputKey1, (byte) 0);

    HLLCounter hll = createMockHLLCounter();
    ByteBuffer hllBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
    hllBuf.clear();
    hll.writeRegisters(hllBuf);
    Text value1 = new Text();
    value1.set(hllBuf.array(), 0, hllBuf.position());

    reduceDriver.setInput(key1, ImmutableList.of(value1));

    List<Pair<NullWritable, Text>> result = reduceDriver.run();
    assertEquals(0, result.size()); // the reducer writes its statistics to a sequence file, not to the main output
}
 
Example 11
Source File: MockRecordReader.java    From pentaho-hadoop-shims with Apache License 2.0
@Override
public boolean next( Text key, Text value ) throws IOException {
  if ( !rowIter.hasNext() ) {
    return false;
  }
  rowNum++;
  key.set( String.valueOf( rowNum ) );
  value.set( rowIter.next() );
  return true;
}
 
Example 12
Source File: SequenceFileAsTextRecordReader.java    From big-c with Apache License 2.0
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
  Text tKey = key;
  Text tValue = value;
  if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
    return false;
  }
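  // convert the underlying writables to text form; Text#set copies the bytes into the caller's objects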
  tKey.set(innerKey.toString());
  tValue.set(innerValue.toString());
  return true;
}
 
Example 13
Source File: PatternMatcherTest.java    From jumbune with GNU Lesser General Public License v3.0
@Test
public void matchTestAgainstStringNull() {
	Text value = new Text();
	value.set("null");
	boolean check = PatternMatcher.match(value);
	assertFalse(check);
}
 
Example 14
Source File: TestIndexedSort.java    From hadoop with Apache License 2.0
private static void genRandom(Text t, int len, StringBuilder sb) {
  sb.setLength(0);
  for (int i = 0; i < len; ++i) {
    sb.append(Integer.toString(r.nextInt(26) + 10, 36));
  }
  t.set(sb.toString());
}
 
Example 15
Source File: SequenceFileAsTextRecordReader.java    From hadoop-gpu with Apache License 2.0
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
  Text tKey = key;
  Text tValue = value;
  if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
    return false;
  }
  tKey.set(innerKey.toString());
  tValue.set(innerValue.toString());
  return true;
}
 
Example 16
Source File: SequenceFileProtobufWriter.java    From hiped2 with Apache License 2.0
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  ProtobufSerialization.register(conf);

  SequenceFile.Writer writer =
      SequenceFile.createWriter(conf,
          SequenceFile.Writer.file(outputPath),
          SequenceFile.Writer.keyClass(Text.class),
          SequenceFile.Writer.valueClass(Stock.class),
          SequenceFile.Writer.compression(
              SequenceFile.CompressionType.BLOCK,
              new DefaultCodec())
      );
  try {
    Text key = new Text();

    for (Stock stock : StockUtils.fromCsvFile(inputFile)) {
      key.set(stock.getSymbol());
      writer.append(key, stock);
    }
  } finally {
    writer.close();
  }
  return 0;
}
 
Example 17
Source File: KettleTypeToTextConverter.java    From pentaho-hadoop-shims with Apache License 2.0
@Override
public Text convert( ValueMetaInterface meta, Object obj ) throws TypeConversionException {
  try {
    Text text = new Text();
    text.set( meta.getString( obj ) );
    return text;
  } catch ( KettleValueException ex ) {
    throw new TypeConversionException(
      BaseMessages.getString( TypeConverterFactory.class, "ErrorConverting", Text.class.getSimpleName(), obj ), ex );
  }
}
 
Example 18
Source File: FileBench.java    From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked") // OutputFormat instantiation
static long writeBench(JobConf conf) throws IOException {
  // 5L forces long arithmetic; 5 * 1024 * 1024 * 1024 overflows int and silently becomes 1 GiB
  long filelen = conf.getLong("filebench.file.bytes", 5L * 1024 * 1024 * 1024);
  Text key = new Text();
  Text val = new Text();

  final String fn = conf.get("test.filebench.name", "");
  final Path outd = FileOutputFormat.getOutputPath(conf);
  conf.set("mapred.work.output.dir", outd.toString());
  OutputFormat outf = conf.getOutputFormat();
  RecordWriter<Text,Text> rw =
    outf.getRecordWriter(outd.getFileSystem(conf), conf, fn,
                         Reporter.NULL);
  try {
    long acc = 0L;
    Date start = new Date();
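    // cycle through the sample keys and values until filelen bytes have been written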
    for (int i = 0; acc < filelen; ++i) {
      i %= keys.length;
      key.set(keys[i]);
      val.set(values[i]);
      rw.write(key, val);
      acc += keys[i].length();
      acc += values[i].length();
    }
    Date end = new Date();
    return end.getTime() - start.getTime();
  } finally {
    rw.close(Reporter.NULL);
  }
}
 
Example 19
Source File: LexicoderRowSerializer.java    From presto with Apache License 2.0
@Override
public void setDouble(Text text, Double value)
{
    text.set(encode(DOUBLE, value));
}
 
Example 20
Source File: CSVOutputUtils.java    From incubator-retired-pirk with Apache License 2.0
public static Text setCSVOutput(String domain, String ip, String timestamp, String generic)
{
  Text value = new Text();

  String csvOut = domain + "," + ip + "," + timestamp + "," + generic;

  value.set(csvOut);

  return value;
}
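
Note that this helper allocates a fresh Text on every call. The record-reader examples above make the opposite trade-off: they keep one Text instance and refresh its contents with set(), which is the usual Hadoop idiom for avoiding per-record allocation in hot paths.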