Java Code Examples for org.apache.hadoop.io.Text#set()

The following examples show how to use org.apache.hadoop.io.Text#set(). Each example notes the open-source project and source file it was taken from.
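
Text#set() is overloaded: set(String), set(byte[] utf8), set(byte[] utf8, int start, int len), and set(Text other). Every overload copies the supplied data into the Text's internal buffer, which is why the examples below can safely reuse a single Text instance across records. As a quick orientation, here is a minimal standalone sketch (the class name TextSetDemo is illustrative, not taken from any of the projects below):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextSetDemo {
  public static void main(String[] args) {
    Text t = new Text();

    // set(String): replaces the contents with the string's UTF-8 encoding
    t.set("hello");

    // set(byte[], int, int): copies a slice of an existing UTF-8 byte array
    byte[] utf8 = "hello world".getBytes(StandardCharsets.UTF_8);
    t.set(utf8, 6, 5); // t now holds "world"

    // set(Text): copies another Text's bytes
    t.set(new Text("copy me"));

    System.out.println(t); // prints: copy me
  }
}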
Example 1
Source File: SparkFactDistinct.java    From kylin-on-parquet-v2 with Apache License 2.0
private void addFieldValue(DataType type, Integer colIndex, String value,
        List<Tuple2<SelfDefineSortableKey, Text>> result) {
    int reducerIndex = reducerMapping.getReducerIdForCol(colIndex, value);
    tmpbuf.clear();
    byte[] valueBytes = Bytes.toBytes(value);
    int size = valueBytes.length + 1;
    if (size >= tmpbuf.capacity()) {
        tmpbuf = ByteBuffer.allocate(countNewSize(tmpbuf.capacity(), size));
    }
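    // prefix with the low byte of the reducer id (Bytes.toBytes(int) is big-endian, so index 3 is the least significant byte)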
    tmpbuf.put(Bytes.toBytes(reducerIndex)[3]);
    tmpbuf.put(valueBytes);

    Text outputKey = new Text();
    SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();

    outputKey.set(tmpbuf.array(), 0, tmpbuf.position());
    sortableKey.init(outputKey, type);

    result.add(new Tuple2<SelfDefineSortableKey, Text>(sortableKey, new Text()));

    // log a few rows for troubleshooting
    if (result.size() < 10) {
        logger.info("Sample output: {} '{}' => reducer {}", allCols.get(colIndex), value, reducerIndex);
    }
}
 
Example 2
Source File: CSVOutputUtils.java    From incubator-retired-pirk with Apache License 2.0
public static void extractCSVOutputIdentityStripFirstField(Text value, Text input)
{
  String csvOut = input.toString();
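  // split on commas that fall outside double-quoted fields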
  String[] tokens = csvOut.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)", -1);

  if (tokens.length > 4)
  {
    setCSVOutput(value, tokens[1], tokens[2], tokens[3], tokens[4]);
  }
  else if (tokens.length == 4)
  {
    setCSVOutput(value, tokens[1], tokens[2], tokens[3]);
  }
  else
  {
    logger.warn("tokens.length = " + tokens.length + " < 4 for input = " + csvOut);
    value.set(input.toString());
  }
}
 
Example 3
Source File: TestContainerLocalizer.java    From hadoop with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
static DataInputBuffer createFakeCredentials(Random r, int nTok)
      throws IOException {
    Credentials creds = new Credentials();
    byte[] password = new byte[20];
    Text kind = new Text();
    Text service = new Text();
    Text alias = new Text();
    for (int i = 0; i < nTok; ++i) {
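      // give each fake token a distinct identifier, kind, service, and alias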
      byte[] identifier = ("idef" + i).getBytes();
      r.nextBytes(password);
      kind.set("kind" + i);
      service.set("service" + i);
      alias.set("token" + i);
      Token token = new Token(identifier, password, kind, service);
      creds.addToken(alias, token);
    }
    DataOutputBuffer buf = new DataOutputBuffer();
    creds.writeTokenStorageToStream(buf);
    DataInputBuffer ret = new DataInputBuffer();
    ret.reset(buf.getData(), 0, buf.getLength());
    return ret;
}
 
Example 4
Source File: SelfDefineSortableKeyTest.java    From kylin with Apache License 2.0
private ArrayList<SelfDefineSortableKey> createKeyList(List<String> strNumList, byte typeFlag) {
    int partitionId = 0;
    ArrayList<SelfDefineSortableKey> keyList = new ArrayList<>();
    for (String str : strNumList) {
        ByteBuffer keyBuffer = ByteBuffer.allocate(4096);
        int offset = keyBuffer.position();
        // one-byte partition prefix, followed by the UTF-8 bytes of the value
        keyBuffer.put(Bytes.toBytes(partitionId)[3]);
        keyBuffer.put(Bytes.toBytes(str));
        Text outputKey = new Text();
        outputKey.set(keyBuffer.array(), offset, keyBuffer.position() - offset);
        SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();
        sortableKey.init(outputKey, typeFlag);
        keyList.add(sortableKey);
    }
    return keyList;
}
 
Example 5
Source File: OutputToTextMapper.java    From wikireverse with MIT License
public void map(Text lowerCaseKey, LinkArrayWritable value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
	
	try {
		int recordCount = 0;
		Text outputValue = new Text();

		String correctCase = value.getMostUsedArticleCasing();
		String correctKey = lowerCaseKey.toString().replace(correctCase.toLowerCase(), correctCase);
		
		if (correctKey.endsWith("//")) {
			// replaceAll takes a regex, so anchor it to strip only the trailing "//"
			correctKey = correctKey.replaceAll("//$", "");
		}
	
		Text key = new Text(correctKey);
		
		for (Writable rawValue : value.get()) {
			LinkWritable link = (LinkWritable)rawValue;
			outputValue.set(link.toString());

			output.collect(key, outputValue);
			recordCount++;
		}
		
		reporter.incrCounter(COUNTER_GROUP, RECORDS_FETCHED, 1);
		reporter.incrCounter(COUNTER_GROUP, RESULTS_OUTPUT, recordCount);
		
	} catch(Exception e) {
		reporter.incrCounter(COUNTER_GROUP, MAP_EXCEPTION, 1);
		LOG.error(StringUtils.stringifyException(e));
	}
}
 
Example 6
Source File: GenericMRLoadGenerator.java    From hadoop with Apache License 2.0
private int generateSentence(Text t, int noWords) {
  sentence.setLength(0);
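  // emit noWords words in total: all but the last get a trailing space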
  --noWords;
  for (int i = 0; i < noWords; ++i) {
    sentence.append(words[r.nextInt(words.length)]);
    sentence.append(" ");
  }
  if (noWords >= 0) sentence.append(words[r.nextInt(words.length)]);
  t.set(sentence.toString());
  return sentence.length();
}
 
Example 7
Source File: RandomWriter.java    From big-c with Apache License 2.0
public boolean nextKeyValue() {
  if (name != null) {
    key = new Text();
    key.set(name.getName());
    name = null;
    return true;
  }
  return false;
}
 
Example 8
Source File: TestMapRed.java    From RDFS with Apache License 2.0
public void testNullKeys() throws Exception {
  JobConf conf = new JobConf(TestMapRed.class);
  FileSystem fs = FileSystem.getLocal(conf);
  Path testdir = new Path(
      System.getProperty("test.build.data","/tmp")).makeQualified(fs);
  fs.delete(testdir, true);
  Path inFile = new Path(testdir, "nullin/blah");
  SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, inFile,
      NullWritable.class, Text.class, SequenceFile.CompressionType.NONE);
  Text t = new Text();
  t.set("AAAAAAAAAAAAAA"); w.append(NullWritable.get(), t);
  t.set("BBBBBBBBBBBBBB"); w.append(NullWritable.get(), t);
  t.set("CCCCCCCCCCCCCC"); w.append(NullWritable.get(), t);
  t.set("DDDDDDDDDDDDDD"); w.append(NullWritable.get(), t);
  t.set("EEEEEEEEEEEEEE"); w.append(NullWritable.get(), t);
  t.set("FFFFFFFFFFFFFF"); w.append(NullWritable.get(), t);
  t.set("GGGGGGGGGGGGGG"); w.append(NullWritable.get(), t);
  t.set("HHHHHHHHHHHHHH"); w.append(NullWritable.get(), t);
  w.close();
  FileInputFormat.setInputPaths(conf, inFile);
  FileOutputFormat.setOutputPath(conf, new Path(testdir, "nullout"));
  conf.setMapperClass(NullMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setNumReduceTasks(1);

  JobClient.runJob(conf);

  SequenceFile.Reader r = new SequenceFile.Reader(fs,
      new Path(testdir, "nullout/part-00000"), conf);
  String m = "AAAAAAAAAAAAAA";
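  // expect the records back in order: AAAA..., BBBB..., ..., HHHH...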
  for (int i = 1; r.next(NullWritable.get(), t); ++i) {
    assertTrue(t.toString() + " doesn't match " + m, m.equals(t.toString()));
    m = m.replace((char)('A' + i - 1), (char)('A' + i));
  }
}
 
Example 9
Source File: EsInputFormat.java    From elasticsearch-hadoop with Apache License 2.0
@Override
protected Text setCurrentKey(Text hadoopKey, Object object) {
    if (hadoopKey != null) {
        hadoopKey.set(object.toString());
    }
    return hadoopKey;
}
 
Example 10
Source File: FactDistinctColumnsReducerTest.java    From kylin with Apache License 2.0
@Test
public void testReducerStatistics() throws IOException {
    setConfigurations();
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_STATISTICS, reduceDriver.getConfiguration(),
            SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_PARTITION, reduceDriver.getConfiguration(), TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    // override the task id
    int dimColsSize = cubeDesc.getRowkey().getRowKeyColumns().length;
    int uhcSize = cubeDesc.getAllUHCColumns().size();
    final int targetTaskId = (dimColsSize - uhcSize) + uhcSize * cubeDesc.getConfig().getUHCReducerCount();

    setContextTaskId(targetTaskId);
    ByteBuffer tmpBuf = ByteBuffer.allocate(4096);
    tmpBuf.put((byte) FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER); // one byte
    tmpBuf.putLong(100);
    Text outputKey1 = new Text();
    outputKey1.set(tmpBuf.array(), 0, tmpBuf.position());
    SelfDefineSortableKey key1 = new SelfDefineSortableKey();
    key1.init(outputKey1, (byte) 0);

    HLLCounter hll = createMockHLLCounter();
    ByteBuffer hllBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
    hllBuf.clear();
    hll.writeRegisters(hllBuf);
    Text value1 = new Text();
    value1.set(hllBuf.array(), 0, hllBuf.position());

    reduceDriver.setInput(key1, ImmutableList.of(value1));

    List<Pair<NullWritable, Text>> result = reduceDriver.run();
    assertEquals(0, result.size()); // the reducer writes its statistics to a sequence file, not to the main output
}
 
Example 11
Source File: MockRecordReader.java    From pentaho-hadoop-shims with Apache License 2.0
@Override
public boolean next( Text key, Text value ) throws IOException {
  if ( !rowIter.hasNext() ) {
    return false;
  }
  rowNum++;
  key.set( String.valueOf( rowNum ) );
  value.set( rowIter.next() );
  return true;
}
 
Example 12
Source File: SequenceFileAsTextRecordReader.java    From big-c with Apache License 2.0
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
  Text tKey = key;
  Text tValue = value;
  if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
    return false;
  }
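  // convert the underlying writables to text form; Text#set copies the bytes into the caller's objects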
  tKey.set(innerKey.toString());
  tValue.set(innerValue.toString());
  return true;
}
 
Example 13
Source File: PatternMatcherTest.java    From jumbune with GNU Lesser General Public License v3.0
@Test
public void matchTestAgainstStringNull() {
	Text value = new Text();
	value.set("null");
	boolean check = PatternMatcher.match(value);
	assertFalse(check);
}
 
Example 14
Source File: TestIndexedSort.java    From hadoop with Apache License 2.0
private static void genRandom(Text t, int len, StringBuilder sb) {
  sb.setLength(0);
  for (int i = 0; i < len; ++i) {
    sb.append(Integer.toString(r.nextInt(26) + 10, 36));
  }
  t.set(sb.toString());
}
 
Example 15
Source File: SequenceFileAsTextRecordReader.java    From hadoop-gpu with Apache License 2.0
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
  Text tKey = key;
  Text tValue = value;
  if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
    return false;
  }
  tKey.set(innerKey.toString());
  tValue.set(innerValue.toString());
  return true;
}
 
Example 16
Source File: SequenceFileProtobufWriter.java    From hiped2 with Apache License 2.0
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  ProtobufSerialization.register(conf);

  SequenceFile.Writer writer =
      SequenceFile.createWriter(conf,
          SequenceFile.Writer.file(outputPath),
          SequenceFile.Writer.keyClass(Text.class),
          SequenceFile.Writer.valueClass(Stock.class),
          SequenceFile.Writer.compression(
              SequenceFile.CompressionType.BLOCK,
              new DefaultCodec())
      );
  try {
    Text key = new Text();

    for (Stock stock : StockUtils.fromCsvFile(inputFile)) {
      key.set(stock.getSymbol());
      writer.append(key, stock);
    }
  } finally {
    writer.close();
  }
  return 0;
}
 
Example 17
Source File: KettleTypeToTextConverter.java    From pentaho-hadoop-shims with Apache License 2.0
@Override
public Text convert( ValueMetaInterface meta, Object obj ) throws TypeConversionException {
  try {
    Text text = new Text();
    text.set( meta.getString( obj ) );
    return text;
  } catch ( KettleValueException ex ) {
    throw new TypeConversionException(
      BaseMessages.getString( TypeConverterFactory.class, "ErrorConverting", Text.class.getSimpleName(), obj ), ex );
  }
}
 
Example 18
Source File: FileBench.java    From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked") // OutputFormat instantiation
static long writeBench(JobConf conf) throws IOException {
  // 5L forces long arithmetic; 5 * 1024 * 1024 * 1024 overflows int and silently becomes 1 GiB
  long filelen = conf.getLong("filebench.file.bytes", 5L * 1024 * 1024 * 1024);
  Text key = new Text();
  Text val = new Text();

  final String fn = conf.get("test.filebench.name", "");
  final Path outd = FileOutputFormat.getOutputPath(conf);
  conf.set("mapred.work.output.dir", outd.toString());
  OutputFormat outf = conf.getOutputFormat();
  RecordWriter<Text,Text> rw =
    outf.getRecordWriter(outd.getFileSystem(conf), conf, fn,
                         Reporter.NULL);
  try {
    long acc = 0L;
    Date start = new Date();
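    // cycle through the sample keys and values until filelen bytes have been written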
    for (int i = 0; acc < filelen; ++i) {
      i %= keys.length;
      key.set(keys[i]);
      val.set(values[i]);
      rw.write(key, val);
      acc += keys[i].length();
      acc += values[i].length();
    }
    Date end = new Date();
    return end.getTime() - start.getTime();
  } finally {
    rw.close(Reporter.NULL);
  }
}
 
Example 19
Source File: LexicoderRowSerializer.java    From presto with Apache License 2.0
@Override
public void setDouble(Text text, Double value)
{
    text.set(encode(DOUBLE, value));
}
 
Example 20
Source File: CSVOutputUtils.java    From incubator-retired-pirk with Apache License 2.0
public static Text setCSVOutput(String domain, String ip, String timestamp, String generic)
{
  Text value = new Text();

  String csvOut = domain + "," + ip + "," + timestamp + "," + generic;

  value.set(csvOut);

  return value;
}
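
Note that this helper allocates a fresh Text on every call. The record-reader examples above make the opposite trade-off: they keep one Text instance and refresh its contents with set(), which is the usual Hadoop idiom for avoiding per-record allocation in hot paths.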