Java Code Examples for org.apache.hadoop.io.Text#getBytes()

The following examples show how to use org.apache.hadoop.io.Text#getBytes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TsvImporterTextMapper.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Emits one line of TSV text keyed by its row key.
 *
 * <p>The row-key bounds are obtained from {@code parser.parseRowKey}; the output key wraps the
 * line's bytes at those bounds and the output value is the line itself. A line the parser
 * rejects is reported on stderr and then either counted and skipped (when
 * {@code skipBadLines} is set) or rethrown wrapped in an {@link IOException}.
 *
 * @param offset offset of the line, used only when reporting a bad line
 * @param value the raw TSV line
 * @param context sink that receives the (row key, line) pair
 * @throws IOException if the line is bad and bad lines are not being skipped
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
  try {
    // Only the first getLength() bytes of the backing array belong to this line.
    final byte[] lineBytes = value.getBytes();
    final Pair<Integer,Integer> rowKeyBounds = parser.parseRowKey(lineBytes, value.getLength());
    final ImmutableBytesWritable rowKey =
        new ImmutableBytesWritable(lineBytes, rowKeyBounds.getFirst(), rowKeyBounds.getSecond());
    context.write(rowKey, value);
  } catch (ImportTsv.TsvParser.BadTsvLineException|IllegalArgumentException malformed) {
    if (logBadLines) {
      System.err.println(value);
    }
    System.err.println("Bad line at offset: " + offset.get() + ":\n" + malformed.getMessage());
    if (!skipBadLines) {
      throw new IOException(malformed);
    }
    incrementBadLineCount(1);
  } catch (InterruptedException interrupted) {
    LOG.error("Interrupted while emitting TSV text", interrupted);
    // Restore the flag so the caller can observe the interruption.
    Thread.currentThread().interrupt();
  }
}
 
Example 2
Source File: DecisionPathUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the regression tree identified by {@code modelId} over {@code features} and returns
 * whatever {@code handler} collected during the traversal.
 *
 * <p>The tree is rebuilt from {@code script} only when {@code modelId} differs from the one
 * seen on the previous call; otherwise the cached {@code rNode} is reused.
 *
 * @param modelId identifier used to decide whether the cached tree is still valid
 * @param script Base91-encoded serialized tree
 * @param features input vector passed to the tree
 * @return the result reported by {@code handler}
 * @throws HiveException declared by the UDF contract
 */
@Nonnull
public List<String> evaluate(@Nonnull final String modelId, @Nonnull final Text script,
        @Nonnull final Vector features) throws HiveException {
    final boolean modelChanged = !modelId.equals(prevModelId);
    if (modelChanged) {
        this.prevModelId = modelId;
        // Decode only the valid prefix of the Text backing array.
        final int encodedLength = script.getLength();
        final byte[] serialized = Base91.decode(script.getBytes(), 0, encodedLength);
        this.rNode = RegressionTree.deserialize(serialized, serialized.length, true);
    }
    Preconditions.checkNotNull(rNode);

    handler.init();
    rNode.predict(features, handler);
    return handler.getResult();
}
 
Example 3
Source File: AccumuloUtils.java    From mrgeo with Apache License 2.0 6 votes vote down vote up
/**
 * Convert a Text object holding a tileId back to a long.
 *
 * <p>At most the first eight valid bytes of {@code rowId} are used; if the Text holds fewer
 * than eight bytes, the remaining low-order bytes are zero. The bytes are read big-endian
 * (the default order of {@link ByteBuffer}).
 *
 * @param rowId Text object to convert.
 * @return the long value from the Text object.
 */
public static long toLong(Text rowId)
{
  // A fresh array is already zero-filled, so only the valid prefix needs copying.
  final byte[] outB = new byte[8];
  final int usable = Math.min(rowId.getLength(), outB.length);
  System.arraycopy(rowId.getBytes(), 0, outB, 0, usable);

  return ByteBuffer.wrap(outB).getLong();
}
 
Example 4
Source File: TreePredictUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the decision tree identified by {@code modelId} over {@code features}.
 *
 * <p>The tree is rebuilt from {@code script} only when {@code modelId} differs from the one
 * seen on the previous call. The returned array is the shared {@code result} field, which is
 * cleared at the start of every call and filled when the traversal reaches a leaf.
 *
 * @param modelId identifier used to decide whether the cached tree is still valid
 * @param script Base91-encoded serialized tree
 * @param features input vector passed to the tree
 * @return {@code result}: slot 0 holds the predicted output, slot 1 the posteriori list
 * @throws HiveException declared by the UDF contract
 */
@Nonnull
public Object[] evaluate(@Nonnull final String modelId, @Nonnull final Text script,
        @Nonnull final Vector features) throws HiveException {
    final boolean modelChanged = !modelId.equals(prevModelId);
    if (modelChanged) {
        this.prevModelId = modelId;
        // Decode only the valid prefix of the Text backing array.
        final int encodedLength = script.getLength();
        final byte[] serialized = Base91.decode(script.getBytes(), 0, encodedLength);
        this.cNode = DecisionTree.deserialize(serialized, serialized.length, true);
    }

    Arrays.fill(result, null);
    Preconditions.checkNotNull(cNode);

    final PredictionHandler leafCollector = new PredictionHandler() {
        public void visitLeaf(int output, double[] posteriori) {
            result[0] = new IntWritable(output);
            result[1] = WritableUtils.toWritableList(posteriori);
        }
    };
    cNode.predict(features, leafCollector);

    return result;
}
 
Example 5
Source File: TextUtilTest.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code TextUtil.textAppend} adds a single zero byte followed by the UTF-8
 * encoding of the appended string, even when both the existing content and the appended
 * string contain multi-byte characters.
 */
@Test
public void testAppend_multiByteChars() throws UnsupportedEncodingException {
    final String prefix = "prefix\u6C34";
    final byte[] prefixBytes = prefix.getBytes("UTF-8");
    final int prefixLength = prefixBytes.length;
    final Text text = new Text(prefix.getBytes("UTF-8"));

    // Mixes a 1-byte char, a 3-byte char and a surrogate pair (4 bytes in UTF-8).
    final String multiByteCharString = "\u007A\u6C34\uD834\uDD1E";
    TextUtil.textAppend(text, multiByteCharString);

    final byte[] expectedSuffix = multiByteCharString.getBytes("UTF-8");
    final int appendedLength = text.getLength() - prefixLength;
    Assert.assertEquals("Length was wrong", 1 + expectedSuffix.length, appendedLength);

    final byte[] allBytes = text.getBytes();
    Assert.assertEquals("First byte was wrong", (byte) 0, allBytes[prefixLength]);

    // Skip the prefix and the separator byte; what remains must be the appended string.
    final int suffixStart = 1 + prefixLength;
    final byte[] actualSuffix = new byte[text.getLength() - prefixLength - 1];
    System.arraycopy(allBytes, suffixStart, actualSuffix, 0, actualSuffix.length);
    Assert.assertArrayEquals("Contents were wrong", expectedSuffix, actualSuffix);
}
 
Example 6
Source File: SparkFactDistinct.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Chooses the partition for a {@code SelfDefineSortableKey}.
 *
 * <p>Keys whose first byte is {@code MARK_FOR_HLL_COUNTER} carry a cuboid id in the following
 * {@code Bytes.SIZEOF_LONG} bytes and are routed through {@code reducerMapping}; for every
 * other key the first byte, read as an unsigned value, is the partition.
 *
 * @param o the key; must be a {@code SelfDefineSortableKey}
 * @return the partition index
 */
@Override
public int getPartition(Object o) {
    // Lazy one-time init, re-checked under the class lock.
    // NOTE(review): safe publication depends on how `initialized` is declared — confirm.
    if (!initialized) {
        synchronized (SparkFactDistinct.class) {
            if (!initialized) {
                init();
            }
        }
    }

    final Text key = ((SelfDefineSortableKey) o).getText();
    final byte[] keyBytes = key.getBytes();
    if (keyBytes[0] != FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER) {
        return BytesUtil.readUnsigned(keyBytes, 0, 1);
    }

    Long cuboidId = Bytes.toLong(keyBytes, 1, Bytes.SIZEOF_LONG);
    return reducerMapping.getReducerIdForCuboidRowCount(cuboidId);
}
 
Example 7
Source File: TsvImporterCustomTestMapper.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Convert a line of TSV text into an HBase table row after transforming the
 * values by multiplying them by 3.
 *
 * <p>The line is split on the ESC character ({@code \u001b}). The first token becomes the
 * row key; each following token is expected to look like {@code VALUE<n>} and is written as
 * {@code VALUE<3n>} under family {@code FAM} with qualifiers {@code A}, {@code B} in order.
 *
 * @param offset offset of the line in the input (unused)
 * @param value the raw input line
 * @param context sink that receives the (row key, Put) pair
 * @throws IOException if writing to the context fails
 */
@Override
public void map(LongWritable offset, Text value, Context context)
      throws IOException {
  byte[] family = Bytes.toBytes("FAM");
  final byte[][] qualifiers = { Bytes.toBytes("A"), Bytes.toBytes("B") };

  // do some basic line parsing
  // Text#getBytes() exposes the backing array, which may be longer than the current
  // content (e.g. when the Text instance is reused); decode only the valid prefix.
  String line = new String(value.getBytes(), 0, value.getLength(), StandardCharsets.UTF_8);
  String[] valueTokens = line.split("\u001b");

  // create the rowKey and Put
  ImmutableBytesWritable rowKey =
    new ImmutableBytesWritable(Bytes.toBytes(valueTokens[0]));
  Put put = new Put(rowKey.copyBytes());
  put.setDurability(Durability.SKIP_WAL);

  //The value should look like this: VALUE1 or VALUE2. Let's multiply
  //the integer by 3
  for(int i = 1; i < valueTokens.length; i++) {
    String prefix = valueTokens[i].substring(0, "VALUE".length());
    String suffix = valueTokens[i].substring("VALUE".length());
    String newValue = prefix + Integer.parseInt(suffix) * 3;

    KeyValue kv = new KeyValue(rowKey.copyBytes(), family,
        qualifiers[i-1], Bytes.toBytes(newValue));
    put.add(kv);
  }

  try {
    context.write(rowKey, put);
  } catch (InterruptedException e) {
    e.printStackTrace();
    // Restore the interrupt flag so the framework can still observe the interruption.
    Thread.currentThread().interrupt();
  }
}
 
Example 8
Source File: TreePredictUDFv1.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Predicts a regression value for {@code features} using the tree identified by
 * {@code modelId}.
 *
 * <p>The tree is rebuilt from {@code script} only when {@code modelId} differs from the one
 * seen on the previous call; otherwise the cached {@code rNode} is reused.
 *
 * @param modelId identifier used to decide whether the cached tree is still valid
 * @param compressed forwarded unchanged to {@code deserializeRegressionTree}
 * @param script Base91-encoded serialized tree
 * @param features input values passed to the tree
 * @return the prediction wrapped in a new {@code DoubleWritable}
 * @throws HiveException declared by the UDF contract
 */
private DoubleWritable evaluateRegression(@Nonnull String modelId, boolean compressed,
        @Nonnull Text script, double[] features) throws HiveException {
    final boolean modelChanged = !modelId.equals(prevModelId);
    if (modelChanged) {
        this.prevModelId = modelId;
        // Decode only the valid prefix of the Text backing array.
        final int encodedLength = script.getLength();
        final byte[] serialized = Base91.decode(script.getBytes(), 0, encodedLength);
        this.rNode = deserializeRegressionTree(serialized, serialized.length, compressed);
    }
    assert (rNode != null);
    return new DoubleWritable(rNode.predict(features));
}
 
Example 9
Source File: BaseCuboidMapperTest.java    From Kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the mapper on a single fully populated record and verifies the emitted row key
 * (26-byte header followed by the remaining key bytes), the decoded dimension values and
 * the measures.
 */
@Test
public void testMapperWithHeader() throws Exception {
    final String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    final String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.33"));
    final List<Pair<Text, Text>> result = mapDriver.run();

    final CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    final CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    final Pair<Text, Text> onlyRecord = result.get(0);
    final Text rowkey = onlyRecord.getFirst();

    // Slice the key: the header is the first 26 bytes, made of the cuboid id (first 8)
    // and the seller id (last 18); everything after the header is the rest of the key.
    final byte[] keyBytes = rowkey.getBytes();
    final byte[] header = Bytes.head(keyBytes, 26);
    final byte[] sellerId = Bytes.tail(header, 18);
    final byte[] cuboidId = Bytes.head(header, 8);
    final byte[] restKey = Bytes.tail(keyBytes, rowkey.getLength() - 26);

    final RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(keyBytes);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]", decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), onlyRecord.getSecond(), "132.33", "132.33", "132.33");
}
 
Example 10
Source File: PrepareClusterJob.java    From recsys-offline with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one HBase {@code Put} per key, taking the year-rate and repay-limit-time columns
 * from the first value in the group.
 *
 * <p>Only the first value of {@code values} is read. It is split on {@code split_str} and
 * its first two fields are stored under {@code Constants.hbase_column_family}.
 *
 * @param key row key of the record
 * @param values grouped values; only the first one is used
 * @param context sink that receives the Put
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void reduce(Text key, Iterable<Text> values, Context context)
		throws IOException, InterruptedException {
	Text text = values.iterator().next();
	String[] columns = text.toString().split(split_str);
	// Text#getBytes() returns the backing array, which can be longer than the key itself
	// (the framework reuses key objects), so copy only the first getLength() bytes.
	byte[] rowKey = new byte[key.getLength()];
	System.arraycopy(key.getBytes(), 0, rowKey, 0, rowKey.length);
	Put put = new Put(rowKey);
	put.add(Bytes.toBytes(Constants.hbase_column_family),
			Bytes.toBytes(Constants.hbase_column_yearrate),
			Bytes.toBytes(columns[0]));
	put.add(Bytes.toBytes(Constants.hbase_column_family),
			Bytes.toBytes(Constants.hbase_column_repaylimittime),
			Bytes.toBytes(columns[1]));
	context.write(NullWritable.get(), put);
}
 
Example 11
Source File: HiveFieldConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Copies a Hive CHAR value, using its stripped form, into the output VarChar vector.
 *
 * @param oi inspector for the field; must be a {@code HiveCharObjectInspector}
 * @param hiveFieldValue the raw Hive field value
 * @param outputVV destination vector; must be a {@code VarCharVector}
 * @param outputIndex slot in the destination vector
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final HiveCharObjectInspector charInspector = (HiveCharObjectInspector) oi;
  final Text stripped = charInspector.getPrimitiveWritableObject(hiveFieldValue).getStrippedValue();
  final int byteCount = stripped.getLength();
  checkSizeLimit(byteCount);
  // Write only the valid prefix of the Text backing array.
  final byte[] backing = stripped.getBytes();
  final VarCharVector target = (VarCharVector) outputVV;
  target.setSafe(outputIndex, backing, 0, byteCount);
}
 
Example 12
Source File: AgeOffFilterBase.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the filter and computes the cutoff date from the required {@code ttl} option.
 *
 * <p>{@code ttl} is parsed as an integer number of days; the formatted cutoff date is kept
 * as a byte array together with its valid length.
 *
 * @param source the underlying iterator
 * @param options iterator options; must contain {@code ttl}
 * @param env the iterator environment
 * @throws IOException if the parent initialization fails
 * @throws IllegalArgumentException if {@code options} is null or has no {@code ttl} entry
 */
@Override
public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
    super.init(source, options, env);

    // A missing options map and a missing "ttl" entry are reported identically.
    final String ttl = (options == null) ? null : options.get("ttl");
    if (ttl == null) {
        throw new IllegalArgumentException("ttl must be set for DateBasedAgeOffFilter");
    }

    final int thresholdDays = Integer.parseInt(ttl);
    final Text cutoffDateText = new Text(DateHelper.format(getCutoffDate(thresholdDays)));
    // Keep the length alongside the array: only that many bytes of it are valid.
    cutoffDate = cutoffDateText.getBytes();
    cutoffDateLen = cutoffDateText.getLength();
}
 
Example 13
Source File: JoinSelectProspectOutputTest.java    From rya with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds a subject, a predicate and a subject-predicate row to the
 * {@code CardinalityMapper} and checks the three emitted (CompositeType, TripleCard) pairs.
 */
@Test
public void testOutput() throws InterruptedException, IOException {

    final String subject = "urn:gem:etype#1234";
    final String predicate = "urn:gem#pred";
    final String timestamp = "798497748386999999";

    final Text subjectRow = new Text(TripleValueType.subject.name() + DELIM + subject + DELIM + 1);
    final Text predicateRow = new Text(TripleValueType.predicate.name() + DELIM + predicate + DELIM + 2);
    final Text subjectPredicateRow = new Text(
            TripleValueType.subjectpredicate.name() + DELIM + subject + DELIM + predicate + DELIM + timestamp);

    // Column family, qualifier and visibility are all empty for these keys.
    final byte[] empty = new byte[0];

    // NOTE(review): Text#getBytes() may return an array longer than getLength() — confirm
    // the mapper under test tolerates any trailing bytes in the row.
    final Key subjectKey = new Key(subjectRow.getBytes(), empty, empty, empty, 1);
    final Key predicateKey = new Key(predicateRow.getBytes(), empty, empty, empty, 1);
    final Key subjectPredicateKey = new Key(subjectPredicateRow.getBytes(), empty, empty, empty, 1);

    final Value subjectCount = new Value("25".getBytes());
    final Value predicateCount = new Value("47".getBytes());
    final Value subjectPredicateCount = new Value("15".getBytes());

    // System.out.println("Keys are " + key1 + " and " + key2);

    new MapDriver<Key, Value, CompositeType, TripleCard>()
            .withMapper(new JoinSelectProspectOutput.CardinalityMapper())
            .withInput(subjectKey, subjectCount)
            .withInput(predicateKey, predicateCount)
            .withInput(subjectPredicateKey, subjectPredicateCount)
            .withOutput(new CompositeType(subject, 1), new TripleCard(new CardinalityType(25, "subject", 1)))
            .withOutput(new CompositeType(predicate, 1), new TripleCard(new CardinalityType(47, "predicate", 2)))
            .withOutput(new CompositeType(subject + DELIM + predicate, 1),
                    new TripleCard(new CardinalityType(15, "subjectpredicate", Long.parseLong(timestamp))))
            .runTest();

}
 
Example 14
Source File: HiveFieldConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Copies a Hive STRING value into the output VarChar vector.
 *
 * @param oi inspector for the field; must be a {@code StringObjectInspector}
 * @param hiveFieldValue the raw Hive field value
 * @param outputVV destination vector; must be a {@code VarCharVector}
 * @param outputIndex slot in the destination vector
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final StringObjectInspector stringInspector = (StringObjectInspector) oi;
  final Text text = stringInspector.getPrimitiveWritableObject(hiveFieldValue);
  final int byteCount = text.getLength();
  checkSizeLimit(byteCount);
  // Write only the valid prefix of the Text backing array.
  final byte[] backing = text.getBytes();
  final VarCharVector target = (VarCharVector) outputVV;
  target.setSafe(outputIndex, backing, 0, byteCount);
}
 
Example 15
Source File: HiveFieldConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Copies a Hive VARCHAR value into the output VarChar vector.
 *
 * @param oi inspector for the field; must be a {@code HiveVarcharObjectInspector}
 * @param hiveFieldValue the raw Hive field value
 * @param outputVV destination vector; must be a {@code VarCharVector}
 * @param outputIndex slot in the destination vector
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final HiveVarcharObjectInspector varcharInspector = (HiveVarcharObjectInspector) oi;
  final Text text = varcharInspector.getPrimitiveWritableObject(hiveFieldValue).getTextValue();
  final int byteCount = text.getLength();
  checkSizeLimit(byteCount);
  // Write only the valid prefix of the Text backing array.
  final byte[] backing = text.getBytes();
  final VarCharVector target = (VarCharVector) outputVV;
  target.setSafe(outputIndex, backing, 0, byteCount);
}
 
Example 16
Source File: Utils.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a String in Text format: a VInt byte count n followed by the n bytes of the
 * string's {@code Text} encoding. A {@code null} string is written as the single VInt -1.
 *
 * @param out destination to write to
 * @param s the string to write, or {@code null}
 * @throws IOException if writing to {@code out} fails
 */
public static void writeString(DataOutput out, String s) throws IOException {
  if (s == null) {
    writeVInt(out, -1);
    return;
  }

  final Text encoded = new Text(s);
  final int byteCount = encoded.getLength();
  writeVInt(out, byteCount);
  // Emit only the valid prefix of the Text backing array.
  out.write(encoded.getBytes(), 0, byteCount);
}
 
Example 17
Source File: BaseCuboidMapperTest.java    From Kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the mapper on a record whose {@code \N} fields stand for nulls and verifies the
 * emitted row key, the decoded dimension values (with {@code null} in place of the missing
 * dimension) and that all measures come out as "0".
 */
@Test
public void testMapperWithNull() throws Exception {
    final String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    final String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
    final List<Pair<Text, Text>> result = mapDriver.run();

    final CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    final CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    final Pair<Text, Text> onlyRecord = result.get(0);
    final Text rowkey = onlyRecord.getFirst();

    // Slice the key: the header is the first 26 bytes, made of the cuboid id (first 8)
    // and the seller id (last 18); everything after the header is the rest of the key.
    final byte[] keyBytes = rowkey.getBytes();
    final byte[] header = Bytes.head(keyBytes, 26);
    final byte[] sellerId = Bytes.tail(header, 18);
    final byte[] cuboidId = Bytes.head(header, 8);
    final byte[] restKey = Bytes.tail(keyBytes, rowkey.getLength() - 26);

    final RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(keyBytes);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), onlyRecord.getSecond(), "0", "0", "0");
}
 
Example 18
Source File: CSVLoader.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the next input record and splits it into fields, honoring double-quoted fields.
 *
 * <p>The record's bytes are scanned once. Outside quotes, {@code FIELD_DEL} ends a field.
 * Inside a quoted field, a doubled quote produces one literal quote, and a delimiter that
 * directly follows an unpaired quote ends the field. Completed fields are handed to
 * {@code readField} together with their zero-based index.
 *
 * @return a tuple built from {@code mProtoTuple}, or {@code null} when the reader has no
 *         more records
 * @throws IOException if reading fails; an interruption is rethrown as an
 *         {@code ExecException} with error code 6018
 */
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();

    // Parser state: whether any byte of the current field has been seen, whether we are
    // inside a quoted field, and whether the quotes seen so far in it pair up.
    boolean inField = false;
    boolean inQuotedField = false;
    boolean evenQuotesSeen = true;
    
    // Load the required-columns mask once, from the UDF properties stored under `signature`.
    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[])ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    try {
        if (!in.nextKeyValue()) {
            return null;
        }                                                                                           
        Text value = (Text) in.getCurrentValue();
        // getBytes() is the backing array; only the first `len` bytes belong to this record.
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int fieldID = 0;

        // A field can never be longer than the whole record.
        ByteBuffer fieldBuffer = ByteBuffer.allocate(len);

        for (int i = 0; i < len; i++) {
            byte b = buf[i];
            inField = true;
            if (inQuotedField) {
                if (b == DOUBLE_QUOTE) {
                    // Each quote flips the parity; the second quote of a pair becomes a
                    // literal quote in the field.
                    evenQuotesSeen = !evenQuotesSeen;
                    if (evenQuotesSeen) {
                        fieldBuffer.put(DOUBLE_QUOTE);
                    }
                } else
                    // A delimiter right after an unpaired quote closes the quoted field.
                    if (!evenQuotesSeen &&
                            (b == FIELD_DEL || b == RECORD_DEL)) {
                        inQuotedField = false;
                        inField = false;
                        readField(fieldBuffer, fieldID++);
                    } else {
                        fieldBuffer.put(b);
                    }
            } else if (b == DOUBLE_QUOTE) {
                // Opening quote: start a quoted field; the quote itself is not stored.
                inQuotedField = true;
                evenQuotesSeen = true;
            } else if (b == FIELD_DEL) {
                inField = false;
                readField(fieldBuffer, fieldID++); // end of the field
            } else {
                evenQuotesSeen = true;
                fieldBuffer.put(b);
            }
        }
        // Flush the last field when the record did not end on a delimiter.
        if (inField) readField(fieldBuffer, fieldID++);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, 
                PigException.REMOTE_ENVIRONMENT, e);
    }

    // The tuple wraps mProtoTuple directly (no copy).
    Tuple t =  mTupleFactory.newTupleNoCopy(mProtoTuple);
    return t;
}
 
Example 19
Source File: MergeDictReducer.java    From kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Merges all dictionaries received for one column and emits the serialized
 * {@code DictionaryInfo} of the merged dictionary.
 *
 * <p>The key is the column name. Columns that are not present in {@code colNeedDictMap} are
 * logged and skipped. With a single incoming dictionary it is used as is; with several, a
 * new dictionary is built over their combined values.
 *
 * @param key the column name
 * @param values serialized dictionaries for that column
 * @param context sink that receives (column name, serialized dictionary info)
 * @throws IOException if serialization or writing fails
 * @throws InterruptedException if writing to the context is interrupted
 * @throws IllegalArgumentException if no dictionary was received or the merge yields null
 */
@Override
protected void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    final String col = key.toString();
    logger.info("merge dictionary for column:{}", col);

    final TblColRef tblColRef = colNeedDictMap.get(col);
    if (tblColRef == null) {
        logger.warn("column:{} not found in the columns need dictionary map: {}", col, colNeedDictMap.keySet());
        return;
    }

    final DataType dataType = tblColRef.getType();

    // Deserialize every incoming dictionary for this column.
    final List<Dictionary<String>> dicts = Lists.newLinkedList();
    for (Text serialized : values) {
        final ByteArray dictBytes = new ByteArray(serialized.getBytes());
        dicts.add((Dictionary<String>) DictionarySerializer.deserialize(dictBytes));
    }

    final int dictCount = dicts.size();
    Dictionary mergedDict;
    if (dictCount > 1) {
        final MultipleDictionaryValueEnumerator allValues =
                new MultipleDictionaryValueEnumerator(dataType, dicts);
        mergedDict = DictionaryGenerator.buildDictionary(dataType, allValues);
    } else if (dictCount == 1) {
        mergedDict = dicts.get(0);
    } else {
        throw new IllegalArgumentException("Dictionary missing for column " + col);
    }
    if (mergedDict == null) {
        throw new IllegalArgumentException("Merge dictionaries error for column " + col);
    }

    // Describe where the dictionary came from and when it was produced.
    final TableDesc tableDesc = tblColRef.getColumnDesc().getTable();
    final IReadableTable.TableSignature signature = new IReadableTable.TableSignature();
    signature.setLastModifiedTime(System.currentTimeMillis());
    signature.setPath(tableDesc.getResourcePath());

    //TODO: Table signature size?
    //        signature.setSize(mergedDict.getSize());

    final DictionaryInfo dictionaryInfo = new DictionaryInfo(
            tblColRef.getTable(),
            tblColRef.getName(),
            tblColRef.getColumnDesc().getZeroBasedIndex(),
            tblColRef.getDatatype(),
            signature);
    dictionaryInfo.setDictionaryObject(mergedDict);
    dictionaryInfo.setDictionaryClass(mergedDict.getClass().getName());
    dictionaryInfo.setCardinality(mergedDict.getSize());

    // Serialize the full dictionary info into memory and emit it as the value.
    final ByteArrayOutputStream fulBuf = new ByteArrayOutputStream();
    final DataOutputStream fulDout = new DataOutputStream(fulBuf);
    DictionaryInfoSerializer.FULL_SERIALIZER.serialize(dictionaryInfo, fulDout);

    context.write(key, new Text(fulBuf.toByteArray()));
    logger.debug("output dict info of column {} to path: {}", col,
            context.getConfiguration().get(FileOutputFormat.OUTDIR));
}
 
Example 20
Source File: PigStorage.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the next input record and splits it on {@code fieldDel} into tuple fields.
 *
 * <p>When source tagging is enabled, the file name ({@code tagFile}) or the full path
 * ({@code tagPath}) of the source is prepended as the first field. If a required-columns
 * mask is present, only the fields it marks are added to the tuple.
 *
 * @return the parsed tuple (with the schema applied unless {@code dontLoadSchema} is set),
 *         or {@code null} when the reader has no more records
 * @throws IOException if reading fails; an interruption is rethrown as an
 *         {@code ExecException} with error code 6018
 */
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();

    // Load the required-columns mask once, from the UDF properties stored under `signature`.
    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(udfProps.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }

    //Prepend input source path if source tagging is enabled
    if (tagFile) {
        mProtoTuple.add(new DataByteArray(sourcePath.getName()));
    } else if (tagPath) {
        mProtoTuple.add(new DataByteArray(sourcePath.toString()));
    }

    try {
        if (!in.nextKeyValue()) {
            return null;
        }

        final Text line = (Text) in.getCurrentValue();
        // getBytes() is the backing array; only the first `length` bytes belong to the record.
        final byte[] data = line.getBytes();
        final int length = line.getLength();

        int fieldStart = 0;
        int column = 0;
        for (int pos = 0; pos < length; pos++) {
            if (data[pos] != fieldDel) {
                continue;
            }
            final boolean wanted = mRequiredColumns == null
                    || (mRequiredColumns.length > column && mRequiredColumns[column]);
            if (wanted) {
                addTupleValue(mProtoTuple, data, fieldStart, pos);
            }
            fieldStart = pos + 1;
            column++;
        }

        // pick up the last field
        if (fieldStart <= length
                && (mRequiredColumns == null
                        || (mRequiredColumns.length > column && mRequiredColumns[column]))) {
            addTupleValue(mProtoTuple, data, fieldStart, length);
        }

        final Tuple tuple = mTupleFactory.newTupleNoCopy(mProtoTuple);
        if (dontLoadSchema) {
            return tuple;
        }
        return applySchema(tuple);
    } catch (InterruptedException interrupted) {
        final int errCode = 6018;
        final String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode,
                PigException.REMOTE_ENVIRONMENT, interrupted);
    }
}