Java Code Examples for org.apache.hadoop.io.Text#decode()

The following examples show how to use org.apache.hadoop.io.Text#decode(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. You may also check out the related API usage in the sidebar.
Example 1
Source File: NormalizedFieldAndValue.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Converts raw bytes into a string value.
 * <p>
 * Unless the content is expected to be binary, the bytes are first run through the configured MIME decoder and the result is decoded with
 * {@code Text.decode}. If that attempt fails for any reason, or if binary content is expected, the bytes are decoded as ISO-8859-1, i.e. one byte per
 * character.
 * 
 * @param bytes
 *            the raw bytes to convert
 * @param expectBinary
 *            true to skip the text attempt and go straight to the one-byte-per-character decoding
 * @return the decoded value
 */
public static String decode(byte[] bytes, boolean expectBinary) {
    // lazily create the shared MIME decoder on first use, synchronizing on NormalizedFieldAndValue.class around the null check
    synchronized (NormalizedFieldAndValue.class) {
        if (null == mimeDecoder) {
            IngestConfiguration config = IngestConfigurationFactory.getIngestConfiguration();
            mimeDecoder = config.createMimeDecoder();
        }
    }
    String value = null;
    if (!expectBinary) {
        try {
            value = Text.decode(mimeDecoder.decode(bytes));
        } catch (Exception ignored) {
            // deliberate best effort: any failure here means the content could not be read as text, so fall through and treat it as binary
        }
    }
    if (value == null) {
        // ISO-8859-1 maps every byte to exactly one character; the Charset constant avoids the checked UnsupportedEncodingException that the
        // name-based String constructor declares
        value = new String(bytes, java.nio.charset.StandardCharsets.ISO_8859_1);
    }
    return value;
}
 
Example 2
Source File: EdgeKey.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Splits the edge key bytes on the column separator, storing each decoded piece in {@code parts} and advancing {@code pLen}.
 *
 * @param bytes
 *            byte array holding the parts of the edge key
 * @param bLen
 *            number of bytes to use (important: the byte array may be reused so its length may not be correct)
 * @throws RuntimeException
 *             if there are more pieces than {@code parts} can hold, or if a piece cannot be decoded
 */
private void getParts(byte[] bytes, int bLen) {
    try {
        int start = 0;
        // i == bLen acts as a virtual end-of-input position so the trailing part goes through the same capacity check as every other part
        for (int i = 0; i <= bLen; i++) {
            if (pLen >= parts.length) {
                // only the first bLen bytes are meaningful; anything beyond that in a reused array is stale and must not appear in the message
                Text used = new Text();
                used.set(bytes, 0, bLen);
                throw new RuntimeException("Exceeded number of possible number of parts (" + parts.length + ")." + "  bytes as String: " + used
                                + " parts: " + Arrays.toString(parts));
            }
            if (i == bLen || bytes[i] == COL_SEPARATOR_BYTE) {
                parts[pLen++] = Text.decode(bytes, start, i - start);
                start = i + 1;
            }
        }
    } catch (CharacterCodingException e) {
        throw new RuntimeException("Edge key column encoding exception", e);
    }
}
 
Example 3
Source File: EdgeKeyDecoder.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the leading date portion of an edge column qualifier.
 * <p>
 * The first {@code DATE_LEN + 1} bytes (or fewer, if the qualifier is shorter) are scanned for the column separator. The returned string is everything
 * before the separator when one is found, otherwise the first {@code DATE_LEN} bytes, capped at the qualifier length.
 *
 * @param colQual
 *            the column qualifier to read the date from
 * @return the decoded date prefix
 * @throws RuntimeException
 *             if the prefix cannot be decoded
 */
public static String getYYYYMMDD(Text colQual) {
    final byte[] raw = colQual.getBytes();
    final int available = colQual.getLength();
    final int scanLimit = Math.min(DATE_LEN + 1, available);
    
    // fall back to the full date width when no separator shows up within the scanned prefix
    int dateEnd = DATE_LEN;
    int pos = 0;
    while (pos < scanLimit) {
        if (raw[pos] == COL_SEPARATOR_BYTE) {
            dateEnd = pos;
            break;
        }
        pos++;
    }
    
    try {
        return Text.decode(raw, 0, Math.min(available, dateEnd));
    } catch (CharacterCodingException e) {
        // same behavior as EdgeKey.getParts
        throw new RuntimeException("Edge key column encoding exception", e);
    }
}
 
Example 4
Source File: FastaInputFormat.java    From Hadoop-BAM with MIT License 6 votes vote down vote up
/**
 * Reads the next key/value pair from the input for processing.
 *
 * @param key   passed to scanFastaLine together with the line that was read
 * @param value passed to scanFastaLine together with the line that was read
 * @return true if a line was read and scanned; false when the position is past the end of the slice or no more bytes could be read
 * @throws IOException if reading the line fails
 * @throws RuntimeException if the line read is at least MAX_LINE_LENGTH bytes long
 */
public boolean next(Text key, ReferenceFragment value) throws IOException
{
	if (pos >= end)
		return false; // past end of slice

	int bytesRead = lineReader.readLine(buffer, MAX_LINE_LENGTH);
	pos += bytesRead;
	if (bytesRead >= MAX_LINE_LENGTH)
	{
		// Build a preview of the offending line for the error message. Cutting at a fixed 500 bytes can split a
		// multi-byte character or hit non-text data, so a decoding failure must not mask the real error below.
		String line;
		try {
			line = Text.decode(buffer.getBytes(), 0, 500);
		} catch (java.nio.charset.CharacterCodingException e) {
			line = "(line not convertible to printable format)";
		}
		throw new RuntimeException("found abnormally large line (length " + bytesRead + ") at " + makePositionMessage(pos - bytesRead) + ": " + line);
	}
	else if (bytesRead <= 0)
		return false; // EOF
	else
	{
		scanFastaLine(buffer, key, value);
		current_split_pos += bytesRead;
		return true;
	}
}
 
Example 5
Source File: QseqInputFormat.java    From Hadoop-BAM with MIT License 6 votes vote down vote up
/**
 * Reads one line into the shared buffer, advances the position by the number of bytes read and, when
 * the line is non-empty, hands it to scanQseqLine.
 *
 * @return the number of bytes read for this line
 * @throws IOException if reading the line fails
 * @throws RuntimeException if the line read is at least MAX_LINE_LENGTH bytes long
 */
private int lowLevelQseqRead(Text key, SequencedFragment value) throws IOException
{
	final int consumed = lineReader.readLine(buffer, MAX_LINE_LENGTH);
	pos += consumed;

	// normal case first: the line fits within the limit
	if (consumed < MAX_LINE_LENGTH)
	{
		if (consumed > 0)
			scanQseqLine(buffer, key, value);
		return consumed;
	}

	// oversized line: include a preview of its beginning in the error, if it can be decoded
	String preview;
	try {
		preview = Text.decode(buffer.getBytes(), 0, 500);
	} catch (java.nio.charset.CharacterCodingException e) {
		preview = "(line not convertible to printable format)";
	}
	throw new RuntimeException("found abnormally large line (length " + consumed + ") at " +
	            makePositionMessage(pos - consumed) + ": " + preview);
}
 
Example 6
Source File: ValueToAttributes.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an attribute for the given field from the portion of the key's column qualifier that follows the first null byte.
 *
 * @param fieldName
 *            the field name handed to the attribute factory
 * @param k
 *            the key whose column qualifier carries the value
 * @return the attribute created by the attribute factory
 * @throws IllegalArgumentException
 *             if the column qualifier contains no null byte, or the value bytes cannot be decoded
 */
public Attribute<?> getFieldValue(String fieldName, Key k) {
    k.getColumnQualifier(holder);
    final int nullPos = holder.find(Constants.NULL);
    
    if (nullPos < 0) {
        throw new IllegalArgumentException("Could not find null-byte contained in columnqualifier for key: " + k);
    }
    
    try {
        // the value starts right after the null byte and runs to the end of the qualifier
        final byte[] raw = holder.getBytes();
        final int valueStart = nullPos + 1;
        final int valueLen = holder.getLength() - valueStart;
        String data = Text.decode(raw, valueStart, valueLen);
        
        // NOTE(review): the result is not used in this method; the call is retained in case getCV has side effects - confirm
        ColumnVisibility cv = getCV(k);
        
        final boolean keepInitially = (attrFilter == null) || attrFilter.keep(k);
        Attribute<?> created = this.attrFactory.create(fieldName, data, k, keepInitially);
        if (attrFilter != null) {
            created.setToKeep(attrFilter.keep(k));
        }
        
        if (log.isTraceEnabled()) {
            log.trace("Created " + created.getClass().getName() + " for " + fieldName);
        }
        
        return created;
    } catch (CharacterCodingException e) {
        throw new IllegalArgumentException(e);
    }
}
 
Example 7
Source File: KeyToFieldName.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the field name from the start of the key's column qualifier.
 * <p>
 * The name ends at the first null byte, or at the first '.' when {@code includeGroupingContext} is false, whichever comes first.
 *
 * @param k
 *            the key whose column qualifier holds the field name
 * @return the decoded field name
 * @throws IllegalArgumentException
 *             if no terminating byte is found, or the name bytes cannot be decoded
 */
public String getFieldName(Key k) {
    final ByteSequence qualifier = k.getColumnQualifierData();
    final byte[] backing = qualifier.getBackingArray();
    
    // walk forward until a terminator is seen or the qualifier is exhausted
    int end = -1;
    int pos = 0;
    while (end < 0 && pos < qualifier.length()) {
        final byte current = backing[pos];
        final boolean groupingDot = !includeGroupingContext && current == '.';
        if (groupingDot || current == 0x00) {
            end = pos;
        }
        pos++;
    }
    
    if (end < 0) {
        throw new IllegalArgumentException("Could not find null-byte contained in columnqualifier for key: " + k);
    }
    
    try {
        return Text.decode(backing, 0, end);
    } catch (CharacterCodingException e) {
        throw new IllegalArgumentException(e);
    }
}
 
Example 8
Source File: TextUtil.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a UTF-8 encoded byte array into a String via {@code Text.decode}.
 *
 * @param bytes
 *            the encoded bytes
 * @return the decoded string
 * @throws IllegalArgumentException
 *             wrapping the CharacterCodingException raised when the bytes cannot be decoded
 */
public static String fromUtf8(byte[] bytes) {
    final String decoded;
    try {
        decoded = Text.decode(bytes);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
    return decoded;
}
 
Example 9
Source File: EdgeKeyUtil.java    From datawave with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes the first eight bytes of the given text's backing array into a String.
 *
 * @param date
 *            the text whose leading bytes are decoded
 * @return the decoded eight-byte prefix
 * @throws CharacterCodingException
 *             if those bytes cannot be decoded
 */
public static String decodeDate(Text date) throws CharacterCodingException {
    final byte[] raw = date.getBytes();
    final int offset = 0;
    final int width = 8;
    return Text.decode(raw, offset, width);
}
 
Example 10
Source File: JsonSerdeUtils.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the string form of a value into the Java object that corresponds to the given primitive type.
 *
 * @param s the string to convert
 * @param mapKeyType the primitive type that selects the conversion
 * @return the converted object
 * @throws IOException if the type is not handled here, or the BINARY round trip through Text fails
 */
@Nonnull
private static Object getObjectOfCorrespondingPrimitiveType(String s,
        PrimitiveTypeInfo mapKeyType) throws IOException {
    switch (Type.getPrimitiveHType(mapKeyType)) {
        case INT:
            return Integer.valueOf(s);
        case TINYINT:
            return Byte.valueOf(s);
        case SMALLINT:
            return Short.valueOf(s);
        case BIGINT:
            return Long.valueOf(s);
        case BOOLEAN:
            return (s.equalsIgnoreCase("true"));
        case FLOAT:
            return Float.valueOf(s);
        case DOUBLE:
            return Double.valueOf(s);
        case STRING:
            return s;
        case BINARY:
            try {
                // Text.decode only accepts UTF-8, whereas the no-arg String#getBytes() uses the platform
                // default charset; encode explicitly so the result does not depend on the JVM's locale.
                byte[] utf8 = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
                String t = Text.decode(utf8, 0, utf8.length);
                return t.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            } catch (CharacterCodingException e) {
                throw new IOException("Error generating json binary type from object.", e);
            }
        case DATE:
            return Date.valueOf(s);
        case TIMESTAMP:
            return Timestamp.valueOf(s);
        case DECIMAL:
            return HiveDecimal.create(s);
        case VARCHAR:
            return new HiveVarchar(s, ((BaseCharTypeInfo) mapKeyType).getLength());
        case CHAR:
            return new HiveChar(s, ((BaseCharTypeInfo) mapKeyType).getLength());
        default:
            throw new IOException(
                "Could not convert from string to map type " + mapKeyType.getTypeName());
    }
}
 
Example 11
Source File: HiveJsonStructReader.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the string form of a value into the object expected by the given primitive object inspector.
 * <p>
 * When {@code writeablePrimitives} is set, the conversion is delegated to a converter obtained from
 * ObjectInspectorConverters; otherwise the string is converted according to the inspector's primitive category.
 *
 * @param s the string to convert
 * @param oi the inspector describing the target primitive type
 * @return the converted object, or null if the BINARY round trip through Text fails
 * @throws IOException if the primitive category is not handled here
 */
private Object getObjectOfCorrespondingPrimitiveType(String s, PrimitiveObjectInspector oi)
        throws IOException {
    PrimitiveTypeInfo typeInfo = oi.getTypeInfo();
    if (writeablePrimitives) {
        Converter c = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
        return c.convert(s);
    }

    switch (typeInfo.getPrimitiveCategory()) {
        case INT:
            return Integer.valueOf(s);
        case BYTE:
            return Byte.valueOf(s);
        case SHORT:
            return Short.valueOf(s);
        case LONG:
            return Long.valueOf(s);
        case BOOLEAN:
            return (s.equalsIgnoreCase("true"));
        case FLOAT:
            return Float.valueOf(s);
        case DOUBLE:
            return Double.valueOf(s);
        case STRING:
            return s;
        case BINARY:
            try {
                // Text.decode only accepts UTF-8, whereas the no-arg String#getBytes() uses the platform
                // default charset; encode explicitly so the result does not depend on the JVM's locale.
                byte[] utf8 = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
                String t = Text.decode(utf8, 0, utf8.length);
                return t.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            } catch (CharacterCodingException e) {
                // not expected for bytes that were just produced as UTF-8, but Text.decode declares the checked exception
                LOG.warn("Error generating json binary type from object.", e);
                return null;
            }
        case DATE:
            return Date.valueOf(s);
        case TIMESTAMP:
            return Timestamp.valueOf(s);
        case DECIMAL:
            return HiveDecimal.create(s);
        case VARCHAR:
            return new HiveVarchar(s, ((BaseCharTypeInfo) typeInfo).getLength());
        case CHAR:
            return new HiveChar(s, ((BaseCharTypeInfo) typeInfo).getLength());
        default:
            throw new IOException(
                "Could not convert from string to " + typeInfo.getPrimitiveCategory());
    }
}
 
Example 12
Source File: Utils.java    From hadoop with Apache License 2.0 3 votes vote down vote up
/**
 * Reads a string stored as a VInt byte count followed by that many bytes in Text format.
 * 
 * @param in
 *          The input stream.
 * @return The decoded string, or null when the stored byte count is -1.
 * @throws IOException
 *           if reading from the input or decoding the bytes fails
 */
public static String readString(DataInput in) throws IOException {
  final int byteCount = readVInt(in);
  if (byteCount == -1) {
    // a count of -1 is the marker for a null string
    return null;
  }
  final byte[] encoded = new byte[byteCount];
  in.readFully(encoded);
  return Text.decode(encoded);
}
 
Example 13
Source File: Utils.java    From big-c with Apache License 2.0 3 votes vote down vote up
/**
 * Reads a string stored as a VInt byte count followed by that many bytes in Text format.
 * 
 * @param in
 *          The input stream.
 * @return The decoded string, or null when the stored byte count is -1.
 * @throws IOException
 *           if reading from the input or decoding the bytes fails
 */
public static String readString(DataInput in) throws IOException {
  final int size = readVInt(in);
  if (size != -1) {
    final byte[] payload = new byte[size];
    in.readFully(payload);
    return Text.decode(payload);
  }
  // a count of -1 is the marker for a null string
  return null;
}
 
Example 14
Source File: Utils.java    From RDFS with Apache License 2.0 3 votes vote down vote up
/**
 * Reads a string stored as a VInt byte count followed by that many bytes in Text format.
 * 
 * @param in
 *          The input stream.
 * @return The decoded string, or null when the stored byte count is -1.
 * @throws IOException
 *           if reading from the input or decoding the bytes fails
 */
public static String readString(DataInput in) throws IOException {
  final int encodedLength = readVInt(in);
  final boolean isNullMarker = (encodedLength == -1);
  if (isNullMarker) {
    return null;
  }
  final byte[] utf8 = new byte[encodedLength];
  in.readFully(utf8);
  final String result = Text.decode(utf8);
  return result;
}
 
Example 15
Source File: Utils.java    From hadoop-gpu with Apache License 2.0 3 votes vote down vote up
/**
 * Reads a string stored as a VInt byte count followed by that many bytes in Text format.
 * 
 * @param in
 *          The input stream.
 * @return The decoded string, or null when the stored byte count is -1.
 * @throws IOException
 *           if reading from the input or decoding the bytes fails
 */
public static String readString(DataInput in) throws IOException {
  final int n = readVInt(in);
  String decoded = null;
  // a count of -1 is the marker for a null string; anything else is the number of bytes to read
  if (n != -1) {
    final byte[] raw = new byte[n];
    in.readFully(raw);
    decoded = Text.decode(raw);
  }
  return decoded;
}