Java Code Examples for org.apache.parquet.io.api.Binary#getBytes()

The following examples show how to use org.apache.parquet.io.api.Binary#getBytes(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: ParquetConverter.java    From pentaho-hadoop-shims with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a 12-byte INT96 value into a millisecond timestamp.
 *
 * Bytes 0-7 are read as a little-endian time of day in nanoseconds and bytes 8-11 as a
 * little-endian Julian day number. The day is rebased against
 * {@code ParquetSpec.JULIAN_DAY_OF_EPOCH} and the nanoseconds are truncated to milliseconds.
 *
 * @param value the binary value holding exactly 12 bytes
 * @return the number of milliseconds derived from the Julian day and time of day
 * @throws RuntimeException if the value is not exactly 12 bytes long
 */
private static long dateFromInt96( Binary value ) {
  final byte[] raw = value.getBytes();
  if ( raw.length != 12 ) {
    throw new RuntimeException( "Invalid byte array length for INT96" );
  }

  // Walk from the most significant byte (index 7) down to the least significant (index 0).
  long nanosOfDay = 0L;
  for ( int idx = 7; idx >= 0; idx-- ) {
    nanosOfDay = ( nanosOfDay << 8 ) | ( raw[ idx ] & 0xFFL );
  }

  // Same approach for the 4-byte day number stored in bytes 8..11.
  int julianDayNumber = 0;
  for ( int idx = 11; idx >= 8; idx-- ) {
    julianDayNumber = ( julianDayNumber << 8 ) | ( raw[ idx ] & 0xFF );
  }

  final long daysSinceEpoch = julianDayNumber - ParquetSpec.JULIAN_DAY_OF_EPOCH;
  final long millisOfDay = nanosOfDay / 1000000;
  return daysSinceEpoch * 24L * 60L * 60L * 1000L + millisOfDay;
}
 
Example 2
Source File: DrillParquetGroupConverter.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes three consecutive little-endian ints from the binary value into the holder
 * (months at offset 0, days at offset 4, milliseconds at offset 8) and writes the holder out.
 *
 * @param value the binary value to decode
 */
@Override
public void addBinary(Binary value) {
  final byte[] intervalBytes = value.getBytes();

  // Each component occupies four bytes, laid out back to back.
  holder.months = ParquetReaderUtility.getIntFromLEBytes(intervalBytes, 0);
  holder.days = ParquetReaderUtility.getIntFromLEBytes(intervalBytes, 4);
  holder.milliseconds = ParquetReaderUtility.getIntFromLEBytes(intervalBytes, 8);

  writer.write(holder);
}
 
Example 3
Source File: ParquetTimestampUtils.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a binary encoded parquet timestamp (12 bytes: time of day in nanos followed by
 * the julian day) into a GMT timestamp.
 *
 * @param timestampBinary INT96 parquet timestamp
 * @return timestamp in millis, GMT timezone
 */
public static long getTimestampMillis(Binary timestampBinary)
{
    if (timestampBinary.length() != 12) {
        throw new PrestoException(NOT_SUPPORTED, "Parquet timestamp must be 12 bytes, actual " + timestampBinary.length());
    }
    byte[] raw = timestampBinary.getBytes();

    // The payload is little endian, so the bytes are passed in reverse order.
    long nanosOfDay = Longs.fromBytes(raw[7], raw[6], raw[5], raw[4], raw[3], raw[2], raw[1], raw[0]);
    int julianDayNumber = Ints.fromBytes(raw[11], raw[10], raw[9], raw[8]);

    long millisOfDay = nanosOfDay / NANOS_PER_MILLISECOND;
    return julianDayToMillis(julianDayNumber) + millisOfDay;
}
 
Example 4
Source File: ParquetTimestampUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns GMT timestamp from binary encoded parquet timestamp (12 bytes - time of day nanos
 * in bytes 0-7 followed by the julian day in bytes 8-11, both little endian).
 *
 * @param timestampBinary INT96 parquet timestamp
 * @return timestamp in millis, GMT timezone
 * @throws IllegalArgumentException if the binary is not exactly 12 bytes long
 */
public static long getTimestampMillis(Binary timestampBinary) {
	if (timestampBinary.length() != 12) {
		throw new IllegalArgumentException("Parquet timestamp must be 12 bytes, actual " + timestampBinary.length());
	}

	// Read the little endian payload in place instead of copying the bytes, reversed,
	// into two temporary arrays. ByteOrder is fully qualified so no new import is needed.
	ByteBuffer buffer = ByteBuffer.wrap(timestampBinary.getBytes()).order(java.nio.ByteOrder.LITTLE_ENDIAN);
	long timeOfDayNanos = buffer.getLong();
	int julianDay = buffer.getInt();

	return julianDayToMillis(julianDay) + (timeOfDayNanos / NANOS_PER_MILLISECOND);
}
 
Example 5
Source File: ParquetGroupConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Passes the raw bytes of the binary value to the writer as a decimal with the holder's
 * precision and scale, then marks this converter as written.
 *
 * @param value the binary value holding the decimal bytes
 */
@Override
public void addBinary(Binary value) {
  final byte[] bytes = value.getBytes();
  /* The bytes are passed through writeBigEndianBytesToDecimal unchanged.
   * NOTE(review): the previous comment described LE storage with a byte swap at write
   * time; presumably the writer performs that conversion from the big-endian input - confirm.
   */
  writer.writeBigEndianBytesToDecimal(bytes, new ArrowType.Decimal(holder.precision, holder.scale));
  setWritten();
}
 
Example 6
Source File: ParquetTimestampUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns GMT timestamp from binary encoded parquet timestamp (12 bytes - time of day nanos
 * in bytes 0-7 followed by the julian day in bytes 8-11, both little endian).
 *
 * @param timestampBinary INT96 parquet timestamp
 * @return timestamp in millis, GMT timezone
 * @throws IllegalArgumentException if the binary is not exactly 12 bytes long
 */
public static long getTimestampMillis(Binary timestampBinary) {
	if (timestampBinary.length() != 12) {
		throw new IllegalArgumentException("Parquet timestamp must be 12 bytes, actual " + timestampBinary.length());
	}

	// Read the little endian payload in place instead of copying the bytes, reversed,
	// into two temporary arrays. ByteOrder is fully qualified so no new import is needed.
	ByteBuffer buffer = ByteBuffer.wrap(timestampBinary.getBytes()).order(java.nio.ByteOrder.LITTLE_ENDIAN);
	long timeOfDayNanos = buffer.getLong();
	int julianDay = buffer.getInt();

	return julianDayToMillis(julianDay) + (timeOfDayNanos / NANOS_PER_MILLISECOND);
}
 
Example 7
Source File: DeltaByteArrayWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a value as the length of the prefix it shares with the previously written value,
 * followed by the remaining suffix bytes, and then remembers this value as the new previous.
 *
 * @param v the binary value to write
 */
@Override
public void writeBytes(Binary v) {
  final byte[] current = v.getBytes();
  final int limit = Math.min(previous.length, current.length);

  // Count the leading bytes this value has in common with the previous one.
  int prefixLength = 0;
  while (prefixLength < limit && previous[prefixLength] == current[prefixLength]) {
    prefixLength++;
  }

  prefixLengthWriter.writeInteger(prefixLength);
  suffixWriter.writeBytes(v.slice(prefixLength, current.length - prefixLength));
  previous = current;
}
 
Example 8
Source File: DecimalUtils.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a binary value holding a big-endian two's-complement unscaled integer into a
 * {@link BigDecimal} with the given scale.
 *
 * When the precision fits in a long (at most 18 digits) and the value is at most 8 bytes
 * wide, the unscaled value is assembled directly into a long. Everything else goes through
 * {@link BigInteger}.
 *
 * @param value     the binary value holding the unscaled decimal bytes
 * @param precision the declared decimal precision
 * @param scale     the declared decimal scale
 * @return the decoded decimal
 */
public static BigDecimal binaryToDecimal(Binary value, int precision, int scale) {
  /*
   * Precision <= 18 checks for the max number of digits for an unscaled long,
   * else treat with big integer conversion
   */
  if (precision <= 18) {
    ByteBuffer buffer = value.toByteBuffer();
    byte[] bytes = buffer.array();
    int start = buffer.arrayOffset() + buffer.position();
    int end = buffer.arrayOffset() + buffer.limit();
    int byteCount = end - start;
    // More than 8 bytes cannot be held in a long: the shift counts below would wrap and
    // produce garbage, so such inputs fall through to the BigInteger path instead.
    if (byteCount <= 8) {
      long unscaled = 0L;
      for (int i = start; i < end; i++) {
        unscaled = (unscaled << 8) | (bytes[i] & 0xff);
      }
      // Sign-extend from the width actually read up to the full 64 bits.
      int bits = 8 * byteCount;
      long unscaledNew = (unscaled << (64 - bits)) >> (64 - bits);
      if (unscaledNew <= -pow(10,18) || unscaledNew >= pow(10,18)) {
        // NOTE(review): this branch ignores the scale, as the original did - confirm intent.
        return new BigDecimal(unscaledNew);
      }
      // Apply the scale exactly. Dividing by pow(10, scale) went through double, which
      // loses digits above 2^53 and yields a scale that depends on the value.
      return BigDecimal.valueOf(unscaledNew, scale);
    }
  }
  return new BigDecimal(new BigInteger(value.getBytes()), scale);
}
 
Example 9
Source File: ParquetConverter.java    From pentaho-hadoop-shims with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a binary value holding a big-endian two's-complement unscaled integer into a
 * {@link BigDecimal} with the given scale.
 *
 * When the precision fits in a long (at most 18 digits) and the value is at most 8 bytes
 * wide, the unscaled value is assembled directly into a long. Everything else goes through
 * {@link BigInteger}.
 *
 * @param value     the binary value holding the unscaled decimal bytes
 * @param precision the declared decimal precision
 * @param scale     the declared decimal scale
 * @return the decoded decimal
 */
static BigDecimal binaryToDecimal( Binary value, int precision, int scale ) {
  /*
   * Precision <= 18 checks for the max number of digits for an unscaled long,
   * else treat with big integer conversion
   */
  if ( precision <= 18 ) {
    ByteBuffer buffer = value.toByteBuffer();
    byte[] bytes = buffer.array();
    int start = buffer.arrayOffset() + buffer.position();
    int end = buffer.arrayOffset() + buffer.limit();
    int byteCount = end - start;
    // More than 8 bytes cannot be held in a long: the shift counts below would wrap and
    // produce garbage, so such inputs fall through to the BigInteger path instead.
    if ( byteCount <= 8 ) {
      long unscaled = 0L;
      for ( int i = start; i < end; i++ ) {
        unscaled = ( unscaled << 8 ) | ( bytes[ i ] & 0xff );
      }
      // Sign-extend from the width actually read up to the full 64 bits.
      int bits = 8 * byteCount;
      long unscaledNew = ( unscaled << ( 64 - bits ) ) >> ( 64 - bits );
      if ( unscaledNew <= -pow( 10, 18 ) || unscaledNew >= pow( 10, 18 ) ) {
        // NOTE(review): this branch ignores the scale, as the original did - confirm intent.
        return new BigDecimal( unscaledNew );
      }
      // Apply the scale exactly. Dividing by pow( 10, scale ) went through double, which
      // loses digits above 2^53 and yields a scale that depends on the value.
      return BigDecimal.valueOf( unscaledNew, scale );
    }
  }
  return new BigDecimal( new BigInteger( value.getBytes() ), scale );
}
 
Example 10
Source File: AvroConverters.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the binary value to a byte array by returning the result of its
 * {@code getBytes()} call.
 *
 * @param binary the binary value to convert
 * @return the bytes obtained from the binary value
 */
@Override
public byte[] convert(Binary binary) {
  final byte[] content = binary.getBytes();
  return content;
}
 
Example 11
Source File: AvroConverters.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the binary value to a {@code Utf8} built from the result of its
 * {@code getBytes()} call.
 *
 * @param binary the binary value to convert
 * @return a new {@code Utf8} constructed from the binary's bytes
 */
@Override
public Utf8 convert(Binary binary) {
  final byte[] content = binary.getBytes();
  return new Utf8(content);
}