Java Code Examples for org.apache.parquet.io.api.Binary#getBytes()
The following examples show how to use
org.apache.parquet.io.api.Binary#getBytes().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetConverter.java From pentaho-hadoop-shims with Apache License 2.0 | 6 votes |
/**
 * Decodes a 12-byte INT96 value into a millisecond timestamp.
 *
 * <p>Bytes 0-7 are read as a little-endian 64-bit nanosecond-of-day count and bytes 8-11
 * as a little-endian 32-bit Julian day. The result is the day offset from
 * {@code ParquetSpec.JULIAN_DAY_OF_EPOCH} expressed in milliseconds, plus the
 * nanosecond count divided by 1,000,000 (integer division).
 *
 * @param value the binary value to decode
 * @return the decoded timestamp in milliseconds
 * @throws RuntimeException if the value is not exactly 12 bytes long
 */
private static long dateFromInt96( Binary value ) {
  final byte[] raw = value.getBytes();
  if ( raw.length != 12 ) {
    throw new RuntimeException( "Invalid byte array length for INT96" );
  }

  // Fold bytes 7..0 so that byte 7 ends up as the most significant byte.
  long nanosOfDay = 0L;
  for ( int idx = 7; idx >= 0; idx-- ) {
    nanosOfDay = ( nanosOfDay << 8 ) | ( raw[ idx ] & 0xFFL );
  }

  // Fold bytes 11..8 the same way into a 32-bit Julian day.
  int julianDay = 0;
  for ( int idx = 11; idx >= 8; idx-- ) {
    julianDay = ( julianDay << 8 ) | ( raw[ idx ] & 0xFF );
  }

  return ( julianDay - ParquetSpec.JULIAN_DAY_OF_EPOCH ) * 24L * 60L * 60L * 1000L
    + nanosOfDay / 1000000;
}
Example 2
Source File: DrillParquetGroupConverter.java From Bats with Apache License 2.0 | 5 votes |
/**
 * Decodes a binary value into the holder's three fields and writes the holder.
 *
 * <p>Three ints are read with {@code ParquetReaderUtility.getIntFromLEBytes} at byte
 * offsets 0, 4 and 8 and stored as months, days and milliseconds respectively.
 *
 * @param value the binary value to decode
 */
@Override
public void addBinary(Binary value) {
  final byte[] raw = value.getBytes();

  // Fields are populated in offset order: months, then days, then milliseconds.
  holder.months = ParquetReaderUtility.getIntFromLEBytes(raw, 0);
  holder.days = ParquetReaderUtility.getIntFromLEBytes(raw, 4);
  holder.milliseconds = ParquetReaderUtility.getIntFromLEBytes(raw, 8);

  writer.write(holder);
}
Example 3
Source File: ParquetTimestampUtils.java From presto with Apache License 2.0 | 5 votes |
/** * Returns GMT timestamp from binary encoded parquet timestamp (12 bytes - julian date + time of day nanos). * * @param timestampBinary INT96 parquet timestamp * @return timestamp in millis, GMT timezone */ public static long getTimestampMillis(Binary timestampBinary) { if (timestampBinary.length() != 12) { throw new PrestoException(NOT_SUPPORTED, "Parquet timestamp must be 12 bytes, actual " + timestampBinary.length()); } byte[] bytes = timestampBinary.getBytes(); // little endian encoding - need to invert byte order long timeOfDayNanos = Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); int julianDay = Ints.fromBytes(bytes[11], bytes[10], bytes[9], bytes[8]); return julianDayToMillis(julianDay) + (timeOfDayNanos / NANOS_PER_MILLISECOND); }
Example 4
Source File: ParquetTimestampUtils.java From flink with Apache License 2.0 | 5 votes |
/** * Returns GMT timestamp from binary encoded parquet timestamp (12 bytes - julian date + time of day nanos). * * @param timestampBinary INT96 parquet timestamp * @return timestamp in millis, GMT timezone */ public static long getTimestampMillis(Binary timestampBinary) { if (timestampBinary.length() != 12) { throw new IllegalArgumentException("Parquet timestamp must be 12 bytes, actual " + timestampBinary.length()); } byte[] bytes = timestampBinary.getBytes(); // little endian encoding - need to invert byte order long timeOfDayNanos = ByteBuffer.wrap(new byte[] {bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]}).getLong(); int julianDay = ByteBuffer.wrap(new byte[] {bytes[11], bytes[10], bytes[9], bytes[8]}).getInt(); return julianDayToMillis(julianDay) + (timeOfDayNanos / NANOS_PER_MILLISECOND); }
Example 5
Source File: ParquetGroupConverter.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Writes a binary-encoded decimal value through the decimal writer.
 *
 * <p>The raw bytes are passed to {@code writer.writeBigEndianBytesToDecimal} together with
 * an {@code ArrowType.Decimal} built from the holder's precision and scale, after which
 * {@code setWritten()} is called.
 *
 * @param value binary holding the decimal's bytes
 */
@Override
public void addBinary(Binary value) {
  final byte[] bytes = value.getBytes();
  /* The bytes are handed over as-is; going by the writer method's name they are treated
   * as big-endian, and any byte swapping happens while writing into the vector.
   * NOTE(review): the previous comment described this input as "LE format" - confirm. */
  writer.writeBigEndianBytesToDecimal(bytes, new ArrowType.Decimal(holder.precision, holder.scale));
  setWritten();
}
Example 6
Source File: ParquetTimestampUtils.java From flink with Apache License 2.0 | 5 votes |
/** * Returns GMT timestamp from binary encoded parquet timestamp (12 bytes - julian date + time of day nanos). * * @param timestampBinary INT96 parquet timestamp * @return timestamp in millis, GMT timezone */ public static long getTimestampMillis(Binary timestampBinary) { if (timestampBinary.length() != 12) { throw new IllegalArgumentException("Parquet timestamp must be 12 bytes, actual " + timestampBinary.length()); } byte[] bytes = timestampBinary.getBytes(); // little endian encoding - need to invert byte order long timeOfDayNanos = ByteBuffer.wrap(new byte[] {bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]}).getLong(); int julianDay = ByteBuffer.wrap(new byte[] {bytes[11], bytes[10], bytes[9], bytes[8]}).getInt(); return julianDayToMillis(julianDay) + (timeOfDayNanos / NANOS_PER_MILLISECOND); }
Example 7
Source File: DeltaByteArrayWriter.java From parquet-mr with Apache License 2.0 | 5 votes |
@Override public void writeBytes(Binary v) { int i = 0; byte[] vb = v.getBytes(); int length = previous.length < vb.length ? previous.length : vb.length; // find the number of matching prefix bytes between this value and the previous one for(i = 0; (i < length) && (previous[i] == vb[i]); i++); prefixLengthWriter.writeInteger(i); suffixWriter.writeBytes(v.slice(i, vb.length - i)); previous = vb; }
Example 8
Source File: DecimalUtils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Converts a binary-encoded, big-endian two's-complement unscaled value into a
 * {@link BigDecimal}.
 *
 * <p>For {@code precision <= 18} the bytes are folded into a {@code long}, sign-extended,
 * and returned with the requested scale using exact integer arithmetic. For larger
 * precisions the bytes are interpreted through {@link BigInteger}.
 *
 * @param value binary holding the unscaled value
 * @param precision declared decimal precision; selects the long or BigInteger path
 * @param scale number of digits to the right of the decimal point
 * @return the decoded decimal
 */
public static BigDecimal binaryToDecimal(Binary value, int precision, int scale) {
  /*
   * Precision <= 18 checks for the max number of digits for an unscaled long,
   * else treat with big integer conversion
   */
  if (precision <= 18) {
    ByteBuffer buffer = value.toByteBuffer();
    byte[] bytes = buffer.array();
    int start = buffer.arrayOffset() + buffer.position();
    int end = buffer.arrayOffset() + buffer.limit();

    long unscaled = 0L;
    for (int i = start; i < end; i++) {
      unscaled = (unscaled << 8) | (bytes[i] & 0xff);
    }

    // Shift up and arithmetically back down to sign-extend values shorter than 8 bytes.
    int bits = 8 * (end - start);
    long unscaledNew = (unscaled << (64 - bits)) >> (64 - bits);

    // Exact long bound (10^18). Comparing against a double rounds longs near the bound
    // and misclassifies valid 18-digit values such as 999999999999999999.
    final long bound = 1000000000000000000L;
    if (unscaledNew <= -bound || unscaledNew >= bound) {
      // NOTE(review): out-of-range values are returned unscaled, as before - confirm intent.
      return new BigDecimal(unscaledNew);
    }

    // Apply the scale exactly; dividing by a double power of ten loses precision for
    // large values and does not produce a result with the requested scale.
    return BigDecimal.valueOf(unscaledNew, scale);
  }
  return new BigDecimal(new BigInteger(value.getBytes()), scale);
}
Example 9
Source File: ParquetConverter.java From pentaho-hadoop-shims with Apache License 2.0 | 5 votes |
/**
 * Converts a binary-encoded, big-endian two's-complement unscaled value into a
 * {@link BigDecimal}.
 *
 * <p>For {@code precision <= 18} the bytes are folded into a {@code long}, sign-extended,
 * and returned with the requested scale using exact integer arithmetic. For larger
 * precisions the bytes are interpreted through {@link BigInteger}.
 *
 * @param value binary holding the unscaled value
 * @param precision declared decimal precision; selects the long or BigInteger path
 * @param scale number of digits to the right of the decimal point
 * @return the decoded decimal
 */
static BigDecimal binaryToDecimal( Binary value, int precision, int scale ) {
  /*
   * Precision <= 18 checks for the max number of digits for an unscaled long,
   * else treat with big integer conversion
   */
  if ( precision <= 18 ) {
    ByteBuffer buffer = value.toByteBuffer();
    byte[] bytes = buffer.array();
    int start = buffer.arrayOffset() + buffer.position();
    int end = buffer.arrayOffset() + buffer.limit();

    long unscaled = 0L;
    for ( int i = start; i < end; i++ ) {
      unscaled = ( unscaled << 8 ) | ( bytes[ i ] & 0xff );
    }

    // Shift up and arithmetically back down to sign-extend values shorter than 8 bytes.
    int bits = 8 * ( end - start );
    long unscaledNew = ( unscaled << ( 64 - bits ) ) >> ( 64 - bits );

    // Exact long bound (10^18). Comparing against a double rounds longs near the bound
    // and misclassifies valid 18-digit values such as 999999999999999999.
    final long bound = 1000000000000000000L;
    if ( unscaledNew <= -bound || unscaledNew >= bound ) {
      // NOTE(review): out-of-range values are returned unscaled, as before - confirm intent.
      return new BigDecimal( unscaledNew );
    }

    // Apply the scale exactly; dividing by a double power of ten loses precision for
    // large values and does not produce a result with the requested scale.
    return BigDecimal.valueOf( unscaledNew, scale );
  }
  return new BigDecimal( new BigInteger( value.getBytes() ), scale );
}
Example 10
Source File: AvroConverters.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Converts a binary value to a byte array.
 *
 * @param binary the value to convert
 * @return the result of {@code binary.getBytes()}
 */
@Override
public byte[] convert(Binary binary) {
  final byte[] raw = binary.getBytes();
  return raw;
}
Example 11
Source File: AvroConverters.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Converts a binary value to a {@code Utf8} instance.
 *
 * @param binary the value to convert
 * @return a new {@code Utf8} constructed from {@code binary.getBytes()}
 */
@Override
public Utf8 convert(Binary binary) {
  final byte[] raw = binary.getBytes();
  return new Utf8(raw);
}