parquet.io.api.Binary Java Examples
The following examples show how to use
parquet.io.api.Binary.
Each example notes its source file, the project it comes from, and that project's license.
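Binary is Parquet's wrapper around a raw byte sequence. As a minimal orientation sketch (standalone code, not taken from any of the projects below), these are the three operations that recur throughout the examples: constructing a Binary from a String, reading the raw bytes back, and decoding the bytes as UTF-8.

    import parquet.io.api.Binary;

    public class BinaryBasics {
        public static void main(String[] args) {
            Binary name = Binary.fromString("Felix");     // UTF-8 encodes the string
            byte[] bytes = name.getBytes();               // raw byte payload
            System.out.println(bytes.length);             // 5
            System.out.println(name.toStringUsingUTF8()); // "Felix"
        }
    }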
Example #1
Source File: TPCHQuery3Parquet.java From parquet-flinktacular with Apache License 2.0
    private static DataSet<Tuple2<Void, CustomerTable>> getCustomerDataSet(ExecutionEnvironment env)
            throws IOException {
        Job job = Job.getInstance();
        ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
        job.getConfiguration().set("parquet.thrift.column.filter", "ID;MKTSEGMENT");
        HadoopInputFormat hadoopInputFormat =
            new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class, CustomerTable.class, job);

        // Filter market segment "AUTOMOBILE"
        BinaryColumn mktsegment = binaryColumn("MKTSEGMENT");
        FilterPredicate mktsegmentPred = eq(mktsegment, Binary.fromString("AUTOMOBILE"));
        ParquetInputFormat.setFilterPredicate(job.getConfiguration(), mktsegmentPred);

        ParquetThriftInputFormat.addInputPath(job, new Path(customerPath));

        DataSet<Tuple2<Void, CustomerTable>> data = env.createInput(hadoopInputFormat);

        return data;
    }
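A hypothetical driver for this reader, which would live in the same class since the method is private (this wiring is assumed, not part of the original example):

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Void, CustomerTable>> customers = getCustomerDataSet(env);
    customers.print();  // in recent Flink versions, print() itself triggers job execution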
Example #2
Source File: ParquetThriftExample.java From parquet-flinktacular with Apache License 2.0
    public static DataSet<Tuple2<Void, Person>> readThrift(ExecutionEnvironment env, String inputPath)
            throws IOException {
        Job job = Job.getInstance();
        HadoopInputFormat hadoopInputFormat =
            new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class, Person.class, job);

        // schema projection: read only the name, id, email and phone/number attributes
        job.getConfiguration().set("parquet.thrift.column.filter", "name;id;email;phone/number");

        FileInputFormat.addInputPath(job, new Path(inputPath));

        // predicate push-down: read only persons with name = "Felix"
        BinaryColumn name = binaryColumn("name");
        FilterPredicate namePred = eq(name, Binary.fromString("Felix"));
        ParquetInputFormat.setFilterPredicate(job.getConfiguration(), namePred);

        DataSet<Tuple2<Void, Person>> data = env.createInput(hadoopInputFormat);

        return data;
    }
Example #3
Source File: SimpleRecordConverter.java From parquet-tools with Apache License 2.0
    @Override
    public void addBinary(Binary value) {
        byte[] data = value.getBytes();
        if (data == null) {
            record.add(name, null);
            return;
        }
        // Try to decode the bytes as UTF-8 text; fall back to the raw bytes
        // if the value is not valid UTF-8.
        try {
            CharBuffer buffer = UTF8_DECODER.decode(value.toByteBuffer());
            record.add(name, buffer.toString());
            return;
        } catch (Throwable th) {
            // not valid UTF-8; fall through to the raw-bytes case
        }
        record.add(name, data);
    }
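Both this example and Example #5 reference a UTF8_DECODER field that is not part of the snippet. It is presumably a java.nio.charset.CharsetDecoder declared along these lines (the exact declaration is an assumption, not shown in the source):

    // Assumed declaration (requires java.nio.charset.CharsetDecoder and StandardCharsets).
    // CharsetDecoder is stateful and not thread-safe, so a per-converter instance
    // is safer than a shared static one.
    private final CharsetDecoder UTF8_DECODER = StandardCharsets.UTF_8.newDecoder();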
Example #4
Source File: ParquetConverter.java From pentaho-hadoop-shims with Apache License 2.0
    private static long dateFromInt96(Binary value) {
        byte[] readBuffer = value.getBytes();
        if (readBuffer.length != 12) {
            throw new RuntimeException("Invalid byte array length for INT96");
        }
        // first 8 bytes: little-endian nanoseconds since midnight
        long timeOfDayNanos =
            ((long) readBuffer[7] << 56)
            + ((long) (readBuffer[6] & 255) << 48)
            + ((long) (readBuffer[5] & 255) << 40)
            + ((long) (readBuffer[4] & 255) << 32)
            + ((long) (readBuffer[3] & 255) << 24)
            + ((readBuffer[2] & 255) << 16)
            + ((readBuffer[1] & 255) << 8)
            + (readBuffer[0] & 255);
        // last 4 bytes: little-endian Julian day number
        int julianDay =
            ((int) (readBuffer[11] & 255) << 24)
            + ((readBuffer[10] & 255) << 16)
            + ((readBuffer[9] & 255) << 8)
            + (readBuffer[8] & 255);
        // convert to milliseconds since the Unix epoch
        return (julianDay - ParquetSpec.JULIAN_DAY_OF_EPOCH) * 24L * 60L * 60L * 1000L
            + timeOfDayNanos / 1000000;
    }
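For reference, the same decode can be written more compactly with a little-endian ByteBuffer. This is a sketch equivalent to the byte shifting above, not the shim's actual code, and the method name is made up:

    // Minimal sketch: an INT96 timestamp is 8 bytes of nanos-of-day followed by
    // 4 bytes of Julian day, both little-endian.
    static long dateFromInt96LittleEndian(Binary value) {
        ByteBuffer buf = value.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
        long timeOfDayNanos = buf.getLong(); // bytes 0-7
        int julianDay = buf.getInt();        // bytes 8-11
        return (julianDay - ParquetSpec.JULIAN_DAY_OF_EPOCH) * 86_400_000L
            + timeOfDayNanos / 1_000_000L;
    }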
Example #5
Source File: DumpCommand.java From parquet-tools with Apache License 2.0
    public static String binaryToString(Binary value) {
        byte[] data = value.getBytes();
        if (data == null) return null;

        try {
            CharBuffer buffer = UTF8_DECODER.decode(value.toByteBuffer());
            return buffer.toString();
        } catch (Throwable th) {
            // not valid UTF-8; fall through to the placeholder
        }
        return "<bytes...>";
    }
Example #6
Source File: ParquetGroup.java From incubator-gobblin with Apache License 2.0
    public void add(int fieldIndex, Binary value) {
        switch (this.getType().getType(fieldIndex).asPrimitiveType().getPrimitiveTypeName()) {
            case BINARY:
                this.add(fieldIndex, new BinaryValue(value));
                break;
            case INT96:
                this.add(fieldIndex, new Int96Value(value));
                break;
            default:
                throw new UnsupportedOperationException(
                    this.getType().asPrimitiveType().getName() + " not supported for Binary");
        }
    }
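A hypothetical usage sketch: it assumes ParquetGroup has a constructor taking the Parquet schema (as Group implementations in parquet-mr typically do), and the schema here is invented for illustration:

    // hypothetical schema with a single BINARY (UTF8) field at index 0
    MessageType schema = MessageTypeParser.parseMessageType(
        "message doc { required binary name (UTF8); }");
    ParquetGroup group = new ParquetGroup(schema); // assumed constructor
    group.add(0, Binary.fromString("Felix"));      // dispatches to the BINARY case above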
Example #7
Source File: ParquetProtobufExample.java From parquet-flinktacular with Apache License 2.0
    public static DataSet<Tuple2<Void, Person.Builder>> readProtobuf(ExecutionEnvironment env, String inputPath)
            throws IOException {
        Job job = Job.getInstance();
        HadoopInputFormat hadoopInputFormat =
            new HadoopInputFormat(new ProtoParquetInputFormat(), Void.class, Person.Builder.class, job);

        FileInputFormat.addInputPath(job, new Path(inputPath));

        // native predicate push-down: read only records that satisfy the given constraint
        BinaryColumn name = binaryColumn("name");
        FilterPredicate namePred = eq(name, Binary.fromString("Felix"));
        ParquetInputFormat.setFilterPredicate(job.getConfiguration(), namePred);

        // schema projection: don't read the type attribute of phone
        String projection = "message Person {\n" +
            "  required binary name (UTF8);\n" +
            "  required int32 id;\n" +
            "  optional binary email (UTF8);\n" +
            "  repeated group phone {\n" +
            "    required binary number (UTF8);\n" +
            "  }\n" +
            "}";
        ProtoParquetInputFormat.setRequestedProjection(job, projection);

        DataSet<Tuple2<Void, Person.Builder>> data = env.createInput(hadoopInputFormat);

        return data;
    }
Example #8
Source File: ParquetConverter.java From pentaho-hadoop-shims with Apache License 2.0
    static BigDecimal binaryToDecimal(Binary value, int precision, int scale) {
        /*
         * Precision <= 18 checks for the max number of digits for an unscaled long,
         * else treat with big integer conversion
         */
        if (precision <= 18) {
            ByteBuffer buffer = value.toByteBuffer();
            byte[] bytes = buffer.array();
            int start = buffer.arrayOffset() + buffer.position();
            int end = buffer.arrayOffset() + buffer.limit();
            long unscaled = 0L;
            int i = start;
            while (i < end) {
                unscaled = (unscaled << 8 | bytes[i] & 0xff);
                i++;
            }
            int bits = 8 * (end - start);
            long unscaledNew = (unscaled << (64 - bits)) >> (64 - bits);
            if (unscaledNew <= -pow(10, 18) || unscaledNew >= pow(10, 18)) {
                return new BigDecimal(unscaledNew);
            } else {
                return BigDecimal.valueOf(unscaledNew / pow(10, scale));
            }
        } else {
            return new BigDecimal(new BigInteger(value.getBytes()), scale);
        }
    }
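The precision <= 18 fast path assembles the unscaled value directly into a long (the shift pair sign-extends it to 64 bits) to avoid allocating a BigInteger. For comparison, the general-purpose conversion that the method already falls back to can stand alone; only the method name here is made up:

    // Works for any precision, at the cost of a BigInteger allocation per value.
    static BigDecimal binaryToDecimalSimple(Binary value, int scale) {
        return new BigDecimal(new BigInteger(value.getBytes()), scale);
    }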
Example #9
Source File: JsonElementConversionFactory.java From incubator-gobblin with Apache License 2.0
    @Override
    BinaryValue convertField(JsonElement value) {
        return new BinaryValue(Binary.fromString(value.getAsString()));
    }
Example #10
Source File: SimpleRecordConverter.java From parquet-tools with Apache License 2.0
    @Override
    public void addBinary(Binary value) {
        record.add(name, value.toStringUsingUTF8());
    }
Example #11
Source File: DumpCommand.java From parquet-tools with Apache License 2.0
    public static BigInteger binaryToBigInteger(Binary value) {
        byte[] data = value.getBytes();
        if (data == null) return null;

        return new BigInteger(data);
    }
Example #12
Source File: SliceDictionary.java From spliceengine with GNU Affero General Public License v3.0
    @Override
    public Binary decodeToBinary(int id) {
        return Binary.fromConstantByteBuffer(slice[id].toByteBuffer());
    }
Example #13
Source File: ParquetGroup.java From incubator-gobblin with Apache License 2.0
    public void add(int fieldIndex, String value) {
        this.add(fieldIndex, new BinaryValue(Binary.fromString(value)));
    }
Example #14
Source File: ParquetGroup.java From incubator-gobblin with Apache License 2.0
    public Binary getInt96(int fieldIndex, int index) {
        return ((Int96Value) this.getValue(fieldIndex, index)).getInt96();
    }
Example #15
Source File: ParquetGroup.java From incubator-gobblin with Apache License 2.0
    public Binary getBinary(int fieldIndex, int index) {
        return ((BinaryValue) this.getValue(fieldIndex, index)).getBinary();
    }
Example #16
Source File: ParquetAvroExample.java From parquet-flinktacular with Apache License 2.0
    public static DataSet<Tuple2<Void, Person>> readAvro(ExecutionEnvironment env, String inputPath)
            throws IOException {
        Job job = Job.getInstance();
        HadoopInputFormat hadoopInputFormat =
            new HadoopInputFormat(new AvroParquetInputFormat(), Void.class, Person.class, job);

        FileInputFormat.addInputPath(job, new Path(inputPath));

        // schema projection: don't read the type attribute of phone numbers
        Schema phone = Schema.createRecord("PhoneNumber", null, null, false);
        phone.setFields(Arrays.asList(
            new Schema.Field("number", Schema.create(Schema.Type.BYTES), null, null)));
        Schema array = Schema.createArray(phone);
        Schema union = Schema.createUnion(
            Lists.newArrayList(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.NULL)));
        Schema projection = Schema.createRecord("Person", null, null, false);
        projection.setFields(
            Arrays.asList(
                new Schema.Field("name", Schema.create(Schema.Type.BYTES), null, null),
                new Schema.Field("id", Schema.create(Schema.Type.INT), null, null),
                new Schema.Field("email", union, null, null),
                new Schema.Field("phone", array, null, null)
            )
        );
        AvroParquetInputFormat.setRequestedProjection(job, projection);

        // predicate push-down: read only persons with name = "Felix"
        BinaryColumn name = binaryColumn("name");
        FilterPredicate namePred = eq(name, Binary.fromString("Felix"));
        ParquetInputFormat.setFilterPredicate(job.getConfiguration(), namePred);

        DataSet<Tuple2<Void, Person>> data = env.createInput(hadoopInputFormat);

        return data;
    }