Java Code Examples for org.apache.parquet.schema.Type
The following examples show how to use
org.apache.parquet.schema.Type. These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: parquet-mr Source File: ProtoWriteSupport.java License: Apache License 2.0 | 6 votes |
private FieldWriter createWriter(FieldDescriptor fieldDescriptor, Type type) { switch (fieldDescriptor.getJavaType()) { case STRING: return new StringWriter() ; case MESSAGE: return createMessageWriter(fieldDescriptor, type); case INT: return new IntWriter(); case LONG: return new LongWriter(); case FLOAT: return new FloatWriter(); case DOUBLE: return new DoubleWriter(); case ENUM: return new EnumWriter(); case BOOLEAN: return new BooleanWriter(); case BYTE_STRING: return new BinaryWriter(); } return unknownType(fieldDescriptor);//should not be executed, always throws exception. }
Example 2
Source Project: parquet-mr Source File: SimpleGroupConverter.java License: Apache License 2.0 | 6 votes |
/**
 * Creates a converter for a group and one child converter for each field of {@code schema}.
 *
 * @param parent the converter of the enclosing group
 * @param index position of this group within its parent
 * @param schema the group type whose fields get child converters
 */
SimpleGroupConverter(SimpleGroupConverter parent, int index, GroupType schema) {
  this.parent = parent;
  this.index = index;

  converters = new Converter[schema.getFieldCount()];
  for (int pos = 0; pos < converters.length; pos++) {
    final Type fieldType = schema.getType(pos);
    if (!fieldType.isPrimitive()) {
      // Nested group: recurse with this converter as the parent.
      converters[pos] = new SimpleGroupConverter(this, pos, fieldType.asGroupType());
    } else {
      converters[pos] = new SimplePrimitiveConverter(this, pos);
    }
  }
}
Example 3
Source Project: parquet-mr Source File: ThriftSchemaConverter.java License: Apache License 2.0 | 6 votes |
/** * Returns whether the given type is the element type of a list or is a * synthetic group with one field that is the element type. This is * determined by checking whether the type can be a synthetic group and by * checking whether a potential synthetic group matches the expected * ThriftField. * <p> * This method never guesses because the expected ThriftField is known. * * @param repeatedType a type that may be the element type * @param thriftElement the expected Schema for list elements * @return {@code true} if the repeatedType is the element schema */ static boolean isListElementType(Type repeatedType, ThriftField thriftElement) { if (repeatedType.isPrimitive() || (repeatedType.asGroupType().getFieldCount() != 1) || (repeatedType.asGroupType().getType(0).isRepetition(REPEATED))) { // The repeated type must be the element type because it is an invalid // synthetic wrapper. Must be a group with one optional or required field return true; } else if (thriftElement != null && thriftElement.getType() instanceof StructType) { Set<String> fieldNames = new HashSet<String>(); for (ThriftField field : ((StructType) thriftElement.getType()).getChildren()) { fieldNames.add(field.getName()); } // If the repeated type is a subset of the structure of the ThriftField, // then it must be the element type. return fieldNames.contains(repeatedType.asGroupType().getFieldName(0)); } return false; }
Example 4
Source Project: parquet-mr Source File: SimpleGroup.java License: Apache License 2.0 | 6 votes |
/**
 * Appends a textual rendering of this group to {@code builder}, one line per value,
 * recursing into nested groups with a deeper indent.
 *
 * @param builder the buffer to append to
 * @param indent prefix written before every field name at this nesting level
 * @return the same {@code builder}, for chaining
 */
private StringBuilder appendToString(StringBuilder builder, String indent) {
  int position = 0;
  for (Type field : schema.getFields()) {
    final String fieldName = field.getName();
    final List<Object> fieldValues = data[position];
    position++;

    // Fields without any stored value produce no output.
    if (fieldValues == null || fieldValues.isEmpty()) {
      continue;
    }

    for (Object value : fieldValues) {
      builder.append(indent).append(fieldName);
      if (value == null) {
        builder.append(": NULL\n");
        continue;
      }
      if (value instanceof Group) {
        builder.append('\n');
        ((SimpleGroup) value).appendToString(builder, indent + " ");
        continue;
      }
      builder.append(": ").append(value.toString()).append('\n');
    }
  }
  return builder;
}
Example 5
Source Project: datacollector Source File: AvroSchemaConverterLogicalTypesPre19.java License: Apache License 2.0 | 6 votes |
private Schema convertFields(String name, List<Type> parquetFields) { List<Schema.Field> fields = new ArrayList<Schema.Field>(); for (Type parquetType : parquetFields) { Schema fieldSchema = convertField(parquetType); if (parquetType.isRepetition(REPEATED)) { throw new UnsupportedOperationException("REPEATED not supported outside LIST or MAP. Type: " + parquetType); } else if (parquetType.isRepetition(Type.Repetition.OPTIONAL)) { fields.add(new Schema.Field( parquetType.getName(), optional(fieldSchema), null, NullNode.getInstance())); } else { // REQUIRED fields.add(new Schema.Field(parquetType.getName(), fieldSchema, null, null)); } } Schema schema = Schema.createRecord(name, null, null, false); schema.setFields(fields); return schema; }
Example 6
Source Project: incubator-gobblin Source File: ParquetGroup.java License: Apache License 2.0 | 6 votes |
/**
 * Renders this group as text, one line per value, with nested groups rendered
 * recursively at a deeper indent.
 *
 * @param indent prefix written before every field name at this nesting level
 * @return the textual rendering of all fields and their values
 */
public String toString(String indent) {
  StringBuilder out = new StringBuilder();
  int position = 0;
  for (Type field : this.schema.getFields()) {
    final String fieldName = field.getName();
    for (Object value : this.data[position]) {
      out.append(indent).append(fieldName);
      if (value instanceof Group) {
        out.append("\n").append(((ParquetGroup) value).toString(indent + " "));
      } else if (value == null) {
        out.append(": NULL\n");
      } else {
        out.append(": ").append(value.toString()).append("\n");
      }
    }
    position++;
  }
  return out.toString();
}
Example 7
Source Project: garmadon Source File: HiveClientTest.java License: Apache License 2.0 | 6 votes |
/**
 * Creates a table for a one-column schema and checks the resulting table description
 * (location, table type and column types).
 */
@Test
public void createTableWithoutIssue() throws SQLException {
  final String table = "fs";
  final String databaseDir = hdfsTemp + "/garmadon_database";
  final String location = "file:" + databaseDir + "/fs";

  final PrimitiveType appId = new PrimitiveType(
      Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BINARY, "app_id");
  final MessageType schema = new MessageType("fs", appId);

  final HiveClient hiveClient =
      new HiveClient(driverName, "jdbc:hive2://localhost:" + port, "garmadon", databaseDir);
  hiveClient.createTableIfNotExist(table, schema, location);

  final HashMap<String, String> description = getResultHashTableDesc(hiveClient, table);
  assertEquals(location, description.get("Location"));
  assertEquals("EXTERNAL_TABLE", description.get("Table Type").trim());
  assertEquals("string", description.get("day"));
  assertEquals("string", description.get("app_id"));
}
Example 8
Source Project: parquet-mr Source File: DataWritableGroupConverter.java License: Apache License 2.0 | 6 votes |
/**
 * Creates a group converter with one child converter per selected field.
 *
 * @param selectedGroupType the fields that were requested
 * @param parent the converter of the enclosing group
 * @param index position of this group within its parent
 * @param containingGroupType the full group type the selection is taken from
 * @throws IllegalStateException if a selected field is not part of {@code containingGroupType}
 */
public DataWritableGroupConverter(final GroupType selectedGroupType, final HiveGroupConverter parent, final int index, final GroupType containingGroupType) {
  this.parent = parent;
  this.index = index;

  // The value array is sized by the containing group, the converters by the selection.
  currentArr = new Object[containingGroupType.getFieldCount()];
  final int selectedCount = selectedGroupType.getFieldCount();
  converters = new Converter[selectedCount];

  final List<Type> requested = selectedGroupType.getFields();
  for (int pos = 0; pos < selectedCount; pos++) {
    final Type requestedField = requested.get(pos);
    if (!containingGroupType.getFields().contains(requestedField)) {
      throw new IllegalStateException("Group type [" + containingGroupType + "] does not contain requested field: " + requestedField);
    }
    // The converter is created with the field's index inside the containing group.
    final int containingIndex = containingGroupType.getFieldIndex(requestedField.getName());
    converters[pos] = getConverterFromDescription(requestedField, containingIndex, this);
  }
}
Example 9
Source Project: parquet-mr Source File: GroupWriter.java License: Apache License 2.0 | 6 votes |
/**
 * Writes every populated field of {@code group} to the record consumer, recursing into
 * nested groups.
 *
 * @param group the group holding the values
 * @param type the schema describing the group's fields
 */
private void writeGroup(Group group, GroupType type) {
  final int fieldCount = type.getFieldCount();
  for (int field = 0; field < fieldCount; field++) {
    final int occurrences = group.getFieldRepetitionCount(field);
    if (occurrences <= 0) {
      // Nothing stored for this field: no startField/endField pair is emitted.
      continue;
    }

    final Type fieldType = type.getType(field);
    final String fieldName = fieldType.getName();

    recordConsumer.startField(fieldName, field);
    for (int occurrence = 0; occurrence < occurrences; occurrence++) {
      if (!fieldType.isPrimitive()) {
        recordConsumer.startGroup();
        writeGroup(group.getGroup(field, occurrence), fieldType.asGroupType());
        recordConsumer.endGroup();
      } else {
        group.writeValue(field, occurrence, recordConsumer);
      }
    }
    recordConsumer.endField(fieldName, field);
  }
}
Example 10
Source Project: parquet-mr Source File: ThriftRecordConverter.java License: Apache License 2.0 | 6 votes |
/**
 * Creates the converter for the single field wrapped by a repeated list group.
 *
 * @param listName name of the list, used in log and error messages
 * @param listEvents the event list of the enclosing list
 * @param repeatedType the repeated group whose first field is the element
 * @param thriftElement the Thrift field describing the list element
 * @throws ParquetDecodingException if the element is optional and null elements are not ignored
 */
public ElementConverter(String listName, List<TProtocol> listEvents, GroupType repeatedType, ThriftField thriftElement) {
  this.listEvents = listEvents;
  this.elementEvents = new ArrayList<TProtocol>();

  final Type elementType = repeatedType.getType(0);
  final boolean optionalElements = elementType.isRepetition(Type.Repetition.OPTIONAL);
  if (optionalElements && !ignoreNullElements) {
    throw new ParquetDecodingException("Cannot read list " + listName + " with optional elements: set " + IGNORE_NULL_LIST_ELEMENTS + " to ignore nulls.");
  }
  if (optionalElements) {
    LOG.warn("List " + listName + " has optional elements: null elements are ignored.");
  }

  elementConverter = newConverter(elementEvents, elementType, thriftElement);
}
Example 11
Source Project: parquet-mr Source File: TestPruneColumnsCommand.java License: Apache License 2.0 | 6 votes |
@Test public void testPruneMultiColumns() throws Exception { // Create Parquet file String inputFile = createParquetFile("input"); String outputFile = createTempFile("output"); // Remove columns String cargs[] = {inputFile, outputFile, "Name", "Gender"}; executeCommandLine(cargs); // Verify the schema are not changed for the columns not pruned ParquetMetadata pmd = ParquetFileReader.readFooter(conf, new Path(outputFile), ParquetMetadataConverter.NO_FILTER); MessageType schema = pmd.getFileMetaData().getSchema(); List<Type> fields = schema.getFields(); assertEquals(fields.size(), 2); assertEquals(fields.get(0).getName(), "DocId"); assertEquals(fields.get(1).getName(), "Links"); List<Type> subFields = fields.get(1).asGroupType().getFields(); assertEquals(subFields.size(), 2); assertEquals(subFields.get(0).getName(), "Backward"); assertEquals(subFields.get(1).getName(), "Forward"); // Verify the data are not changed for the columns not pruned List<String> prunePaths = Arrays.asList("Name", "Gender"); validateColumns(inputFile, prunePaths); }
Example 12
Source Project: parquet-mr Source File: AvroRecordConverter.java License: Apache License 2.0 | 6 votes |
/**
 * Creates a converter for an Avro collection backed by a Parquet list group.
 *
 * @param parent container that receives the finished collection
 * @param type the Parquet list group; its first field is the repeated type
 * @param avroSchema the Avro schema of the collection
 * @param model the Avro data model passed on to child converters
 * @param containerClass the collection class to use
 */
public AvroCollectionConverter(ParentValueContainer parent, GroupType type, Schema avroSchema, GenericData model, Class<?> containerClass) {
  this.parent = parent;
  this.avroSchema = avroSchema;
  this.containerClass = containerClass;

  final Schema elementSchema = AvroSchemaConverter.getNonNull(avroSchema.getElementType());
  final Type repeatedType = type.getType(0);

  // Whether the repeated type is the element type is always decided by matching it
  // against the element schema.
  if (!isElementType(repeatedType, elementSchema)) {
    // the element is wrapped in a synthetic group and may be optional
    converter = new ElementConverter(repeatedType.asGroupType(), elementSchema, model);
  } else {
    // the element type is the repeated type (and required)
    converter = newConverter(elementSchema, repeatedType, model, new ParentValueContainer() {
      @Override
      @SuppressWarnings("unchecked")
      public void add(Object value) {
        container.add(value);
      }
    });
  }
}
Example 13
Source Project: iceberg Source File: ParquetSchemaUtil.java License: Apache License 2.0 | 6 votes |
/**
 * Prunes columns from a Parquet file schema that was written without field ids.
 * <p>
 * Columns are matched by position: the n-th column of the file (counting from 1) is kept
 * when {@code n} is one of the field ids of the expected schema, and is tagged with that id.
 * <p>
 * The order of columns in the resulting Parquet schema matches the Parquet file.
 *
 * @param fileSchema schema from a Parquet file that does not have field ids.
 * @param expectedSchema expected schema
 * @return a parquet schema pruned using the expected schema
 */
public static MessageType pruneColumnsFallback(MessageType fileSchema, Schema expectedSchema) {
  Set<Integer> wantedIds = Sets.newHashSet();
  for (Types.NestedField expected : expectedSchema.columns()) {
    wantedIds.add(expected.fieldId());
  }

  MessageTypeBuilder pruned = org.apache.parquet.schema.Types.buildMessage();

  int ordinal = 0;
  for (Type column : fileSchema.getFields()) {
    ordinal++;  // ordinals start at 1
    if (!wantedIds.contains(ordinal)) {
      continue;
    }
    pruned.addField(column.withId(ordinal));
  }

  return pruned.named(fileSchema.getName());
}
Example 14
Source Project: parquet-mr Source File: AvroWriteSupport.java License: Apache License 2.0 | 6 votes |
private void writeRecordFields(GroupType schema, Schema avroSchema, Object record) { List<Type> fields = schema.getFields(); List<Schema.Field> avroFields = avroSchema.getFields(); int index = 0; // parquet ignores Avro nulls, so index may differ for (int avroIndex = 0; avroIndex < avroFields.size(); avroIndex++) { Schema.Field avroField = avroFields.get(avroIndex); if (avroField.schema().getType().equals(Schema.Type.NULL)) { continue; } Type fieldType = fields.get(index); Object value = model.getField(record, avroField.name(), avroIndex); if (value != null) { recordConsumer.startField(fieldType.getName(), index); writeValue(fieldType, avroField.schema(), value); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + avroField.name()); } index++; } }
Example 15
Source Project: dremio-oss Source File: LogicalListL2Converter.java License: Apache License 2.0 | 6 votes |
@Override protected void addChildConverter(String fieldName, OutputMutator mutator, List<Field> arrowSchema, Iterator<SchemaPath> colIterator, Type type, Function<String, String> childNameResolver) { final String nameForChild = "inner"; // Column name to ID mapping creates child entry as 'columnName'.list.element // So, we will append 'list.element' so that name to ID matching works correctly final String fullChildName = fieldName.concat(".").concat("list.element"); if (type.isPrimitive()) { converters.add( getConverterForType(fullChildName, type.asPrimitiveType())); } else { final GroupType groupType = type.asGroupType(); Collection<SchemaPath> c = Lists.newArrayList(colIterator); if (arrowSchema != null) { converters.add( groupConverterFromArrowSchema(fullChildName, "$data$", groupType, c)); } else { converters.add( defaultGroupConverter(fullChildName, mutator, groupType, c, null)); } } }
Example 16
Source Project: datacollector Source File: AvroSchemaConverter190Int96Avro17.java License: Apache License 2.0 | 5 votes |
/** * Implements the rules for interpreting existing data from the logical type * spec for the LIST annotation. This is used to produce the expected schema. * <p> * The AvroArrayConverter will decide whether the repeated type is the array * element type by testing whether the element schema and repeated type are * the same. This ensures that the LIST rules are followed when there is no * schema and that a schema can be provided to override the default behavior. */ private boolean isElementType(Type repeatedType, String parentName) { return ( // can't be a synthetic layer because it would be invalid repeatedType.isPrimitive() || repeatedType.asGroupType().getFieldCount() > 1 || repeatedType.asGroupType().getType(0).isRepetition(REPEATED) || // known patterns without the synthetic layer repeatedType.getName().equals("array") || repeatedType.getName().equals(parentName + "_tuple") || // default assumption assumeRepeatedIsListElement ); }
Example 17
Source Project: parquet-mr Source File: MetadataUtils.java License: Apache License 2.0 | 5 votes |
/**
 * Prints a summary line for a group (dotted depth prefix, name, repetition, field count)
 * and then the details of each of its fields one level deeper.
 *
 * @param out the writer to print to
 * @param type the group to describe
 * @param depth nesting depth, rendered as that many leading dots
 * @param container the message type, passed through to nested calls
 * @param cpath the current column path; this group's name is pushed for the duration of the call
 */
private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath) {
  final String groupName = type.getName();
  final String label = Strings.repeat(".", depth) + groupName;
  final Repetition repetition = type.getRepetition();
  final int fieldCount = type.getFieldCount();
  out.format("%s: %s F:%d%n", label, repetition, fieldCount);

  cpath.add(groupName);
  final int childDepth = depth + 1;
  for (Type child : type.getFields()) {
    showDetails(out, child, childDepth, container, cpath);
  }
  // Pop this group's name again.
  cpath.remove(cpath.size() - 1);
}
Example 18
Source Project: parquet-mr Source File: TupleConverter.java License: Apache License 2.0 | 5 votes |
public TupleConverter(GroupType parquetSchema, Schema pigSchema, boolean elephantBirdCompatible, boolean columnIndexAccess) { this.parquetSchema = parquetSchema; this.elephantBirdCompatible = elephantBirdCompatible; try { this.schemaSize = max(parquetSchema.getFieldCount(), pigSchema.getFields().size()); this.converters = new Converter[this.schemaSize]; for (int i = 0, c = 0; i < schemaSize; i++) { FieldSchema field = pigSchema.getField(i); if(parquetSchema.containsField(field.alias) || columnIndexAccess) { Type type = getType(columnIndexAccess, field.alias, i); if(type != null) { final int index = i; converters[c++] = newConverter(field, type, new ParentValueContainer() { @Override void add(Object value) { TupleConverter.this.set(index, value); } }, elephantBirdCompatible, columnIndexAccess); } } } } catch (FrontendException e) { throw new ParquetDecodingException("can not initialize pig converter from:\n" + parquetSchema + "\n" + pigSchema, e); } }
Example 19
Source Project: presto Source File: SingleLevelArrayMapKeyValuesSchemaConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Wraps a repeated type in a group carrying the given original type.
 *
 * @param repetition repetition of the wrapper group
 * @param alias name of the wrapper group
 * @param originalType original type annotation of the wrapper group
 * @param nested the wrapped type; must be REPEATED
 * @return the wrapper group
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
  if (nested.isRepetition(Repetition.REPEATED)) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
Example 20
Source Project: iceberg Source File: PigParquetReader.java License: Apache License 2.0 | 5 votes |
/**
 * Builds the reader for a list column from the reader of its element.
 *
 * @param expectedList the expected list type (not used by this implementation)
 * @param array the Parquet list group; its first field is the repeated group
 * @param elementReader reader for the list element
 * @return an {@code ArrayReader} configured with the repeated group's levels
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array, ParquetValueReader<?> elementReader) {
  final GroupType repeatedGroup = array.getFields().get(0).asGroupType();
  final String[] pathToRepeated = currentPath();

  // Levels of the repeated group, each reduced by one.
  final int repeatedDefinition = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  final int repeatedRepetition = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  final Type element = repeatedGroup.getType(0);
  final int elementDefinition = type.getMaxDefinitionLevel(path(element.getName())) - 1;

  return new ArrayReader<>(
      repeatedDefinition,
      repeatedRepetition,
      option(element, elementDefinition, elementReader));
}
Example 21
Source Project: parquet-mr Source File: TupleConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Starts a new tuple. In elephant-bird compatible mode, optional primitive numeric and
 * boolean fields are pre-filled with zero constants; all other positions are left as
 * created by the tuple factory.
 */
@Override
final public void start() {
  currentTuple = TF.newTuple(schemaSize);
  if (!elephantBirdCompatible) {
    return;
  }
  try {
    int position = 0;
    for (Type field : parquetSchema.getFields()) {
      if (field.isPrimitive() && field.isRepetition(Repetition.OPTIONAL)) {
        switch (field.asPrimitiveType().getPrimitiveTypeName()) {
          case INT32:
          case BOOLEAN:
            // BOOLEAN uses the same zero constant as INT32.
            currentTuple.set(position, I32_ZERO);
            break;
          case INT64:
            currentTuple.set(position, I64_ZERO);
            break;
          case FLOAT:
            currentTuple.set(position, FLOAT_ZERO);
            break;
          case DOUBLE:
            currentTuple.set(position, DOUBLE_ZERO);
            break;
          default:
            // other primitive types get no default
            break;
        }
      }
      position++;
    }
  } catch (ExecException e) {
    throw new RuntimeException(e);
  }
}
Example 22
Source Project: presto Source File: SingleLevelArraySchemaConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Converts parallel lists of Hive column names and types into Parquet types.
 *
 * @param columnNames Hive column names
 * @param columnTypes Hive column types, position-aligned with {@code columnNames}
 * @return one Parquet type per column, in the same order
 * @throws IllegalStateException if the two lists differ in size
 */
private static Type[] convertTypes(List<String> columnNames, List<TypeInfo> columnTypes) {
  final int columnCount = columnNames.size();
  if (columnCount != columnTypes.size()) {
    throw new IllegalStateException("Mismatched Hive columns and types. Hive columns names found : "
        + columnNames + " . And Hive types found : " + columnTypes);
  }

  final Type[] converted = new Type[columnCount];
  for (int col = 0; col < columnCount; col++) {
    converted[col] = convertType(columnNames.get(col), columnTypes.get(col));
  }
  return converted;
}
Example 23
Source Project: parquet-mr Source File: List3Levels.java License: Apache License 2.0 | 5 votes |
/**
 * Validates the three-level structure of a list and captures its parts: the LIST group,
 * its single repeated group, and that group's single element field.
 *
 * @param list the Parquet List
 * @throws IllegalArgumentException if the structure is not a valid three-level list
 */
public List3Levels(GroupType list) {
  final boolean annotatedSingleFieldGroup =
      list.getOriginalType() == OriginalType.LIST && list.getFields().size() == 1;
  if (!annotatedSingleFieldGroup) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }
  this.list = list;

  final Type repeatedField = list.getFields().get(0);
  // The middle level must be a repeated group holding exactly one field.
  final boolean validRepeatedGroup = !repeatedField.isPrimitive()
      && repeatedField.isRepetition(REPEATED)
      && repeatedField.asGroupType().getFields().size() == 1;
  if (!validRepeatedGroup) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }

  this.repeated = repeatedField.asGroupType();
  this.element = repeated.getFields().get(0);
}
Example 24
Source Project: dremio-oss Source File: ParquetReaderUtility.java License: Apache License 2.0 | 5 votes |
/** * Converts {@link ColumnDescriptor} to {@link SchemaPath} and converts any parquet LOGICAL LIST to something * the execution engine can understand (removes the extra 'list' and 'element' fields from the name) */ public static List<String> convertColumnDescriptor(final MessageType schema, final ColumnDescriptor columnDescriptor) { List<String> path = Lists.newArrayList(columnDescriptor.getPath()); // go through the path and find all logical lists int index = 0; Type type = schema; while (!type.isPrimitive()) { // don't bother checking the last element in the path as it is a primitive type type = type.asGroupType().getType(path.get(index)); if (type.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(type.asGroupType())) { // remove 'list' type = type.asGroupType().getType(path.get(index+1)); path.remove(index+1); // remove 'element' type = type.asGroupType().getType(path.get(index+1)); //handle nested list case while (type.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(type.asGroupType())) { // current 'list'.'element' entry path.remove(index+1); // nested 'list' entry type = type.asGroupType().getType(path.get(index+1)); path.remove(index+1); type = type.asGroupType().getType(path.get(index+1)); } // final 'list'.'element' entry path.remove(index+1); } index++; } return path; }
Example 25
Source Project: presto Source File: MapKeyValuesSchemaConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Converts a Hive map type into an OPTIONAL Parquet map group whose repeated group is
 * named {@code "map"}; the key is converted as REQUIRED.
 *
 * @param name name of the resulting group
 * @param typeInfo the Hive map type to convert
 * @return the Parquet group representing the map
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo) {
  final Type key = convertType(
      ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
  final Type value = convertType(
      ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo());
  return mapType(Repetition.OPTIONAL, name, "map", key, value);
}
Example 26
Source Project: presto Source File: MapKeyValuesSchemaConverter.java License: Apache License 2.0 | 5 votes |
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType) { //support projection only on key of a map if (valueType == null) { return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType)); } else { if (!valueType.getName().equals("value")) { throw new RuntimeException(valueType.getName() + " should be value"); } return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType, valueType)); } }
Example 27
Source Project: parquet-mr Source File: SimpleRecordConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Creates the converter for a field, taking its logical type annotation into account.
 * <p>
 * Primitive fields annotated as string or decimal get dedicated converters; group fields
 * annotated as map or list get dedicated record converters. Everything else falls back
 * to the plain primitive or record converter.
 *
 * @param field the Parquet field to create a converter for
 * @return the converter for {@code field}
 */
private Converter createConverter(Type field) {
  final LogicalTypeAnnotation annotation = field.getLogicalTypeAnnotation();

  if (!field.isPrimitive()) {
    final GroupType group = field.asGroupType();
    if (annotation == null) {
      return new SimpleRecordConverter(group, field.getName(), this);
    }
    return annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
      @Override
      public Optional<Converter> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
        return of(new SimpleMapRecordConverter(group, field.getName(), SimpleRecordConverter.this));
      }

      @Override
      public Optional<Converter> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
        return of(new SimpleListRecordConverter(group, field.getName(), SimpleRecordConverter.this));
      }
    }).orElse(new SimpleRecordConverter(group, field.getName(), this));
  }

  if (annotation == null) {
    return new SimplePrimitiveConverter(field.getName());
  }
  return annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
    @Override
    public Optional<Converter> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
      return of(new StringConverter(field.getName()));
    }

    @Override
    public Optional<Converter> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
      final int scale = decimalLogicalType.getScale();
      return of(new DecimalConverter(field.getName(), scale));
    }
  }).orElse(new SimplePrimitiveConverter(field.getName()));
}
Example 28
Source Project: pxf Source File: ParquetFileAccessor.java License: Apache License 2.0 | 5 votes |
/** * Opens the resource for read. * * @throws IOException if opening the resource failed */ @Override public boolean openForRead() throws IOException { file = new Path(context.getDataSource()); FileSplit fileSplit = HdfsUtilities.parseFileSplit(context); // Read the original schema from the parquet file MessageType originalSchema = getSchema(file, fileSplit); // Get a map of the column name to Types for the given schema Map<String, Type> originalFieldsMap = getOriginalFieldsMap(originalSchema); // Get the read schema. This is either the full set or a subset (in // case of column projection) of the greenplum schema. MessageType readSchema = buildReadSchema(originalFieldsMap, originalSchema); // Get the record filter in case of predicate push-down FilterCompat.Filter recordFilter = getRecordFilter(context.getFilterString(), originalFieldsMap, readSchema); // add column projection configuration.set(PARQUET_READ_SCHEMA, readSchema.toString()); fileReader = ParquetReader.builder(new GroupReadSupport(), file) .withConf(configuration) // Create reader for a given split, read a range in file .withFileRange(fileSplit.getStart(), fileSplit.getStart() + fileSplit.getLength()) .withFilter(recordFilter) .build(); context.setMetadata(readSchema); return true; }
Example 29
Source Project: parquet-mr Source File: ThriftRecordConverter.java License: Apache License 2.0 | 5 votes |
private StructConverter(List<TProtocol> events, GroupType parquetSchema, ThriftField field) { this.events = events; this.name = field.getName(); this.tStruct = new TStruct(name); this.thriftType = (StructType)field.getType(); this.schemaSize = parquetSchema.getFieldCount(); this.converters = new Converter[this.schemaSize]; List<ThriftField> thriftChildren = thriftType.getChildren(); for (int i = 0; i < schemaSize; i++) { Type schemaType = parquetSchema.getType(i); String fieldName = schemaType.getName(); ThriftField matchingThrift = null; for (ThriftField childField: thriftChildren) { String thriftChildName = childField.getName(); if (thriftChildName != null && thriftChildName.equalsIgnoreCase(fieldName)) { matchingThrift = childField; break; } } if (matchingThrift == null) { // this means the file did not contain that field // it will never be populated in this instance // other files might populate it continue; } if (schemaType.isPrimitive()) { converters[i] = new PrimitiveFieldHandler(newConverter(events, schemaType, matchingThrift).asPrimitiveConverter(), matchingThrift, events); } else { converters[i] = new GroupFieldhandler(newConverter(events, schemaType, matchingThrift).asGroupConverter(), matchingThrift, events); } } }
Example 30
Source Project: flink Source File: RowConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a converter for array elements.
 *
 * @param elementType Parquet type of the array element
 * @param elementClass Java class of the element; {@code Row} elements get a converter
 *     from {@code createConverter}, everything else a primitive converter
 * @param elementTypeInfo type information of the element
 * @param parentDataHolder holder stored on this converter
 * @param pos position stored on this converter
 */
ArrayConverter(Type elementType, Class elementClass, TypeInformation elementTypeInfo, ParentDataHolder parentDataHolder, int pos) {
  this.elementClass = elementClass;
  this.parentDataHolder = parentDataHolder;
  this.pos = pos;

  if (!elementClass.equals(Row.class)) {
    this.elementConverter = new RowConverter.RowPrimitiveConverter(elementType, this, 0);
  } else {
    this.elementConverter = createConverter(elementType, 0, elementTypeInfo, this);
  }
}