Java Code Examples for org.apache.parquet.schema.Type

The following examples show how to use org.apache.parquet.schema.Type. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: parquet-mr   Source File: ProtoWriteSupport.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Selects the {@code FieldWriter} that corresponds to the Java type reported
 * by the given protobuf field descriptor.
 *
 * @param fieldDescriptor descriptor whose Java type drives the selection
 * @param type Parquet type forwarded to the message writer for MESSAGE fields
 * @return the writer for the field; for a Java type that is not listed the
 *         result of {@code unknownType} is used (per the original note, that
 *         call is expected to always throw)
 */
private FieldWriter createWriter(FieldDescriptor fieldDescriptor, Type type) {
  final FieldWriter writer;
  switch (fieldDescriptor.getJavaType()) {
    case STRING:
      writer = new StringWriter();
      break;
    case MESSAGE:
      writer = createMessageWriter(fieldDescriptor, type);
      break;
    case INT:
      writer = new IntWriter();
      break;
    case LONG:
      writer = new LongWriter();
      break;
    case FLOAT:
      writer = new FloatWriter();
      break;
    case DOUBLE:
      writer = new DoubleWriter();
      break;
    case ENUM:
      writer = new EnumWriter();
      break;
    case BOOLEAN:
      writer = new BooleanWriter();
      break;
    case BYTE_STRING:
      writer = new BinaryWriter();
      break;
    default:
      // should not be executed, always throws exception.
      writer = unknownType(fieldDescriptor);
      break;
  }
  return writer;
}
 
Example 2
Source Project: parquet-mr   Source File: SimpleGroupConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a converter for a group and one child converter per field of the
 * group's schema: a primitive converter for primitive fields and a nested
 * group converter for everything else.
 *
 * @param parent the converter of the enclosing group
 * @param index  position of this group within its parent
 * @param schema the group type whose fields are converted
 */
SimpleGroupConverter(SimpleGroupConverter parent, int index, GroupType schema) {
  this.parent = parent;
  this.index = index;

  final int fieldCount = schema.getFieldCount();
  converters = new Converter[fieldCount];

  for (int position = 0; position < fieldCount; position++) {
    final Type fieldType = schema.getType(position);
    if (!fieldType.isPrimitive()) {
      converters[position] = new SimpleGroupConverter(this, position, fieldType.asGroupType());
    } else {
      converters[position] = new SimplePrimitiveConverter(this, position);
    }
  }
}
 
Example 3
Source Project: parquet-mr   Source File: ThriftSchemaConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether the repeated type of a list is itself the element type, as
 * opposed to a synthetic single-field group that merely wraps the element.
 * <p>
 * The repeated type is first checked for shapes that cannot be a synthetic
 * wrapper; otherwise the single field of the candidate wrapper is compared
 * against the children of the expected ThriftField.
 * <p>
 * No guessing is involved because the expected ThriftField is known.
 *
 * @param repeatedType a type that may be the element type
 * @param thriftElement the expected Schema for list elements
 * @return {@code true} if the repeatedType is the element schema
 */
static boolean isListElementType(Type repeatedType,
                                 ThriftField thriftElement) {
  // A primitive can never be a synthetic wrapper group.
  if (repeatedType.isPrimitive()) {
    return true;
  }
  // A valid synthetic wrapper is a group with exactly one optional or
  // required field; anything else must be the element type itself.
  if (repeatedType.asGroupType().getFieldCount() != 1) {
    return true;
  }
  if (repeatedType.asGroupType().getType(0).isRepetition(REPEATED)) {
    return true;
  }
  if (thriftElement == null || !(thriftElement.getType() instanceof StructType)) {
    return false;
  }

  Set<String> childNames = new HashSet<String>();
  for (ThriftField child : ((StructType) thriftElement.getType()).getChildren()) {
    childNames.add(child.getName());
  }
  // If the single field of the repeated type is one of the struct's
  // children, the repeated type is treated as the element type.
  String onlyFieldName = repeatedType.asGroupType().getFieldName(0);
  return childNames.contains(onlyFieldName);
}
 
Example 4
Source Project: parquet-mr   Source File: SimpleGroup.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Appends a textual dump of this group to the given builder, one line per
 * value, with nested groups rendered recursively at a deeper indent.
 *
 * @param builder destination of the rendered text
 * @param indent  prefix written before every field name at this level
 * @return the same builder that was passed in
 */
private StringBuilder appendToString(StringBuilder builder, String indent) {
  List<Type> fields = schema.getFields();
  for (int position = 0; position < fields.size(); position++) {
    String name = fields.get(position).getName();
    List<Object> values = data[position];
    // Fields without any recorded value produce no output.
    if (values == null || values.isEmpty()) {
      continue;
    }
    for (Object value : values) {
      builder.append(indent);
      builder.append(name);
      if (value == null) {
        builder.append(": NULL\n");
      } else if (value instanceof Group) {
        builder.append('\n');
        SimpleGroup nested = (SimpleGroup) value;
        nested.appendToString(builder, indent + "  ");
      } else {
        builder.append(": ");
        builder.append(value.toString());
        builder.append('\n');
      }
    }
  }
  return builder;
}
 
Example 5
/**
 * Builds a record schema named {@code name} with one field per Parquet type.
 * Optional Parquet fields are wrapped with {@code optional(...)} and given a
 * null default; all other non-repeated fields are added as-is.
 *
 * @param name          name of the record schema to create
 * @param parquetFields Parquet field types to convert, in order
 * @return the record schema holding the converted fields
 * @throws UnsupportedOperationException if a field is REPEATED
 */
private Schema convertFields(String name, List<Type> parquetFields) {
  List<Schema.Field> converted = new ArrayList<Schema.Field>();
  for (Type parquetType : parquetFields) {
    Schema fieldSchema = convertField(parquetType);
    if (parquetType.isRepetition(REPEATED)) {
      throw new UnsupportedOperationException("REPEATED not supported outside LIST or MAP. Type: " + parquetType);
    }

    final Schema.Field recordField;
    if (parquetType.isRepetition(Type.Repetition.OPTIONAL)) {
      recordField = new Schema.Field(
          parquetType.getName(), optional(fieldSchema), null, NullNode.getInstance());
    } else {
      // REQUIRED
      recordField = new Schema.Field(parquetType.getName(), fieldSchema, null, null);
    }
    converted.add(recordField);
  }

  Schema record = Schema.createRecord(name, null, null, false);
  record.setFields(converted);
  return record;
}
 
Example 6
Source Project: incubator-gobblin   Source File: ParquetGroup.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Renders this group as text, one line per value, with nested groups rendered
 * recursively at a deeper indent.
 *
 * @param indent prefix written before every field name at this level
 * @return the rendered text
 */
public String toString(String indent) {
  StringBuilder text = new StringBuilder();
  List<Type> fields = this.schema.getFields();
  for (int position = 0; position < fields.size(); position++) {
    String name = fields.get(position).getName();
    for (Object value : this.data[position]) {
      text.append(indent);
      text.append(name);
      if (value == null) {
        text.append(": NULL\n");
      } else if (value instanceof Group) {
        ParquetGroup nested = (ParquetGroup) value;
        text.append("\n");
        text.append(nested.toString(indent + "  "));
      } else {
        text.append(": ");
        text.append(value.toString());
        text.append("\n");
      }
    }
  }
  return text.toString();
}
 
Example 7
Source Project: garmadon   Source File: HiveClientTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a table for a one-column schema and checks the location, table
 * type and column types reported by the table description.
 */
@Test
public void createTableWithoutIssue() throws SQLException {
    final String table = "fs";
    final String location = "file:" + hdfsTemp + "/garmadon_database/fs";

    // Schema with a single optional binary column.
    MessageType schema = new MessageType(
        "fs",
        new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BINARY, "app_id"));

    HiveClient hiveClient = new HiveClient(
        driverName,
        "jdbc:hive2://localhost:" + port,
        "garmadon",
        hdfsTemp + "/garmadon_database");
    hiveClient.createTableIfNotExist(table, schema, location);

    HashMap<String, String> description = getResultHashTableDesc(hiveClient, table);
    assertEquals(location, description.get("Location"));
    assertEquals("EXTERNAL_TABLE", description.get("Table Type").trim());
    assertEquals("string", description.get("day"));
    assertEquals("string", description.get("app_id"));
}
 
Example 8
Source Project: parquet-mr   Source File: DataWritableGroupConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a group converter with one child converter per selected field.
 * The value array is sized by the containing group, and each child converter
 * is created with the field's index inside the containing group.
 *
 * @param selectedGroupType   the group type holding the requested fields
 * @param parent              the converter of the enclosing group
 * @param index               position of this group within its parent
 * @param containingGroupType the full group type the selection is taken from
 * @throws IllegalStateException if a selected field is not part of the
 *         containing group type
 */
public DataWritableGroupConverter(final GroupType selectedGroupType,
    final HiveGroupConverter parent, final int index, final GroupType containingGroupType) {
  this.parent = parent;
  this.index = index;
  final int totalFieldCount = containingGroupType.getFieldCount();
  final int selectedFieldCount = selectedGroupType.getFieldCount();

  currentArr = new Object[totalFieldCount];
  converters = new Converter[selectedFieldCount];

  List<Type> selectedFields = selectedGroupType.getFields();
  for (int position = 0; position < selectedFieldCount; position++) {
    Type selected = selectedFields.get(position);
    if (!containingGroupType.getFields().contains(selected)) {
      throw new IllegalStateException("Group type [" + containingGroupType +
          "] does not contain requested field: " + selected);
    }
    int containingIndex = containingGroupType.getFieldIndex(selected.getName());
    converters[position] = getConverterFromDescription(selected, containingIndex, this);
  }
}
 
Example 9
Source Project: parquet-mr   Source File: GroupWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes every populated field of a group to the record consumer, recursing
 * into nested groups. Fields with no values are skipped entirely.
 *
 * @param group the group whose values are written
 * @param type  the schema describing the group's fields
 */
private void writeGroup(Group group, GroupType type) {
  final int fieldCount = type.getFieldCount();
  for (int field = 0; field < fieldCount; ++field) {
    final int valueCount = group.getFieldRepetitionCount(field);
    if (valueCount <= 0) {
      continue;
    }

    final Type fieldType = type.getType(field);
    final String fieldName = fieldType.getName();
    recordConsumer.startField(fieldName, field);
    for (int index = 0; index < valueCount; ++index) {
      if (!fieldType.isPrimitive()) {
        recordConsumer.startGroup();
        writeGroup(group.getGroup(field, index), fieldType.asGroupType());
        recordConsumer.endGroup();
      } else {
        group.writeValue(field, index, recordConsumer);
      }
    }
    recordConsumer.endField(fieldName, field);
  }
}
 
Example 10
Source Project: parquet-mr   Source File: ThriftRecordConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the converter for the elements of a list. When the element type is
 * optional, the list is either rejected or accepted with a warning, depending
 * on {@code ignoreNullElements}.
 *
 * @param listName      name of the list, used in log and error messages
 * @param listEvents    the event list of the enclosing list
 * @param repeatedType  the repeated group whose first field is the element
 * @param thriftElement the Thrift field describing the element
 * @throws ParquetDecodingException if elements are optional and null
 *         elements are not being ignored
 */
public ElementConverter(String listName, List<TProtocol> listEvents,
                        GroupType repeatedType, ThriftField thriftElement) {
  this.listEvents = listEvents;
  this.elementEvents = new ArrayList<TProtocol>();
  Type elementType = repeatedType.getType(0);

  boolean optionalElements = elementType.isRepetition(Type.Repetition.OPTIONAL);
  if (optionalElements && !ignoreNullElements) {
    throw new ParquetDecodingException("Cannot read list " + listName +
        " with optional elements: set " + IGNORE_NULL_LIST_ELEMENTS +
        " to ignore nulls.");
  }
  if (optionalElements) {
    LOG.warn("List " + listName +
        " has optional elements: null elements are ignored.");
  }

  elementConverter = newConverter(elementEvents, elementType, thriftElement);
}
 
Example 11
Source Project: parquet-mr   Source File: TestPruneColumnsCommand.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prunes the "Name" and "Gender" columns from a generated Parquet file and
 * verifies that the remaining schema and the data of the columns that were
 * kept are unchanged.
 */
@Test
public void testPruneMultiColumns() throws Exception {
  // Create Parquet file
  String inputFile = createParquetFile("input");
  String outputFile = createTempFile("output");

  // Remove columns
  String[] cargs = {inputFile, outputFile, "Name", "Gender"};
  executeCommandLine(cargs);

  // Verify the schema are not changed for the columns not pruned.
  // assertEquals takes (expected, actual); keeping that order makes failure
  // messages report the right value as "expected".
  ParquetMetadata pmd = ParquetFileReader.readFooter(conf, new Path(outputFile), ParquetMetadataConverter.NO_FILTER);
  MessageType schema = pmd.getFileMetaData().getSchema();
  List<Type> fields = schema.getFields();
  assertEquals(2, fields.size());
  assertEquals("DocId", fields.get(0).getName());
  assertEquals("Links", fields.get(1).getName());
  List<Type> subFields = fields.get(1).asGroupType().getFields();
  assertEquals(2, subFields.size());
  assertEquals("Backward", subFields.get(0).getName());
  assertEquals("Forward", subFields.get(1).getName());

  // Verify the data are not changed for the columns not pruned
  List<String> prunePaths = Arrays.asList("Name", "Gender");
  validateColumns(inputFile, prunePaths);
}
 
Example 12
Source Project: parquet-mr   Source File: AvroRecordConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a converter that collects list elements into a container. The
 * repeated type is matched against the Avro element schema to decide whether
 * it is the element itself or a synthetic group wrapping the element.
 *
 * @param parent         container receiving the finished collection
 * @param type           the Parquet group annotated as a list
 * @param avroSchema     the Avro schema of the collection
 * @param model          the Avro data model used to build values
 * @param containerClass the class of the container to produce
 */
public AvroCollectionConverter(ParentValueContainer parent, GroupType type,
                               Schema avroSchema, GenericData model,
                               Class<?> containerClass) {
  this.parent = parent;
  this.avroSchema = avroSchema;
  this.containerClass = containerClass;

  Schema elementSchema = AvroSchemaConverter.getNonNull(avroSchema.getElementType());
  Type repeatedType = type.getType(0);

  // The decision is always made by matching the repeated type against the
  // element schema.
  if (!isElementType(repeatedType, elementSchema)) {
    // the element is wrapped in a synthetic group and may be optional
    converter = new ElementConverter(repeatedType.asGroupType(), elementSchema, model);
  } else {
    // the element type is the repeated type (and required)
    converter = newConverter(elementSchema, repeatedType, model, new ParentValueContainer() {
      @Override
      @SuppressWarnings("unchecked")
      public void add(Object element) {
        container.add(element);
      }
    });
  }
}
 
Example 13
Source Project: iceberg   Source File: ParquetSchemaUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prunes columns from a Parquet file schema that was written without field ids.
 * <p>
 * Each top-level column of the file is assigned an id equal to its 1-based
 * position, and is kept only if that id is a column id of the expected schema.
 * This relies on schema evolution having preserved column order, with no
 * column deletions.
 * <p>
 * Columns appear in the result in the same order as in the Parquet file.
 *
 * @param fileSchema schema from a Parquet file that does not have field ids.
 * @param expectedSchema expected schema
 * @return a parquet schema pruned using the expected schema
 */
public static MessageType pruneColumnsFallback(MessageType fileSchema, Schema expectedSchema) {
  Set<Integer> wantedIds = Sets.newHashSet();
  for (Types.NestedField column : expectedSchema.columns()) {
    wantedIds.add(column.fieldId());
  }

  MessageTypeBuilder pruned = org.apache.parquet.schema.Types.buildMessage();

  List<Type> fileFields = fileSchema.getFields();
  for (int position = 0; position < fileFields.size(); position++) {
    // ids are the 1-based position of the column in the file
    int ordinal = position + 1;
    if (wantedIds.contains(ordinal)) {
      pruned.addField(fileFields.get(position).withId(ordinal));
    }
  }

  return pruned.named(fileSchema.getName());
}
 
Example 14
Source Project: parquet-mr   Source File: AvroWriteSupport.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the fields of an Avro record to the record consumer. Avro fields of
 * type NULL are skipped and do not consume a Parquet field index.
 *
 * @param schema     the Parquet group type of the record
 * @param avroSchema the Avro schema of the record
 * @param record     the record whose field values are written
 * @throws RuntimeException if a required Parquet field has a null value
 */
private void writeRecordFields(GroupType schema, Schema avroSchema,
                               Object record) {
  List<Type> fields = schema.getFields();
  List<Schema.Field> avroFields = avroSchema.getFields();

  // parquet ignores Avro nulls, so the two indexes may drift apart
  int parquetIndex = 0;
  int avroIndex = -1;
  for (Schema.Field avroField : avroFields) {
    avroIndex++;
    if (avroField.schema().getType().equals(Schema.Type.NULL)) {
      continue;
    }

    Type fieldType = fields.get(parquetIndex);
    Object value = model.getField(record, avroField.name(), avroIndex);
    if (value == null) {
      if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
        throw new RuntimeException("Null-value for required field: " + avroField.name());
      }
    } else {
      recordConsumer.startField(fieldType.getName(), parquetIndex);
      writeValue(fieldType, avroField.schema(), value);
      recordConsumer.endField(fieldType.getName(), parquetIndex);
    }
    parquetIndex++;
  }
}
 
Example 15
Source Project: dremio-oss   Source File: LogicalListL2Converter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Adds the converter for the child of this list level to {@code converters}.
 * The child is registered under {@code fieldName + ".list.element"}.
 *
 * @param fieldName         name of the list column
 * @param mutator           output mutator passed to the default group converter
 * @param arrowSchema       when non-null, the group converter is built from
 *                          the Arrow schema instead of the default path
 * @param colIterator       projected columns; only consumed for group children
 * @param type              Parquet type of the child
 * @param childNameResolver not used by this implementation
 */
@Override
protected void addChildConverter(String fieldName, OutputMutator mutator, List<Field> arrowSchema, Iterator<SchemaPath> colIterator, Type type, Function<String, String> childNameResolver) {
  // Column name to ID mapping creates child entry as 'columnName'.list.element
  // So, we will append 'list.element' so that name to ID matching works correctly
  final String fullChildName = fieldName.concat(".").concat("list.element");
  if (type.isPrimitive()) {
    converters.add(getConverterForType(fullChildName, type.asPrimitiveType()));
  } else {
    final GroupType groupType = type.asGroupType();
    Collection<SchemaPath> c = Lists.newArrayList(colIterator);
    if (arrowSchema != null) {
      converters.add(groupConverterFromArrowSchema(fullChildName, "$data$", groupType, c));
    } else {
      converters.add(defaultGroupConverter(fullChildName, mutator, groupType, c, null));
    }
  }
}
 
Example 16
/**
 * Applies the LIST annotation rules for interpreting existing data, deciding
 * whether the repeated type is the list element or a synthetic wrapper. The
 * result is used to produce the expected schema.
 * <p>
 * The AvroArrayConverter will decide whether the repeated type is the array
 * element type by testing whether the element schema and repeated type are
 * the same. This ensures that the LIST rules are followed when there is no
 * schema and that a schema can be provided to override the default behavior.
 */
private boolean isElementType(Type repeatedType, String parentName) {
  // can't be a synthetic layer because it would be invalid
  if (repeatedType.isPrimitive()) {
    return true;
  }
  if (repeatedType.asGroupType().getFieldCount() > 1) {
    return true;
  }
  if (repeatedType.asGroupType().getType(0).isRepetition(REPEATED)) {
    return true;
  }

  // known patterns without the synthetic layer
  if (repeatedType.getName().equals("array")) {
    return true;
  }
  if (repeatedType.getName().equals(parentName + "_tuple")) {
    return true;
  }

  // default assumption
  return assumeRepeatedIsListElement;
}
 
Example 17
Source Project: parquet-mr   Source File: MetadataUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Prints a one-line summary of a group type (name prefixed by one dot per
 * depth level, repetition, and field count) and then the details of each of
 * its fields one level deeper.
 *
 * @param out       writer receiving the formatted output
 * @param type      the group type to describe
 * @param depth     nesting depth, which sets the number of leading dots
 * @param container the message type passed through to nested calls
 * @param cpath     current column path; this group's name is pushed while
 *                  its fields are shown and popped afterwards
 */
private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath) {
  out.format(
      "%s: %s F:%d%n",
      Strings.repeat(".", depth) + type.getName(),
      type.getRepetition(),
      type.getFieldCount());

  cpath.add(type.getName());
  final int childDepth = depth + 1;
  for (Type child : type.getFields()) {
    showDetails(out, child, childDepth, container, cpath);
  }
  cpath.remove(cpath.size() - 1);
}
 
Example 18
Source Project: parquet-mr   Source File: TupleConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a tuple converter with child converters for the Pig fields that can
 * be resolved to a Parquet type. A Pig field is considered when the Parquet
 * schema contains its alias or when column index access is enabled.
 *
 * @param parquetSchema          the Parquet group type being read
 * @param pigSchema              the Pig schema of the tuples to produce
 * @param elephantBirdCompatible compatibility flag stored on this converter
 *                               and forwarded to child converters
 * @param columnIndexAccess      whether fields are resolved by position
 * @throws ParquetDecodingException if the Pig schema cannot be accessed
 */
public TupleConverter(GroupType parquetSchema, Schema pigSchema, boolean elephantBirdCompatible, boolean columnIndexAccess) {
  this.parquetSchema = parquetSchema;
  this.elephantBirdCompatible = elephantBirdCompatible;
  try {
    this.schemaSize = max(parquetSchema.getFieldCount(), pigSchema.getFields().size());
    this.converters = new Converter[this.schemaSize];
    for (int i = 0, next = 0; i < schemaSize; i++) {
      FieldSchema field = pigSchema.getField(i);
      if (!parquetSchema.containsField(field.alias) && !columnIndexAccess) {
        continue;
      }

      Type type = getType(columnIndexAccess, field.alias, i);
      if (type == null) {
        continue;
      }

      // Each child writes its value into this tuple at the Pig position.
      final int index = i;
      converters[next++] = newConverter(field, type, new ParentValueContainer() {
        @Override
        void add(Object value) {
          TupleConverter.this.set(index, value);
        }
      }, elephantBirdCompatible, columnIndexAccess);
    }
  } catch (FrontendException e) {
    throw new ParquetDecodingException("can not initialize pig converter from:\n" + parquetSchema + "\n" + pigSchema, e);
  }
}
 
Example 19
/**
 * Wraps a repeated type in a group carrying the given original type.
 *
 * @param repetition   repetition of the wrapping group
 * @param alias        name of the wrapping group
 * @param originalType original type annotation of the wrapping group
 * @param nested       the type to wrap
 * @return the wrapping group type
 * @throws IllegalArgumentException if {@code nested} is not repeated
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
{
    if (nested.isRepetition(Repetition.REPEATED)) {
        return new GroupType(repetition, alias, originalType, nested);
    }
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
 
Example 20
Source Project: iceberg   Source File: PigParquetReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the reader for a list column from the reader of its elements.
 * Definition and repetition levels are the maximum levels at the respective
 * path, minus one.
 *
 * @param expectedList  the expected list type (not used here)
 * @param array         the Parquet group for the list; its first field is
 *                      taken as the repeated group
 * @param elementReader reader for the list elements
 * @return an array reader wrapping the element reader
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array, ParquetValueReader<?> elementReader) {
  GroupType repeatedGroup = array.getFields().get(0).asGroupType();
  String[] pathToRepeated = currentPath();

  int repeatedDefLevel = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  int repeatedRepLevel = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  Type element = repeatedGroup.getType(0);
  int elementDefLevel = type.getMaxDefinitionLevel(path(element.getName())) - 1;

  ParquetValueReader<?> wrappedElementReader = option(element, elementDefLevel, elementReader);
  return new ArrayReader<>(repeatedDefLevel, repeatedRepLevel, wrappedElementReader);
}
 
Example 21
Source Project: parquet-mr   Source File: TupleConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Starts a new tuple. In elephant-bird compatible mode, every optional
 * primitive field of a numeric or boolean Parquet type is pre-populated with
 * a zero constant; other fields are left as created.
 */
@Override
public final void start() {
  currentTuple = TF.newTuple(schemaSize);
  if (!elephantBirdCompatible) {
    return;
  }

  try {
    int position = 0;
    for (Type field : parquetSchema.getFields()) {
      if (field.isPrimitive() && field.isRepetition(Repetition.OPTIONAL)) {
        switch (field.asPrimitiveType().getPrimitiveTypeName()) {
          case INT32:
          case BOOLEAN:
            // booleans share the 32-bit integer zero
            currentTuple.set(position, I32_ZERO);
            break;
          case INT64:
            currentTuple.set(position, I64_ZERO);
            break;
          case FLOAT:
            currentTuple.set(position, FLOAT_ZERO);
            break;
          case DOUBLE:
            currentTuple.set(position, DOUBLE_ZERO);
            break;
          default:
            // other primitive types get no default
            break;
        }
      }
      position++;
    }
  } catch (ExecException e) {
    throw new RuntimeException(e);
  }
}
 
Example 22
Source Project: presto   Source File: SingleLevelArraySchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts parallel lists of Hive column names and types into an array of
 * Parquet types, in the same order.
 *
 * @param columnNames Hive column names
 * @param columnTypes Hive column types, one per name
 * @return the converted types
 * @throws IllegalStateException if the two lists differ in size
 */
private static Type[] convertTypes(List<String> columnNames, List<TypeInfo> columnTypes)
{
    final int columnCount = columnNames.size();
    if (columnCount != columnTypes.size()) {
        throw new IllegalStateException("Mismatched Hive columns and types. Hive columns names" +
                " found : " + columnNames + " . And Hive types found : " + columnTypes);
    }

    Type[] converted = new Type[columnCount];
    int position = 0;
    while (position < columnCount) {
        converted[position] = convertType(columnNames.get(position), columnTypes.get(position));
        ++position;
    }
    return converted;
}
 
Example 23
Source Project: parquet-mr   Source File: List3Levels.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Validates the structure of a list and captures its repeated group and
 * element type. The list must be annotated LIST and hold exactly one field,
 * which must be a repeated group with exactly one field.
 *
 * @param list the Parquet List
 * @throws IllegalArgumentException if the structure does not match
 */
public List3Levels(GroupType list) {
  if (list.getOriginalType() != OriginalType.LIST) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }
  if (list.getFields().size() != 1) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }
  this.list = list;

  Type repeatedField = list.getFields().get(0);
  boolean validRepeated = !repeatedField.isPrimitive()
      && repeatedField.isRepetition(REPEATED)
      && repeatedField.asGroupType().getFields().size() == 1;
  if (!validRepeated) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }

  this.repeated = repeatedField.asGroupType();
  this.element = repeated.getFields().get(0);
}
 
Example 24
Source Project: dremio-oss   Source File: ParquetReaderUtility.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts {@link ColumnDescriptor} to {@link SchemaPath} and converts any parquet LOGICAL LIST to something
 * the execution engine can understand (removes the extra 'list' and 'element' fields from the name)
 * <p>
 * The result is returned as a list of path segment names. It starts as a copy of the descriptor's
 * path; for every supported LIST group met while walking the schema, the path entries that follow
 * the list's own name are removed.
 *
 * @param schema the message type the column belongs to; the walk starts here
 * @param columnDescriptor the column whose path is converted
 * @return the path segments with the list wrapper entries removed
 */
public static List<String> convertColumnDescriptor(final MessageType schema, final ColumnDescriptor columnDescriptor) {
  // Working copy of the path; entries are removed from it in place below.
  List<String> path = Lists.newArrayList(columnDescriptor.getPath());

  // go through the path and find all logical lists
  int index = 0;
  Type type = schema;
  while (!type.isPrimitive()) { // don't bother checking the last element in the path as it is a primitive type
    // Descend into the child named by the current path segment.
    type = type.asGroupType().getType(path.get(index));
    if (type.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(type.asGroupType())) {
      // remove 'list'
      // (the segment right after the list's own name; after the removal,
      // index+1 refers to the segment that used to follow it)
      type = type.asGroupType().getType(path.get(index+1));
      path.remove(index+1);

      // remove 'element'
      // (only descends here; the entry itself is removed further down)
      type = type.asGroupType().getType(path.get(index+1));

      //handle nested list case
      while (type.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(type.asGroupType())) {
        // current 'list'.'element' entry
        path.remove(index+1);

        // nested 'list' entry
        type = type.asGroupType().getType(path.get(index+1));
        path.remove(index+1);

        // descend into the nested list's element; its entry is removed by the
        // next iteration or by the final removal below
        type = type.asGroupType().getType(path.get(index+1));
      }

      // final 'list'.'element' entry
      path.remove(index+1);

    }
    // Move on to the next remaining path segment.
    index++;
  }
  return path;
}
 
Example 25
Source Project: presto   Source File: MapKeyValuesSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive map type into an optional Parquet map group named
 * {@code name}, with a required key and a value converted with the default
 * {@code convertType} overload.
 *
 * @param name     name of the resulting group
 * @param typeInfo Hive map type supplying the key and value type infos
 * @return the Parquet group type for the map
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo)
{
    return mapType(
            Repetition.OPTIONAL,
            name,
            "map",
            convertType(
                    ParquetHiveSerDe.MAP_KEY.toString(),
                    typeInfo.getMapKeyTypeInfo(),
                    Repetition.REQUIRED),
            convertType(
                    ParquetHiveSerDe.MAP_VALUE.toString(),
                    typeInfo.getMapValueTypeInfo()));
}
 
Example 26
Source Project: presto   Source File: MapKeyValuesSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a Parquet map group: a repeated group named {@code mapAlias} that
 * holds the key (and the value, when present), wrapped in a group annotated
 * MAP_KEY_VALUE.
 *
 * @param repetition repetition of the outer group
 * @param alias      name of the outer group
 * @param mapAlias   name of the repeated inner group
 * @param keyType    type of the map key
 * @param valueType  type of the map value, or {@code null} to project only
 *                   the key
 * @return the map group type
 * @throws RuntimeException if a value type is given and is not named "value"
 */
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType)
{
    final GroupType keyValueGroup;
    if (valueType != null) {
        if (!valueType.getName().equals("value")) {
            throw new RuntimeException(valueType.getName() + " should be value");
        }
        keyValueGroup = new GroupType(Repetition.REPEATED, mapAlias, keyType, valueType);
    }
    else {
        //support projection only on key of a map
        keyValueGroup = new GroupType(Repetition.REPEATED, mapAlias, keyType);
    }
    return listWrapper(repetition, alias, MAP_KEY_VALUE, keyValueGroup);
}
 
Example 27
Source Project: parquet-mr   Source File: SimpleRecordConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the converter for a field based on whether it is primitive and on
 * its logical type annotation. Primitive fields get a string or decimal
 * converter when so annotated; group fields get a map or list converter when
 * so annotated. In every other case the plain primitive or record converter
 * is used.
 *
 * @param field the Parquet field to create a converter for
 * @return the converter for the field
 */
private Converter createConverter(Type field) {
  final LogicalTypeAnnotation ltype = field.getLogicalTypeAnnotation();

  if (!field.isPrimitive()) {
    final GroupType groupType = field.asGroupType();
    if (ltype == null) {
      return new SimpleRecordConverter(groupType, field.getName(), this);
    }
    Optional<Converter> annotated = ltype.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
      @Override
      public Optional<Converter> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
        return of(new SimpleMapRecordConverter(groupType, field.getName(), SimpleRecordConverter.this));
      }

      @Override
      public Optional<Converter> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
        return of(new SimpleListRecordConverter(groupType, field.getName(), SimpleRecordConverter.this));
      }
    });
    return annotated.orElse(new SimpleRecordConverter(groupType, field.getName(), this));
  }

  if (ltype == null) {
    return new SimplePrimitiveConverter(field.getName());
  }
  Optional<Converter> annotated = ltype.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
    @Override
    public Optional<Converter> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
      return of(new StringConverter(field.getName()));
    }

    @Override
    public Optional<Converter> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
      return of(new DecimalConverter(field.getName(), decimalLogicalType.getScale()));
    }
  });
  return annotated.orElse(new SimplePrimitiveConverter(field.getName()));
}
 
Example 28
Source Project: pxf   Source File: ParquetFileAccessor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens the resource for read: derives the read schema and record filter
 * from the file's schema, then builds a reader limited to the file split.
 *
 * @return {@code true} once the reader has been built
 * @throws IOException if opening the resource failed
 */
@Override
public boolean openForRead() throws IOException {
    file = new Path(context.getDataSource());
    FileSplit split = HdfsUtilities.parseFileSplit(context);

    // Schema as stored in the parquet file
    MessageType fileSchema = getSchema(file, split);
    // Column name -> Type lookup for that schema
    Map<String, Type> fieldsByName = getOriginalFieldsMap(fileSchema);
    // Schema to read with: the full set, or a subset (in case of column
    // projection) of the greenplum schema.
    MessageType readSchema = buildReadSchema(fieldsByName, fileSchema);
    // Record filter in case of predicate push-down
    FilterCompat.Filter pushDownFilter = getRecordFilter(context.getFilterString(), fieldsByName, readSchema);

    // add column projection
    configuration.set(PARQUET_READ_SCHEMA, readSchema.toString());

    // Create reader for a given split, read a range in file
    fileReader = ParquetReader.builder(new GroupReadSupport(), file)
            .withConf(configuration)
            .withFileRange(split.getStart(), split.getStart() + split.getLength())
            .withFilter(pushDownFilter)
            .build();
    context.setMetadata(readSchema);
    return true;
}
 
Example 29
Source Project: parquet-mr   Source File: ThriftRecordConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a converter for a struct. Each Parquet field is matched to a Thrift
 * child by case-insensitive name; fields without a match get no converter
 * (their slot stays {@code null}).
 *
 * @param events        the event list converters write into
 * @param parquetSchema the Parquet group type of the struct
 * @param field         the Thrift field whose type is the struct
 */
private StructConverter(List<TProtocol> events, GroupType parquetSchema, ThriftField field) {
  this.events = events;
  this.name = field.getName();
  this.tStruct = new TStruct(name);
  this.thriftType = (StructType)field.getType();
  this.schemaSize = parquetSchema.getFieldCount();
  this.converters = new Converter[this.schemaSize];
  List<ThriftField> thriftChildren = thriftType.getChildren();
  for (int position = 0; position < schemaSize; position++) {
    Type schemaType = parquetSchema.getType(position);
    String parquetName = schemaType.getName();

    // Find the first Thrift child whose name matches, ignoring case.
    ThriftField matchingThrift = null;
    for (ThriftField candidate : thriftChildren) {
      String candidateName = candidate.getName();
      if (candidateName == null || !candidateName.equalsIgnoreCase(parquetName)) {
        continue;
      }
      matchingThrift = candidate;
      break;
    }

    if (matchingThrift == null) {
      // this means the file did not contain that field
      // it will never be populated in this instance
      // other files might populate it
      continue;
    }

    if (!schemaType.isPrimitive()) {
      converters[position] = new GroupFieldhandler(newConverter(events, schemaType, matchingThrift).asGroupConverter(), matchingThrift, events);
    } else {
      converters[position] = new PrimitiveFieldHandler(newConverter(events, schemaType, matchingThrift).asPrimitiveConverter(), matchingThrift, events);
    }
  }
}
 
Example 30
Source Project: flink   Source File: RowConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a converter for array elements. Elements whose class is
 * {@code Row} get a converter from {@code createConverter}; every other
 * element class gets a {@code RowPrimitiveConverter}.
 *
 * @param elementType      Parquet type of the array elements
 * @param elementClass     Java class of the array elements
 * @param elementTypeInfo  type information of the array elements
 * @param parentDataHolder holder that receives the converted array
 * @param pos              position of the array within the parent
 */
ArrayConverter(Type elementType, Class elementClass, TypeInformation elementTypeInfo,
					ParentDataHolder parentDataHolder, int pos) {
	this.pos = pos;
	this.parentDataHolder = parentDataHolder;
	this.elementClass = elementClass;

	if (!elementClass.equals(Row.class)) {
		this.elementConverter = new RowConverter.RowPrimitiveConverter(elementType, this, 0);
	} else {
		this.elementConverter = createConverter(elementType, 0, elementTypeInfo, this);
	}
}