Java Code Examples for org.apache.parquet.schema.GroupType

The following examples show how to use org.apache.parquet.schema.GroupType. These examples are extracted from open source projects. You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage in the sidebar.
Example 1
Source Project: Bats   Source File: Metadata.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the column type info for the column addressed by {@code path},
 * recursing through group types until the primitive leaf is reached.
 */
private ColTypeInfo getColTypeInfo(MessageType schema, Type type, String[] path, int depth) {
  if (!type.isPrimitive()) {
    // Descend one level into the group and continue along the path.
    Type childType = ((GroupType) type).getType(path[depth]);
    return getColTypeInfo(schema, childType, path, depth + 1);
  }

  PrimitiveType primitiveType = (PrimitiveType) type;
  // Decimal metadata is only present for decimal-annotated primitives.
  int precision = 0;
  int scale = 0;
  if (primitiveType.getDecimalMetadata() != null) {
    precision = primitiveType.getDecimalMetadata().getPrecision();
    scale = primitiveType.getDecimalMetadata().getScale();
  }

  int repetitionLevel = schema.getMaxRepetitionLevel(path);
  int definitionLevel = schema.getMaxDefinitionLevel(path);

  return new ColTypeInfo(type.getOriginalType(), precision, scale, repetitionLevel, definitionLevel);
}
 
Example 2
Source Project: Bats   Source File: ParquetRecordWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Maps a Drill {@code MaterializedField} to the corresponding Parquet type.
 * MAP becomes a group of its children; LIST is unsupported; NULL is written
 * as an optional INT; everything else is delegated to getPrimitiveType.
 */
private Type getType(MaterializedField field) {
  MinorType minorType = field.getType().getMinorType();
  DataMode dataMode = field.getType().getMode();
  switch (minorType) {
    case MAP: {
      // Convert each child field and wrap them in a group.
      List<Type> childTypes = Lists.newArrayList();
      for (MaterializedField child : field.getChildren()) {
        childTypes.add(getType(child));
      }
      Repetition repetition =
          dataMode == DataMode.REPEATED ? Repetition.REPEATED : Repetition.OPTIONAL;
      return new GroupType(repetition, field.getName(), childTypes);
    }
    case LIST:
      throw new UnsupportedOperationException("Unsupported type " + minorType);
    case NULL: {
      // NULL columns are materialized as optional INT placeholders.
      MaterializedField intField = field.withType(
        TypeProtos.MajorType.newBuilder().setMinorType(MinorType.INT).setMode(DataMode.OPTIONAL).build());
      return getPrimitiveType(intField);
    }
    default:
      return getPrimitiveType(field);
  }
}
 
Example 3
Source Project: presto   Source File: TestParquetPredicateUtils.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testParquetTupleDomainStruct()
{
    // Hive struct<a:int,b:int> column with a NOT NULL constraint on the row.
    RowType rowType = rowType(
            RowType.field("a", INTEGER),
            RowType.field("b", INTEGER));

    HiveColumnHandle columnHandle = createBaseColumn("my_struct", 0, HiveType.valueOf("struct<a:int,b:int>"), rowType, REGULAR, Optional.empty());
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(rowType)));

    // Parquet file schema mirroring the Hive column: an optional group with two INT32 fields.
    MessageType fileSchema = new MessageType("hive_schema",
            new GroupType(OPTIONAL, "my_struct",
                    new PrimitiveType(OPTIONAL, INT32, "a"),
                    new PrimitiveType(OPTIONAL, INT32, "b")));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain, fileSchema, true);
    // The struct predicate is expected to collapse to "all", i.e. no
    // per-column predicate is pushed down for the struct column.
    assertTrue(tupleDomain.isAll());
}
 
Example 4
Source Project: parquet-mr   Source File: ThriftRecordConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a converter for the elements of a Thrift-backed Parquet list.
 * Optional elements are either rejected or (when configured) ignored,
 * since Thrift lists cannot represent null entries.
 */
public ElementConverter(String listName, List<TProtocol> listEvents,
                        GroupType repeatedType, ThriftField thriftElement) {
  this.listEvents = listEvents;
  this.elementEvents = new ArrayList<TProtocol>();
  Type elementType = repeatedType.getType(0);
  boolean elementsAreOptional = elementType.isRepetition(Type.Repetition.OPTIONAL);
  if (elementsAreOptional && !ignoreNullElements) {
    // Nulls in the list cannot be surfaced to Thrift; fail unless configured otherwise.
    throw new ParquetDecodingException("Cannot read list " + listName +
        " with optional elements: set " + IGNORE_NULL_LIST_ELEMENTS +
        " to ignore nulls.");
  }
  if (elementsAreOptional) {
    LOG.warn("List " + listName +
        " has optional elements: null elements are ignored.");
  }
  elementConverter = newConverter(elementEvents, elementType, thriftElement);
}
 
Example 5
/**
 * Builds the Parquet group type for a map. When {@code valueType} is null
 * only the key is projected; otherwise the value field must be named
 * "value" per the map key/value convention.
 */
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType)
{
    // Projection on the key only: the repeated group carries just the key.
    if (valueType == null) {
        GroupType keyOnlyGroup = new GroupType(
                Repetition.REPEATED,
                mapAlias,
                keyType);
        return listWrapper(repetition, alias, MAP_KEY_VALUE, keyOnlyGroup);
    }
    if (!valueType.getName().equals("value")) {
        throw new RuntimeException(valueType.getName() + " should be value");
    }
    GroupType keyValueGroup = new GroupType(
            Repetition.REPEATED,
            mapAlias,
            keyType,
            valueType);
    return listWrapper(repetition, alias, MAP_KEY_VALUE, keyValueGroup);
}
 
Example 6
/**
 * Translates the JSON schema into a Parquet type, registering a value
 * converter per column along the way. The root document becomes a
 * MessageType; nested records become GroupTypes.
 */
private Type buildSchema() {
  List<Type> fieldTypes = new ArrayList<>();
  for (JsonElement element : this.jsonSchema.getDataTypeValues()) {
    JsonSchema fieldSchema = new JsonSchema((JsonObject) element);
    JsonElementConverter converter = JsonElementConversionFactory.getConverter(fieldSchema, false);
    // Remember the converter so values can be translated later by column name.
    this.converters.put(fieldSchema.getColumnName(), converter);
    fieldTypes.add(converter.schema());
  }
  String docName = this.jsonSchema.getColumnName();
  switch (recordType) {
    case ROOT:
      return new MessageType(docName, fieldTypes);
    case CHILD:
      return new GroupType(optionalOrRequired(this.jsonSchema), docName, fieldTypes);
    default:
      throw new RuntimeException("Unsupported Record type");
  }
}
 
Example 7
Source Project: presto   Source File: TestDataWritableWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a single-level (non-standard, one-group) Parquet array.
 * Null elements are rejected because the single-level layout cannot
 * represent them.
 *
 * @param value the array value to write
 * @param inspector Hive inspector used to extract the list and its elements
 * @param type the Parquet group type of the array (element type at index 0)
 */
private void writeSingleLevelArray(Object value, ListObjectInspector inspector, GroupType type)
{
    // Get the internal array structure
    Type elementType = type.getType(0);

    recordConsumer.startGroup();

    List<?> arrayValues = inspector.getList(value);
    if (!arrayValues.isEmpty()) {
        recordConsumer.startField(elementType.getName(), 0);
        ObjectInspector elementInspector = inspector.getListElementObjectInspector();

        for (Object element : arrayValues) {
            if (element == null) {
                // Fixed message: "are requires" -> "are required"
                throw new IllegalArgumentException("Array elements are required in given schema definition");
            }
            writeValue(element, elementInspector, elementType);
        }

        recordConsumer.endField(elementType.getName(), 0);
    }
    recordConsumer.endGroup();
}
 
Example 8
Source Project: parquet-mr   Source File: ThriftRecordConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns true when {@code fullSchema} contains a REQUIRED field (possibly
 * nested) that is absent from the {@code requested} projection schema.
 */
private boolean hasMissingRequiredFieldInGroupType(GroupType requested, GroupType fullSchema) {
  for (Type field : fullSchema.getFields()) {
    String fieldName = field.getName();

    if (!requested.containsField(fieldName)) {
      // A missing REQUIRED field makes the projection invalid; optional
      // and repeated fields may be dropped freely.
      if (field.getRepetition() == Type.Repetition.REQUIRED) {
        return true;
      }
      continue;
    }

    // Present in the projection: recurse into group types to validate
    // their nested required fields as well.
    if (!field.isPrimitive()) {
      Type requestedType = requested.getType(fieldName);
      if (hasMissingRequiredFieldInGroupType(requestedType.asGroupType(), field.asGroupType())) {
        return true;
      }
    }
  }

  return false;
}
 
Example 9
Source Project: parquet-mr   Source File: SimpleGroupConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates one child converter per field of {@code schema}: a primitive
 * converter for leaves, a nested group converter otherwise.
 */
SimpleGroupConverter(SimpleGroupConverter parent, int index, GroupType schema) {
  this.parent = parent;
  this.index = index;

  int fieldCount = schema.getFieldCount();
  converters = new Converter[fieldCount];

  for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
    Type fieldType = schema.getType(fieldIndex);
    converters[fieldIndex] = fieldType.isPrimitive()
        ? new SimplePrimitiveConverter(this, fieldIndex)
        : new SimpleGroupConverter(this, fieldIndex, fieldType.asGroupType());
  }
}
 
Example 10
Source Project: parquet-mr   Source File: AvroRecordConverter.java    License: Apache License 2.0 6 votes vote down vote up
// Converter for an Avro collection read from a Parquet LIST group.
// Distinguishes the two Parquet list encodings: the repeated type may be
// the element itself, or a synthetic wrapper group around the element.
public AvroCollectionConverter(ParentValueContainer parent, GroupType type,
                               Schema avroSchema, GenericData model,
                               Class<?> containerClass) {
  this.parent = parent;
  this.avroSchema = avroSchema;
  this.containerClass = containerClass;
  // Unwrap a nullable union to get the concrete element schema.
  Schema elementSchema = AvroSchemaConverter.getNonNull(avroSchema.getElementType());
  Type repeatedType = type.getType(0);
  // always determine whether the repeated type is the element type by
  // matching it against the element schema.
  if (isElementType(repeatedType, elementSchema)) {
    // the element type is the repeated type (and required)
    converter = newConverter(elementSchema, repeatedType, model, new ParentValueContainer() {
      @Override
      @SuppressWarnings("unchecked")
      public void add(Object value) {
        // Each decoded element is appended to the collection being built.
        container.add(value);
      }
    });
  } else {
    // the element is wrapped in a synthetic group and may be optional
    converter = new ElementConverter(repeatedType.asGroupType(), elementSchema, model);
  }
}
 
Example 11
Source Project: parquet-mr   Source File: AvroIndexedRecordConverter.java    License: Apache License 2.0 6 votes vote down vote up
// Converter for an Avro array read from a Parquet LIST group.
// Handles both Parquet list encodings: the repeated type may be the
// element itself, or a synthetic wrapper group around the element.
public AvroArrayConverter(ParentValueContainer parent, GroupType type,
    Schema avroSchema, GenericData model) {
  this.parent = parent;
  this.avroSchema = avroSchema;
  // Unwrap a nullable union to get the concrete element schema.
  Schema elementSchema = AvroSchemaConverter
      .getNonNull(avroSchema.getElementType());
  Type repeatedType = type.getType(0);
  // always determine whether the repeated type is the element type by
  // matching it against the element schema.
  if (AvroRecordConverter.isElementType(repeatedType, elementSchema)) {
    // the element type is the repeated type (and required)
    converter = newConverter(elementSchema, repeatedType, model, new ParentValueContainer() {
      @Override
      @SuppressWarnings("unchecked")
      public void add(Object value) {
        // Each decoded element is appended to the array being built.
        array.add(value);
      }
    });
  } else {
    // the element is wrapped in a synthetic group and may be optional
    converter = new ElementConverter(repeatedType.asGroupType(), elementSchema, model);
  }
}
 
Example 12
Source Project: iceberg   Source File: MessageTypeToType.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a Parquet LIST group into an Iceberg list type, using the
 * element's repetition to choose optional vs. required elements.
 */
@Override
public Type list(GroupType array, Type elementType) {
  GroupType repeated = array.getType(0).asGroupType();
  org.apache.parquet.schema.Type element = repeated.getType(0);

  Preconditions.checkArgument(
      !element.isRepetition(Repetition.REPEATED),
      "Elements cannot have repetition REPEATED: %s", element);

  int elementFieldId = getId(element);
  addAlias(element.getName(), elementFieldId);

  // OPTIONAL elements map to a nullable-element list; anything else is required.
  return element.isRepetition(Repetition.OPTIONAL)
      ? Types.ListType.ofOptional(elementFieldId, elementType)
      : Types.ListType.ofRequired(elementFieldId, elementType);
}
 
Example 13
Source Project: parquet-mr   Source File: DataWritableGroupConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds converters for each field of the selected (projected) group type,
 * mapped back to its position in the containing (full) group type. Every
 * selected field must exist in the containing type.
 */
public DataWritableGroupConverter(final GroupType selectedGroupType,
    final HiveGroupConverter parent, final int index, final GroupType containingGroupType) {
  this.parent = parent;
  this.index = index;

  // Values are stored at full-schema positions; converters only cover the projection.
  currentArr = new Object[containingGroupType.getFieldCount()];
  converters = new Converter[selectedGroupType.getFieldCount()];

  final List<Type> selectedFields = selectedGroupType.getFields();
  final List<Type> containingFields = containingGroupType.getFields();
  for (int i = 0; i < converters.length; i++) {
    final Type subtype = selectedFields.get(i);
    if (!containingFields.contains(subtype)) {
      throw new IllegalStateException("Group type [" + containingGroupType +
          "] does not contain requested field: " + subtype);
    }
    converters[i] = getConverterFromDescription(subtype,
        containingGroupType.getFieldIndex(subtype.getName()), this);
  }
}
 
Example 14
Source Project: dremio-oss   Source File: ParquetRecordWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Changes the list inner '$data$' vector name to 'element' in the schema.
 * All other attributes of the child type (repetition, original type, id,
 * and, for primitives, length and decimal metadata) are preserved.
 */
private Type renameChildTypeToElement(Type childType) {
  if (childType.isPrimitive()) {
    PrimitiveType primitive = childType.asPrimitiveType();
    return new PrimitiveType(childType.getRepetition(),
      primitive.getPrimitiveTypeName(),
      primitive.getTypeLength(),
      "element",
      primitive.getOriginalType(),
      primitive.getDecimalMetadata(),
      primitive.getId());
  }

  GroupType group = childType.asGroupType();
  GroupType renamed = new GroupType(childType.getRepetition(),
    "element",
    childType.getOriginalType(),
    group.getFields());
  // GroupType has no id-preserving constructor, so re-attach the id if present.
  Type.ID id = group.getId();
  if (id != null) {
    renamed = renamed.withId(id.hashCode());
  }
  return renamed;
}
 
Example 15
Source Project: hadoop-etl-udfs   Source File: TupleWriter.java    License: MIT License 6 votes vote down vote up
/**
 * Writes a Pig tuple to the record consumer field-by-field, recursing into
 * nested tuples for group-typed fields. Null fields are skipped (omitted).
 */
private void writeTuple(Tuple tuple, GroupType type) {
    int fieldCount = type.getFieldCount();
    for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
        // empty fields have to be omitted
        if (tuple.isNull(fieldIndex)) {
            continue;
        }
        Type fieldType = type.getType(fieldIndex);
        String fieldName = fieldType.getName();
        recordConsumer.startField(fieldName, fieldIndex);
        if (fieldType.isPrimitive()) {
            tuple.writePrimitiveValue(recordConsumer, fieldIndex, (PrimitiveType) fieldType);
        }
        else {
            // Nested tuple: wrap in a group and recurse.
            recordConsumer.startGroup();
            writeTuple(tuple.getTuple(fieldIndex), fieldType.asGroupType());
            recordConsumer.endGroup();
        }
        recordConsumer.endField(fieldName, fieldIndex);
    }
}
 
Example 16
Source Project: parquet-mr   Source File: ProtoMessageConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a converter for a protobuf repeated field backed by a Parquet
 * 3-level LIST: LIST wrapper group -> repeated "list" group -> "element".
 * Each level of the structure is validated before descending.
 *
 * @throws ParquetDecodingException if the type is not a well-formed LIST
 */
public ListConverter(Message.Builder parentBuilder, Descriptors.FieldDescriptor fieldDescriptor, Type parquetType) {
  LogicalTypeAnnotation logicalTypeAnnotation = parquetType.getLogicalTypeAnnotation();
  if (!(logicalTypeAnnotation instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) || parquetType.isPrimitive()) {
    throw new ParquetDecodingException("Expected LIST wrapper. Found: " + logicalTypeAnnotation + " instead.");
  }

  GroupType rootWrapperType = parquetType.asGroupType();
  if (!rootWrapperType.containsField("list") || rootWrapperType.getType("list").isPrimitive()) {
    // Fixed message typo: "wrapperr" -> "wrapper"
    throw new ParquetDecodingException("Expected repeated 'list' group inside LIST wrapper but got: " + rootWrapperType);
  }

  GroupType listType = rootWrapperType.getType("list").asGroupType();
  if (!listType.containsField("element")) {
    throw new ParquetDecodingException("Expected 'element' inside repeated list group but got: " + listType);
  }

  Type elementType = listType.getType("element");
  converter = newMessageConverter(parentBuilder, fieldDescriptor, elementType);
}
 
Example 17
Source Project: dremio-oss   Source File: ParquetGroupConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Base constructor: captures the collaborators needed to convert one
 * Parquet group. No converters are built here; subclasses populate
 * {@code converters}.
 */
ParquetGroupConverter(
    ParquetColumnResolver columnResolver, OutputMutator mutator,
    GroupType schema,
    Collection<SchemaPath> columns,
    OptionManager options,
    List<Field> arrowSchema,
    Function<String, String> childNameResolver,
    SchemaDerivationHelper schemaHelper) {
  this.columnResolver = columnResolver;
  this.mutator = mutator;
  this.schema = schema;
  this.columns = columns;
  this.options = options;
  this.arrowSchema = arrowSchema;
  this.childNameResolver = childNameResolver;
  this.schemaHelper = schemaHelper;
  this.converters = Lists.newArrayList();
  // Cache the configured per-field size limit once, as an int.
  this.maxFieldSizeLimit = Math.toIntExact(options.getOption(ExecConstants.LIMIT_FIELD_SIZE_BYTES));
}
 
Example 18
Source Project: datacollector   Source File: AvroWriteSupportInt96Avro18.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes an Object[] as an old-style (two-level) Parquet list. Null
 * elements are not representable in this layout; when one triggers an
 * NPE the array is rescanned to report the offending index.
 */
@Override
protected void writeObjectArray(GroupType type, Schema schema,
    Object[] array) {
  if (array.length == 0) {
    return; // nothing to write; the repeated field is omitted entirely
  }
  recordConsumer.startField(OLD_LIST_REPEATED_NAME, 0);
  try {
    for (Object element : array) {
      writeValue(type.getType(0), schema.getElementType(), element);
    }
  } catch (NullPointerException e) {
    // find the null element and throw a better error message
    for (int idx = 0; idx < array.length; idx += 1) {
      if (array[idx] == null) {
        throw new NullPointerException(
            "Array contains a null element at " + idx + "\n" +
                "Set parquet.avro.write-old-list-structure=false to turn " +
                "on support for arrays with null elements.");
      }
    }
    // no element was null, throw the original exception
    throw e;
  }
  recordConsumer.endField(OLD_LIST_REPEATED_NAME, 0);
}
 
Example 19
Source Project: parquet-mr   Source File: AvroWriteSupport.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes one Avro record's fields to the record consumer. Avro NULL-typed
 * fields have no Parquet counterpart, so the Parquet field index advances
 * independently of the Avro field index.
 */
private void writeRecordFields(GroupType schema, Schema avroSchema,
                               Object record) {
  List<Type> fields = schema.getFields();
  List<Schema.Field> avroFields = avroSchema.getFields();
  int parquetIndex = 0; // parquet ignores Avro nulls, so index may differ
  for (int avroIndex = 0; avroIndex < avroFields.size(); avroIndex++) {
    Schema.Field avroField = avroFields.get(avroIndex);
    if (avroField.schema().getType().equals(Schema.Type.NULL)) {
      continue; // no Parquet field exists for a NULL-typed Avro field
    }
    Type fieldType = fields.get(parquetIndex);
    Object value = model.getField(record, avroField.name(), avroIndex);
    if (value == null) {
      // A null value is only legal when the Parquet field is not REQUIRED.
      if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
        throw new RuntimeException("Null-value for required field: " + avroField.name());
      }
    } else {
      recordConsumer.startField(fieldType.getName(), parquetIndex);
      writeValue(fieldType, avroField.schema(), value);
      recordConsumer.endField(fieldType.getName(), parquetIndex);
    }
    parquetIndex++;
  }
}
 
Example 20
Source Project: parquet-mr   Source File: PigSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Filters a Parquet BAG type against the corresponding Pig field schema.
 * Bags wrap their content in a single repeated field; when that field is
 * not itself a tuple, the synthetic tuple layer in the Pig schema is skipped.
 */
private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws FrontendException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("filtering BAG schema:\n" + bagType + "\nwith:\n " + bagFieldSchema);
  }
  if (bagType.getFieldCount() != 1) {
    throw new RuntimeException("not unwrapping the right type, this should be a Bag: " + bagType);
  }
  Type nested = bagType.getType(0);
  FieldSchema innerField = bagFieldSchema.schema.getField(0);
  boolean nestedIsNotTuple = nested.isPrimitive()
      || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation
      || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation;
  if (nestedIsNotTuple) {
    // Bags always contain tuples => we skip the extra tuple that was inserted in that case.
    innerField = innerField.schema.getField(0);
  }
  return bagType.withNewFields(filter(nested, innerField));
}
 
Example 21
Source Project: presto   Source File: ParquetPageSourceFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the Parquet type for a Hive column, either by name (when
 * enabled) or by the column's positional index in the group. Returns
 * empty when the column cannot be resolved.
 */
public static Optional<org.apache.parquet.schema.Type> getParquetType(GroupType groupType, boolean useParquetColumnNames, HiveColumnHandle column)
{
    if (useParquetColumnNames) {
        // Name-based lookup; absent names yield Optional.empty().
        return Optional.ofNullable(getParquetTypeByName(column.getBaseColumnName(), groupType));
    }
    int fieldIndex = column.getBaseHiveColumnIndex();
    if (fieldIndex >= groupType.getFieldCount()) {
        return Optional.empty();
    }
    return Optional.of(groupType.getType(fieldIndex));
}
 
Example 22
Source Project: tajo   Source File: TajoRecordConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new TajoRecordConverter.
 *
 * @param parquetSchema The Parquet schema of the projection.
 * @param tajoReadSchema The Tajo schema of the table.
 * @param projectionMap An array mapping the projection column to the column
 *                      index in the table.
 */
public TajoRecordConverter(GroupType parquetSchema, Schema tajoReadSchema,
                           int[] projectionMap) {
  this.parquetSchema = parquetSchema;
  this.tajoReadSchema = tajoReadSchema;
  this.projectionMap = projectionMap;
  this.tupleSize = tajoReadSchema.size();

  // The projectionMap.length does not match parquetSchema.getFieldCount()
  // when the projection contains NULL_TYPE columns. We will skip over the
  // NULL_TYPE columns when we construct the converters and populate the
  // NULL_TYPE columns with NullDatums in start().
  // `index` tracks the position in the Parquet schema (NULL_TYPE columns
  // excluded), while `i` walks the full projection.
  int index = 0;
  this.converters = new Converter[parquetSchema.getFieldCount()];
  for (int i = 0; i < projectionMap.length; ++i) {
    final int projectionIndex = projectionMap[i];
    Column column = tajoReadSchema.getColumn(projectionIndex);
    if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) {
      // No Parquet field exists for NULL_TYPE; handled separately in start().
      continue;
    }
    Type type = parquetSchema.getType(index);
    // Capture the projection position so the converter writes the value
    // into the correct tuple slot.
    final int writeIndex = i;
    converters[index] = newConverter(column, type, new ParentValueContainer() {
      @Override
      void add(Object value) {
        TajoRecordConverter.this.set(writeIndex, value);
      }
    });
    ++index;
  }
}
 
Example 23
Source Project: parquet-mr   Source File: List3Levels.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Will validate the structure of the list
 * @param list the Parquet List
 */
public List3Levels(GroupType list) {
  if (list.getOriginalType() != OriginalType.LIST || list.getFields().size() != 1) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }
  this.list = list;
  Type repeatedField = list.getFields().get(0);
  if (repeatedField.isPrimitive() || !repeatedField.isRepetition(REPEATED) || repeatedField.asGroupType().getFields().size() != 1) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }
  this.repeated = repeatedField.asGroupType();
  this.element = repeated.getFields().get(0);
}
 
Example 24
Source Project: parquet-mr   Source File: SimpleRecordConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds one child converter per field of the schema, in field order.
 */
public SimpleRecordConverter(GroupType schema, String name, SimpleRecordConverter parent) {
  this.parent = parent;
  this.name = name;
  this.converters = new Converter[schema.getFieldCount()];

  for (int fieldIndex = 0; fieldIndex < converters.length; fieldIndex++) {
    converters[fieldIndex] = createConverter(schema.getType(fieldIndex));
  }
}
 
Example 25
/**
 * Converts a Hive map type to a Parquet map group. The key is always
 * REQUIRED (map keys cannot be null); the value keeps the default
 * repetition chosen by convertType.
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repetition repetition)
{
    return mapType(
            repetition,
            name,
            "map",
            convertType(ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED),
            convertType(ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo()));
}
 
Example 26
Source Project: parquet-mr   Source File: DataWritableWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes an ArrayWritable's values against the given group type, one
 * Parquet field per position. Null values are skipped; group-typed fields
 * recurse (repeated groups go through writeArray, others through writeData).
 *
 * @param arr  the values to write; ignored when null
 * @param type the Parquet group type describing the field layout
 */
private void writeData(final ArrayWritable arr, final GroupType type) {
  if (arr == null) {
    return;
  }
  final int fieldCount = type.getFieldCount();
  Writable[] values = arr.get();
  for (int field = 0; field < fieldCount; ++field) {
    final Type fieldType = type.getType(field);
    final String fieldName = fieldType.getName();
    final Writable value = values[field];
    if (value == null) {
      continue; // null fields are simply omitted
    }
    recordConsumer.startField(fieldName, field);

    if (fieldType.isPrimitive()) {
      writePrimitive(value);
    } else {
      recordConsumer.startGroup();
      if (value instanceof ArrayWritable) {
        if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) {
          writeArray((ArrayWritable) value, fieldType.asGroupType());
        } else {
          writeData((ArrayWritable) value, fieldType.asGroupType());
        }
      } else {
        // value is known non-null here (nulls were skipped above), so the
        // original `else if (value != null)` check was redundant.
        throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value);
      }

      recordConsumer.endGroup();
    }

    recordConsumer.endField(fieldName, field);
  }
}
 
Example 27
Source Project: presto   Source File: TestDataWritableWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * It writes the field value to the Parquet RecordConsumer. It detects the field type, and calls
 * the correct write function.
 *
 * @param value The writable object that contains the value.
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the type schema.
 */
private void writeValue(Object value, ObjectInspector inspector, Type type)
{
    if (type.isPrimitive()) {
        checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
        writePrimitive(value, (PrimitiveObjectInspector) inspector);
        return;
    }

    GroupType groupType = type.asGroupType();
    OriginalType originalType = type.getOriginalType();

    if (OriginalType.LIST == originalType) {
        checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
        // Choose between the legacy single-level and standard array layout.
        if (singleLevelArray) {
            writeSingleLevelArray(value, (ListObjectInspector) inspector, groupType);
        }
        else {
            writeArray(value, (ListObjectInspector) inspector, groupType);
        }
    }
    else if (originalType != null && (originalType == OriginalType.MAP || originalType == OriginalType.MAP_KEY_VALUE)) {
        checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
        writeMap(value, (MapObjectInspector) inspector, groupType);
    }
    else {
        // Any other group is treated as a struct.
        checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
        writeGroup(value, (StructObjectInspector) inspector, groupType);
    }
}
 
Example 28
Source Project: datacollector   Source File: AvroWriteSupportInt96Avro18.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes a LIST value inside its wrapper group. Accepts either a
 * Collection or a Java array; anything else is rejected.
 */
public void writeList(GroupType schema, Schema avroSchema, Object value) {
  recordConsumer.startGroup(); // group wrapper (original type LIST)
  if (value instanceof Collection) {
    writeCollection(schema, avroSchema, (Collection) value);
  } else {
    Class<?> valueClass = value.getClass();
    Preconditions.checkArgument(valueClass.isArray(),
        "Cannot write unless collection or array: " + valueClass.getName());
    writeJavaArray(schema, avroSchema, valueClass, value);
  }
  recordConsumer.endGroup();
}
 
Example 29
Source Project: tajo   Source File: InternalParquetRecordReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns true when {@code path[index..]} resolves to a primitive leaf
 * inside {@code group}; a path that ends on a group (or runs past a
 * primitive) does not count as contained.
 */
private boolean contains(GroupType group, String[] path, int index) {
  if (index == path.length) {
    return false; // path exhausted on a group, not a leaf
  }
  if (!group.containsField(path[index])) {
    return false;
  }
  Type type = group.getType(path[index]);
  if (type.isPrimitive()) {
    // A primitive matches only when it is the final path segment.
    return index + 1 == path.length;
  }
  return contains(type.asGroupType(), path, index + 1);
}
 
Example 30
Source Project: parquet-mr   Source File: TupleConverter.java    License: Apache License 2.0 5 votes vote down vote up
// Builds per-field converters for reading Parquet data into Pig tuples.
// `i` walks the (padded) schema positions; `c` counts converters actually
// created, since fields absent from the Parquet schema get no converter.
public TupleConverter(GroupType parquetSchema, Schema pigSchema, boolean elephantBirdCompatible, boolean columnIndexAccess) {
  this.parquetSchema = parquetSchema;
  this.elephantBirdCompatible = elephantBirdCompatible;
  try {
    // Size by the wider of the two schemas so neither side is truncated.
    this.schemaSize = max(parquetSchema.getFieldCount(), pigSchema.getFields().size());
    this.converters = new Converter[this.schemaSize];
    for (int i = 0, c = 0; i < schemaSize; i++) {
      FieldSchema field = pigSchema.getField(i);
      if(parquetSchema.containsField(field.alias) || columnIndexAccess) {
        Type type = getType(columnIndexAccess, field.alias, i);

        if(type != null) {
          // Capture the tuple position so the converter writes into the right slot.
          final int index = i;
          converters[c++] = newConverter(field, type, new ParentValueContainer() {
            @Override
            void add(Object value) {
              TupleConverter.this.set(index, value);
            }
          }, elephantBirdCompatible, columnIndexAccess);
        }
      }

    }
  } catch (FrontendException e) {
    throw new ParquetDecodingException("can not initialize pig converter from:\n" + parquetSchema + "\n" + pigSchema, e);
  }
}