Java Code Examples for org.apache.parquet.schema.Type#isRepetition()

The following examples show how to use org.apache.parquet.schema.Type#isRepetition(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroSchemaConverter190Int96Avro17.java    From datacollector with Apache License 2.0 7 votes vote down vote up
/**
 * Builds an Avro record schema called {@code name} out of a list of Parquet fields.
 *
 * <p>Each field is converted with {@code convertField}. OPTIONAL fields get their schema passed
 * through {@code optional(...)} and use {@code NULL_VALUE} as the default; every other
 * non-repeated field keeps the converted schema with a {@code null} default.
 *
 * @param name          name given to the created record schema
 * @param parquetFields Parquet fields to convert, in order
 * @return the record schema holding one Avro field per Parquet field
 * @throws UnsupportedOperationException if any field has REPEATED repetition
 */
private Schema convertFields(String name, List<Type> parquetFields) {
  List<Schema.Field> avroFields = new ArrayList<Schema.Field>();
  for (Type field : parquetFields) {
    // Conversion runs before the repetition check, so a conversion failure is reported first.
    Schema converted = convertField(field);
    if (field.isRepetition(REPEATED)) {
      throw new UnsupportedOperationException("REPEATED not supported outside LIST or MAP. Type: " + field);
    }
    Schema.Field avroField;
    if (field.isRepetition(Type.Repetition.OPTIONAL)) {
      avroField = new Schema.Field(field.getName(), optional(converted), null, NULL_VALUE);
    } else {
      // REQUIRED
      avroField = new Schema.Field(field.getName(), converted, null, (Object) null);
    }
    avroFields.add(avroField);
  }
  Schema record = Schema.createRecord(name, null, null, false);
  record.setFields(avroFields);
  return record;
}
 
Example 2
Source File: AvroWriteSupportInt96Avro17.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Writes an object array through {@code recordConsumer} as a repeated group of elements.
 *
 * <p>An empty array emits nothing. Otherwise every element gets its own group; a non-null
 * element is written via {@code writeValue}, while a null element is only tolerated when the
 * Parquet element type is OPTIONAL.
 *
 * @param type   Parquet group type of the list; its first child is the repeated group
 * @param schema Avro schema supplying the element schema
 * @param array  elements to write
 * @throws RuntimeException if a null element is found and the element type is not OPTIONAL
 */
@Override
protected void writeObjectArray(GroupType type, Schema schema,
    Object[] array) {
  if (array.length == 0) {
    return;
  }
  recordConsumer.startField(LIST_REPEATED_NAME, 0);
  GroupType repeatedGroup = type.getType(0).asGroupType();
  Type elementType = repeatedGroup.getType(0);
  for (int i = 0; i < array.length; i++) {
    Object item = array[i];
    recordConsumer.startGroup(); // repeated group array, middle layer
    if (item == null) {
      if (!elementType.isRepetition(Type.Repetition.OPTIONAL)) {
        throw new RuntimeException(
            "Null list element for " + schema.getName());
      }
    } else {
      recordConsumer.startField(LIST_ELEMENT_NAME, 0);
      writeValue(elementType, schema.getElementType(), item);
      recordConsumer.endField(LIST_ELEMENT_NAME, 0);
    }
    recordConsumer.endGroup();
  }
  recordConsumer.endField(LIST_REPEATED_NAME, 0);
}
 
Example 3
Source File: AvroWriteSupport.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the fields of one record to {@code recordConsumer}.
 *
 * <p>Avro fields whose schema type is NULL are skipped and do not consume a Parquet field
 * position, which is why the Parquet index is tracked separately from the Avro index.
 *
 * @param schema     Parquet group type describing the record
 * @param avroSchema Avro schema of the record
 * @param record     the record whose field values are read through {@code model}
 * @throws RuntimeException if a field value is null and the Parquet field is REQUIRED
 */
private void writeRecordFields(GroupType schema, Schema avroSchema,
                               Object record) {
  List<Type> parquetFields = schema.getFields();
  List<Schema.Field> avroFields = avroSchema.getFields();
  int parquetIndex = 0; // parquet ignores Avro nulls, so index may differ
  int avroIndex = -1;
  for (Schema.Field avroField : avroFields) {
    avroIndex++;
    boolean isNullType = avroField.schema().getType().equals(Schema.Type.NULL);
    if (!isNullType) {
      Type parquetField = parquetFields.get(parquetIndex);
      Object fieldValue = model.getField(record, avroField.name(), avroIndex);
      if (fieldValue == null) {
        if (parquetField.isRepetition(Type.Repetition.REQUIRED)) {
          throw new RuntimeException("Null-value for required field: " + avroField.name());
        }
      } else {
        recordConsumer.startField(parquetField.getName(), parquetIndex);
        writeValue(parquetField, avroField.schema(), fieldValue);
        recordConsumer.endField(parquetField.getName(), parquetIndex);
      }
      parquetIndex++;
    }
  }
}
 
Example 4
Source File: AvroSchemaConverter190Int96Avro18.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a list of Parquet fields into an Avro record schema with the given name.
 *
 * <p>Fields are converted one by one with {@code convertField}. An OPTIONAL field is registered
 * with the result of {@code optional(...)} and the {@code NULL_VALUE} default; any other
 * non-repeated field is registered with the converted schema and a {@code null} default.
 *
 * @param name          name of the record schema to create
 * @param parquetFields the Parquet fields making up the record
 * @return the populated Avro record schema
 * @throws UnsupportedOperationException if a field has REPEATED repetition
 */
private Schema convertFields(String name, List<Type> parquetFields) {
  List<Schema.Field> recordFields = new ArrayList<Schema.Field>();
  for (Type parquetField : parquetFields) {
    // Keep the conversion ahead of the repetition check to preserve which error wins.
    Schema avroSchema = convertField(parquetField);
    if (parquetField.isRepetition(REPEATED)) {
      throw new UnsupportedOperationException("REPEATED not supported outside LIST or MAP. Type: " + parquetField);
    }
    String fieldName = parquetField.getName();
    if (parquetField.isRepetition(Type.Repetition.OPTIONAL)) {
      recordFields.add(new Schema.Field(fieldName, optional(avroSchema), null, NULL_VALUE));
      continue;
    }
    // REQUIRED
    recordFields.add(new Schema.Field(fieldName, avroSchema, null, (Object) null));
  }
  Schema result = Schema.createRecord(name, null, null, false);
  result.setFields(recordFields);
  return result;
}
 
Example 5
Source File: TajoWriteSupport.java    From tajo with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the columns of one tuple to {@code recordConsumer}.
 *
 * <p>Columns of type NULL_TYPE are skipped and do not advance the Parquet field index, so the
 * Parquet index can lag behind the Tajo column index.
 *
 * @param schema     Parquet group type describing the record
 * @param tajoSchema Tajo schema listing the tuple's columns
 * @param tuple      the tuple holding the values to write
 * @throws RuntimeException if a column is blank or null and the Parquet field is REQUIRED
 */
private void writeRecordFields(GroupType schema, Schema tajoSchema,
                               Tuple tuple) {
  List<Type> parquetFields = schema.getFields();
  // Parquet ignores Tajo NULL_TYPE columns, so the index may differ.
  int parquetIndex = 0;
  for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) {
    Column column = tajoSchema.getColumn(tajoIndex);
    if (column.getDataType().getType() != TajoDataTypes.Type.NULL_TYPE) {
      Type parquetField = parquetFields.get(parquetIndex);
      if (tuple.isBlankOrNull(tajoIndex)) {
        if (parquetField.isRepetition(Type.Repetition.REQUIRED)) {
          throw new RuntimeException("Null-value for required field: " +
              column.getSimpleName());
        }
      } else {
        recordConsumer.startField(parquetField.getName(), parquetIndex);
        writeValue(column, tuple, tajoIndex);
        recordConsumer.endField(parquetField.getName(), parquetIndex);
      }
      ++parquetIndex;
    }
  }
}
 
Example 6
Source File: AvroWriteSupportInt96Avro18.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Emits the given object array as a repeated group through {@code recordConsumer}.
 *
 * <p>Nothing is written for an empty array. Each element is wrapped in its own group; null
 * elements produce an empty group and are accepted only when the element type is OPTIONAL.
 *
 * @param type   Parquet list group type; its first child is taken as the repeated group
 * @param schema Avro schema providing the element schema
 * @param array  values to write
 * @throws RuntimeException if an element is null and the element type is not OPTIONAL
 */
@Override
protected void writeObjectArray(GroupType type, Schema schema,
    Object[] array) {
  final int count = array.length;
  if (count <= 0) {
    return;
  }
  recordConsumer.startField(LIST_REPEATED_NAME, 0);
  GroupType middleLayer = type.getType(0).asGroupType();
  Type elementType = middleLayer.getType(0);
  for (int pos = 0; pos < count; pos++) {
    Object current = array[pos];
    recordConsumer.startGroup(); // repeated group array, middle layer
    if (current != null) {
      recordConsumer.startField(LIST_ELEMENT_NAME, 0);
      writeValue(elementType, schema.getElementType(), current);
      recordConsumer.endField(LIST_ELEMENT_NAME, 0);
    } else {
      boolean nullAllowed = elementType.isRepetition(Type.Repetition.OPTIONAL);
      if (!nullAllowed) {
        throw new RuntimeException(
            "Null list element for " + schema.getName());
      }
    }
    recordConsumer.endGroup();
  }
  recordConsumer.endField(LIST_REPEATED_NAME, 0);
}
 
Example 7
Source File: ThriftRecordConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a converter for the elements of a list.
 *
 * <p>If the element type is OPTIONAL, construction either logs a warning (when
 * {@code ignoreNullElements} is set) or fails, because null elements cannot be represented.
 *
 * @param listName      name of the list, used in the warning and error messages
 * @param listEvents    event list stored on this converter
 * @param repeatedType  repeated group whose first child is the element type
 * @param thriftElement Thrift field passed on to {@code newConverter} for the element
 * @throws ParquetDecodingException if elements are OPTIONAL and nulls are not ignored
 */
public ElementConverter(String listName, List<TProtocol> listEvents,
                        GroupType repeatedType, ThriftField thriftElement) {
  this.listEvents = listEvents;
  this.elementEvents = new ArrayList<TProtocol>();
  Type elementType = repeatedType.getType(0);
  boolean optionalElements = elementType.isRepetition(Type.Repetition.OPTIONAL);
  if (optionalElements && !ignoreNullElements) {
    throw new ParquetDecodingException("Cannot read list " + listName +
        " with optional elements: set " + IGNORE_NULL_LIST_ELEMENTS +
        " to ignore nulls.");
  }
  if (optionalElements) {
    LOG.warn("List " + listName +
        " has optional elements: null elements are ignored.");
  }
  elementConverter = newConverter(elementEvents, elementType, thriftElement);
}
 
Example 8
Source File: AvroWriteSupportInt96Avro18.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a collection through {@code recordConsumer} as a repeated group of elements.
 *
 * <p>An empty collection emits nothing. Every element gets its own group; a null element
 * leaves the group empty and is rejected unless the element type is OPTIONAL.
 *
 * @param type       Parquet list group type; its first child is the repeated group
 * @param schema     Avro schema supplying the element schema
 * @param collection elements to write
 * @throws RuntimeException if a null element is found and the element type is not OPTIONAL
 */
@Override
protected void writeCollection(GroupType type, Schema schema, Collection<?> collection) {
  if (collection.size() <= 0) {
    return;
  }
  recordConsumer.startField(LIST_REPEATED_NAME, 0);
  GroupType repeatedGroup = type.getType(0).asGroupType();
  Type elementType = repeatedGroup.getType(0);
  for (Object item : collection) {
    recordConsumer.startGroup(); // repeated group array, middle layer
    if (item == null) {
      if (!elementType.isRepetition(Type.Repetition.OPTIONAL)) {
        throw new RuntimeException(
            "Null list element for " + schema.getName());
      }
    } else {
      recordConsumer.startField(LIST_ELEMENT_NAME, 0);
      writeValue(elementType, schema.getElementType(), item);
      recordConsumer.endField(LIST_ELEMENT_NAME, 0);
    }
    recordConsumer.endGroup();
  }
  recordConsumer.endField(LIST_REPEATED_NAME, 0);
}
 
Example 9
Source File: SimpleGroup.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Appends a value to the value list of the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the field within {@code schema}
 * @param value      value to append
 * @throws IllegalStateException if the field is not REPEATED and already holds a value
 */
private void add(int fieldIndex, Primitive value) {
  Type fieldType = schema.getType(fieldIndex);
  List<Object> values = data[fieldIndex];
  boolean singleValued = !fieldType.isRepetition(Type.Repetition.REPEATED);
  if (singleValued && !values.isEmpty()) {
    throw new IllegalStateException("field "+fieldIndex+" (" + fieldType.getName() + ") can not have more than one value: " + values);
  }
  values.add(value);
}
 
Example 10
Source File: ParquetGroup.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Appends a value to the value list of the field at {@code fieldIndex}.
 *
 * @param fieldIndex index of the field within this group's schema
 * @param value      value to append
 * @throws IllegalStateException if the field is not REPEATED and already holds a value
 */
public void add(int fieldIndex, Primitive value) {
  Type fieldType = this.schema.getType(fieldIndex);
  List<Object> values = this.data[fieldIndex];
  boolean repeated = fieldType.isRepetition(REPEATED);
  if (!repeated && !values.isEmpty()) {
    throw new IllegalStateException(
        "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + values);
  }
  values.add(value);
}
 
Example 11
Source File: AvroWriteSupportInt96Avro17.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a map through {@code recordConsumer} as a wrapper group containing repeated
 * key/value groups.
 *
 * <p>The wrapper group is always emitted; the repeated field is only emitted for a non-empty
 * map. A null value is left out of its entry group and is only accepted when the Parquet value
 * type is OPTIONAL.
 *
 * @param schema     Parquet map group type; its first child is the repeated key/value group
 * @param avroSchema Avro schema supplying the value schema
 * @param map        entries to write
 * @param <V>        value type of the map
 * @throws RuntimeException if a value is null and the value type is not OPTIONAL
 */
private <V> void writeMap(GroupType schema, Schema avroSchema,
    Map<CharSequence, V> map) {
  GroupType keyValueGroup = schema.getType(0).asGroupType();
  Type keyType = keyValueGroup.getType(0);
  Type valueType = keyValueGroup.getType(1);

  recordConsumer.startGroup(); // group wrapper (original type MAP)
  if (map.size() > 0) {
    recordConsumer.startField(MAP_REPEATED_NAME, 0);

    for (Map.Entry<CharSequence, V> pair : map.entrySet()) {
      recordConsumer.startGroup(); // repeated group key_value, middle layer

      recordConsumer.startField(MAP_KEY_NAME, 0);
      writeValue(keyType, MAP_KEY_SCHEMA, pair.getKey());
      recordConsumer.endField(MAP_KEY_NAME, 0);

      V mapped = pair.getValue();
      if (mapped == null) {
        if (!valueType.isRepetition(Type.Repetition.OPTIONAL)) {
          throw new RuntimeException("Null map value for " + avroSchema.getName());
        }
      } else {
        recordConsumer.startField(MAP_VALUE_NAME, 1);
        writeValue(valueType, avroSchema.getValueType(), mapped);
        recordConsumer.endField(MAP_VALUE_NAME, 1);
      }

      recordConsumer.endGroup();
    }

    recordConsumer.endField(MAP_REPEATED_NAME, 0);
  }
  recordConsumer.endGroup();
}
 
Example 12
Source File: AvroWriteSupport.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Emits a map as a wrapper group holding one repeated key/value group per entry.
 *
 * <p>The wrapper group is written even for an empty map; the repeated field is written only
 * when the map has entries. Null values are omitted from their entry group and are rejected
 * unless the Parquet value type is OPTIONAL.
 *
 * @param schema     Parquet map group type; its first child is the repeated key/value group
 * @param avroSchema Avro schema providing the value schema
 * @param map        entries to write
 * @param <V>        value type of the map
 * @throws RuntimeException if a value is null and the value type is not OPTIONAL
 */
private <V> void writeMap(GroupType schema, Schema avroSchema,
                          Map<CharSequence, V> map) {
  GroupType entryGroup = schema.getType(0).asGroupType();
  Type keyType = entryGroup.getType(0);
  Type valueType = entryGroup.getType(1);

  recordConsumer.startGroup(); // group wrapper (original type MAP)
  boolean hasEntries = map.size() > 0;
  if (hasEntries) {
    recordConsumer.startField(MAP_REPEATED_NAME, 0);

    for (Map.Entry<CharSequence, V> kv : map.entrySet()) {
      recordConsumer.startGroup(); // repeated group key_value, middle layer
      recordConsumer.startField(MAP_KEY_NAME, 0);
      writeValue(keyType, MAP_KEY_SCHEMA, kv.getKey());
      recordConsumer.endField(MAP_KEY_NAME, 0);
      V entryValue = kv.getValue();
      if (entryValue != null) {
        recordConsumer.startField(MAP_VALUE_NAME, 1);
        writeValue(valueType, avroSchema.getValueType(), entryValue);
        recordConsumer.endField(MAP_VALUE_NAME, 1);
      } else {
        boolean nullAllowed = valueType.isRepetition(Type.Repetition.OPTIONAL);
        if (!nullAllowed) {
          throw new RuntimeException("Null map value for " + avroSchema.getName());
        }
      }
      recordConsumer.endGroup();
    }

    recordConsumer.endField(MAP_REPEATED_NAME, 0);
  }
  recordConsumer.endGroup();
}
 
Example 13
Source File: TestStatistics.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code recordCount} generated groups to the given writer.
 *
 * <p>For each column the matching generator from {@code randomGenerators} supplies a value,
 * appended according to the column's primitive type. An OPTIONAL column is left unset whenever
 * its generator reports {@code shouldGenerateNull()}.
 *
 * @param writer destination for the generated groups
 * @throws IOException if the writer fails
 */
@Override
public void write(ParquetWriter<Group> writer) throws IOException {
  int written = 0;
  while (written < recordCount) {
    Group record = new SimpleGroup(super.schema);

    final int columnCnt = schema.getFieldCount();
    for (int column = 0; column < columnCnt; ++column) {
      Type columnType = schema.getType(column);
      RandomValueGenerator<?> generator = randomGenerators.get(column);
      boolean leaveNull = columnType.isRepetition(OPTIONAL) && generator.shouldGenerateNull();
      if (!leaveNull) {
        switch (columnType.asPrimitiveType().getPrimitiveTypeName()) {
        case BINARY:
        case FIXED_LEN_BYTE_ARRAY:
        case INT96:
          record.append(columnType.getName(), ((RandomBinaryBase<?>) generator).nextBinaryValue());
          break;
        case INT32:
          record.append(columnType.getName(), (Integer) generator.nextValue());
          break;
        case INT64:
          record.append(columnType.getName(), (Long) generator.nextValue());
          break;
        case FLOAT:
          record.append(columnType.getName(), (Float) generator.nextValue());
          break;
        case DOUBLE:
          record.append(columnType.getName(), (Double) generator.nextValue());
          break;
        case BOOLEAN:
          record.append(columnType.getName(), (Boolean) generator.nextValue());
          break;
        }
      }
    }
    writer.write(record);
    written++;
  }
}
 
Example 14
Source File: TupleConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a new record by allocating a fresh tuple of {@code schemaSize} slots.
 *
 * <p>When {@code elephantBirdCompatible} is set, the slots of optional primitive fields of
 * type INT32, INT64, FLOAT, DOUBLE or BOOLEAN are pre-filled with the matching
 * {@code *_ZERO} constant (BOOLEAN shares {@code I32_ZERO} with INT32). Other fields are left
 * untouched.
 *
 * @throws RuntimeException wrapping any {@code ExecException} raised while setting a slot
 */
@Override
public final void start() {
  currentTuple = TF.newTuple(schemaSize);
  if (!elephantBirdCompatible) {
    return;
  }
  try {
    int position = 0;
    for (Type field : parquetSchema.getFields()) {
      boolean optionalPrimitive = field.isPrimitive() && field.isRepetition(Repetition.OPTIONAL);
      if (optionalPrimitive) {
        switch (field.asPrimitiveType().getPrimitiveTypeName()) {
        case INT32:
        case BOOLEAN:
          currentTuple.set(position, I32_ZERO);
          break;
        case INT64:
          currentTuple.set(position, I64_ZERO);
          break;
        case FLOAT:
          currentTuple.set(position, FLOAT_ZERO);
          break;
        case DOUBLE:
          currentTuple.set(position, DOUBLE_ZERO);
          break;
        default:
          // no default value for the remaining primitive types
          break;
        }
      }
      position++;
    }
  } catch (ExecException e) {
    throw new RuntimeException(e);
  }
}
 
Example 15
Source File: TajoSchemaConverter.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a list of Parquet fields into a Tajo schema, one column per field.
 *
 * @param parquetFields Parquet fields to convert, in order
 * @return the schema built from the converted columns
 * @throws RuntimeException if any field has REPEATED repetition
 */
private Schema convertFields(List<Type> parquetFields) {
  List<Column> converted = new ArrayList<>();
  for (Type parquetField : parquetFields) {
    boolean repeated = parquetField.isRepetition(Type.Repetition.REPEATED);
    if (repeated) {
      throw new RuntimeException("REPEATED not supported outside LIST or" +
          " MAP. Type: " + parquetField);
    }
    Column column = convertField(parquetField);
    converted.add(column);
  }
  Column[] asArray = converted.toArray(new Column[converted.size()]);
  return SchemaBuilder.builder().addAll(asArray).build();
}
 
Example 16
Source File: ParquetValueReaders.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a reader in an {@code OptionReader} when the given type is OPTIONAL.
 *
 * @param type            Parquet type whose repetition decides whether wrapping is needed
 * @param definitionLevel definition level handed to the {@code OptionReader}
 * @param reader          reader for the underlying value
 * @param <T>             type of value produced by the reader
 * @return {@code reader} itself for non-optional types, otherwise an {@code OptionReader}
 *         around it
 */
public static <T> ParquetValueReader<T> option(Type type, int definitionLevel,
                                               ParquetValueReader<T> reader) {
  boolean optional = type.isRepetition(Type.Repetition.OPTIONAL);
  if (!optional) {
    return reader;
  }
  return new OptionReader<>(definitionLevel, reader);
}
 
Example 17
Source File: ParquetValueWriters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a writer in an {@code OptionWriter} when the given type is OPTIONAL.
 *
 * @param type            Parquet type whose repetition decides whether wrapping is needed
 * @param definitionLevel definition level handed to the {@code OptionWriter}
 * @param writer          writer for the underlying value
 * @param <T>             type of value accepted by the writer
 * @return {@code writer} itself for non-optional types, otherwise an {@code OptionWriter}
 *         around it
 */
public static <T> ParquetValueWriter<T> option(Type type,
                                               int definitionLevel,
                                               ParquetValueWriter<T> writer) {
  boolean optional = type.isRepetition(Type.Repetition.OPTIONAL);
  if (!optional) {
    return writer;
  }

  return new OptionWriter<>(definitionLevel, writer);
}
 
Example 18
Source File: TestColumnIndexes.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates one group with a value for each column of {@code SCHEMA} that has a generator.
 *
 * <p>A column is left unset when its generator is {@code null}, or, for OPTIONAL columns,
 * when {@code random.nextInt(50)} returns 0.
 *
 * @param generators value suppliers, indexed by column; entries may be {@code null}
 * @param random     randomness source used to decide when an optional column stays null
 * @return the populated group
 */
private Group createGroup(List<Supplier<?>> generators, Random random) {
  Group result = FACTORY.newGroup();
  final int columnCnt = SCHEMA.getFieldCount();
  for (int column = 0; column < columnCnt; ++column) {
    Type columnType = SCHEMA.getType(column);
    Supplier<?> supplier = generators.get(column);
    if (supplier == null) {
      continue;
    }
    // 2% chance of null value for an optional column
    if (columnType.isRepetition(OPTIONAL) && random.nextInt(50) == 0) {
      continue;
    }
    switch (columnType.asPrimitiveType().getPrimitiveTypeName()) {
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
    case INT96:
      result.append(columnType.getName(), (Binary) supplier.get());
      break;
    case INT32:
      result.append(columnType.getName(), (Integer) supplier.get());
      break;
    case INT64:
      result.append(columnType.getName(), (Long) supplier.get());
      break;
    case FLOAT:
      result.append(columnType.getName(), (Float) supplier.get());
      break;
    case DOUBLE:
      result.append(columnType.getName(), (Double) supplier.get());
      break;
    case BOOLEAN:
      result.append(columnType.getName(), (Boolean) supplier.get());
      break;
    }
  }
  return result;
}
 
Example 19
Source File: HiveClient.java    From garmadon with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a Parquet field to the name of the corresponding Hive column type.
 *
 * <p>Only primitive fields are supported: BINARY, INT32, INT64, FLOAT, DOUBLE and BOOLEAN map
 * to {@code string}, {@code int}, {@code bigint}, {@code float}, {@code double} and
 * {@code boolean}. A field with REPEATED repetition is wrapped as {@code array<...>}.
 *
 * @param field the Parquet field to map
 * @return the Hive type name for the field
 * @throws Exception if the field is not a primitive type or its primitive type has no mapping
 */
protected String inferHiveType(Type field) throws Exception {
    // asPrimitiveType() fails with a ClassCastException on group types; report those through
    // the same "unsupported" error as unmapped primitives instead.
    if (!field.isPrimitive()) {
        throw new Exception("Unsupported Data Type: " + field);
    }

    String primitiveName = field.asPrimitiveType().getPrimitiveTypeName().name();
    String fieldHiveType;

    switch (primitiveName) {
        case "BINARY":
            fieldHiveType = "string";
            break;
        case "INT32":
            fieldHiveType = "int";
            break;
        case "INT64":
            fieldHiveType = "bigint";
            break;
        case "FLOAT":
            fieldHiveType = "float";
            break;
        case "DOUBLE":
            fieldHiveType = "double";
            break;
        case "BOOLEAN":
            fieldHiveType = "boolean";
            break;
        default:
            throw new Exception("Unsupported Data Type: " + primitiveName);
    }

    if (field.isRepetition(Type.Repetition.REPEATED)) {
        fieldHiveType = "array<" + fieldHiveType + ">";
    }

    return fieldHiveType;
}
 
Example 20
Source File: MapKeyValuesSchemaConverter.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a repeated type in a new group type.
 *
 * @param repetition   repetition of the created group
 * @param alias        name of the created group
 * @param originalType original type annotation passed to the created group
 * @param nested       the type to wrap; must have REPEATED repetition
 * @return a group type containing {@code nested}
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
{
    boolean repeated = nested.isRepetition(Repetition.REPEATED);
    if (repeated) {
        return new GroupType(repetition, alias, originalType, nested);
    }
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}