Java Code Examples for org.apache.parquet.schema.PrimitiveType

The following examples show how to use org.apache.parquet.schema.PrimitiveType. They are extracted from open source projects; the originating project, source file, and license are listed above each example. You can vote up the examples you find useful and vote down the ones you don't.
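Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the two common ways to create a PrimitiveType and wrap it in a MessageType:

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type.Repetition;
import org.apache.parquet.schema.Types;

public class PrimitiveTypeSketch {
  public static void main(String[] args) {
    // Direct construction: repetition, physical type, field name.
    PrimitiveType id = new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.INT64, "id");

    // Builder-style construction with an original-type (logical) annotation.
    PrimitiveType name = Types.optional(PrimitiveTypeName.BINARY)
        .as(OriginalType.UTF8)
        .named("name");

    // A MessageType is the root group holding the fields.
    MessageType schema = new MessageType("example_schema", id, name);

    System.out.println(schema);                    // schema in Parquet's text representation
    System.out.println(id.getPrimitiveTypeName()); // INT64
    System.out.println(name.getOriginalType());    // UTF8
  }
}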
Example 1
Source Project: parquet-mr   Source File: TestColumnIO.java    License: Apache License 2.0 6 votes
@Test
public void testReadUsingSchemaWithRequiredFieldThatWasOptional(){
  MessageType originalSchema = new MessageType("schema",
          new PrimitiveType(OPTIONAL, INT32, "e"));
  MemPageStore store = new MemPageStore(1);
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(originalSchema);
  writeGroups(originalSchema, store, groupFactory.newGroup().append("e", 4));

  try {
    MessageType schemaWithRequiredFieldThatWasOptional = new MessageType("schema",
            new PrimitiveType(REQUIRED, INT32, "e")); // Incompatible schema: required when it was optional
    readGroups(store, originalSchema, schemaWithRequiredFieldThatWasOptional, 1);
    fail("should have thrown an incompatible schema exception");
  } catch (ParquetDecodingException e) {
    assertEquals("The requested schema is not compatible with the file schema. incompatible types: required int32 e != optional int32 e", e.getMessage());
  }
}
 
Example 2
Source Project: Bats   Source File: Metadata.java    License: Apache License 2.0 6 votes
private ColTypeInfo getColTypeInfo(MessageType schema, Type type, String[] path, int depth) {
  if (type.isPrimitive()) {
    PrimitiveType primitiveType = (PrimitiveType) type;
    int precision = 0;
    int scale = 0;
    if (primitiveType.getDecimalMetadata() != null) {
      precision = primitiveType.getDecimalMetadata().getPrecision();
      scale = primitiveType.getDecimalMetadata().getScale();
    }

    int repetitionLevel = schema.getMaxRepetitionLevel(path);
    int definitionLevel = schema.getMaxDefinitionLevel(path);

    return new ColTypeInfo(type.getOriginalType(), precision, scale, repetitionLevel, definitionLevel);
  }
  Type t = ((GroupType) type).getType(path[depth]);
  return getColTypeInfo(schema, t, path, depth + 1);
}
 
Example 3
Source Project: Bats   Source File: Metadata_V2.java    License: Apache License 2.0 6 votes
@Override
public void serialize(ColumnMetadata_v2 value, JsonGenerator jgen, SerializerProvider provider)
    throws IOException, JsonProcessingException {
  jgen.writeStartObject();
  jgen.writeArrayFieldStart("name");
  for (String n : value.name) {
    jgen.writeString(n);
  }
  jgen.writeEndArray();
  if (value.mxValue != null) {
    Object val;
    if (value.primitiveType == PrimitiveType.PrimitiveTypeName.BINARY && value.mxValue != null) {
      val = new String(((Binary) value.mxValue).getBytes());
    } else {
      val = value.mxValue;
    }
    jgen.writeObjectField("mxValue", val);
  }
  if (value.nulls != null) {
    jgen.writeObjectField("nulls", value.nulls);
  }
  jgen.writeEndObject();
}
 
Example 4
Source Project: Bats   Source File: ParquetRecordWriter.java    License: Apache License 2.0 6 votes
protected PrimitiveType getPrimitiveType(MaterializedField field) {
  MinorType minorType = field.getType().getMinorType();
  String name = field.getName();
  int length = ParquetTypeHelper.getLengthForMinorType(minorType);
  PrimitiveTypeName primitiveTypeName = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(minorType);
  if (Types.isDecimalType(minorType)) {
    primitiveTypeName = logicalTypeForDecimals;
    if (usePrimitiveTypesForDecimals) {
      if (field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT32)) {
        primitiveTypeName = PrimitiveTypeName.INT32;
      } else if (field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT64)) {
        primitiveTypeName = PrimitiveTypeName.INT64;
      }
    }

    length = DecimalUtility.getMaxBytesSizeForPrecision(field.getPrecision());
  }

  Repetition repetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
  OriginalType originalType = ParquetTypeHelper.getOriginalTypeForMinorType(minorType);
  DecimalMetadata decimalMetadata = ParquetTypeHelper.getDecimalMetadataForField(field);
  return new PrimitiveType(repetition, primitiveTypeName, length, name, originalType, decimalMetadata, null);
}
 
Example 5
Source Project: presto   Source File: TestParquetPredicateUtils.java    License: Apache License 2.0 6 votes
@Test
public void testParquetTupleDomainPrimitive()
{
    HiveColumnHandle columnHandle = createBaseColumn("my_primitive", 0, HiveType.valueOf("bigint"), BIGINT, REGULAR, Optional.empty());
    Domain singleValueDomain = Domain.singleValue(BIGINT, 123L);
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, singleValueDomain));

    MessageType fileSchema = new MessageType("hive_schema", new PrimitiveType(OPTIONAL, INT64, "my_primitive"));

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain, fileSchema, true);

    assertEquals(tupleDomain.getDomains().get().size(), 1);
    ColumnDescriptor descriptor = tupleDomain.getDomains().get().keySet().iterator().next();
    assertEquals(descriptor.getPath().length, 1);
    assertEquals(descriptor.getPath()[0], "my_primitive");

    Domain predicateDomain = Iterables.getOnlyElement(tupleDomain.getDomains().get().values());
    assertEquals(predicateDomain, singleValueDomain);
}
 
Example 6
Source Project: presto   Source File: TestParquetPredicateUtils.java    License: Apache License 2.0 6 votes
@Test
public void testParquetTupleDomainStruct()
{
    RowType rowType = rowType(
            RowType.field("a", INTEGER),
            RowType.field("b", INTEGER));

    HiveColumnHandle columnHandle = createBaseColumn("my_struct", 0, HiveType.valueOf("struct<a:int,b:int>"), rowType, REGULAR, Optional.empty());
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(rowType)));

    MessageType fileSchema = new MessageType("hive_schema",
            new GroupType(OPTIONAL, "my_struct",
                    new PrimitiveType(OPTIONAL, INT32, "a"),
                    new PrimitiveType(OPTIONAL, INT32, "b")));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain, fileSchema, true);
    assertTrue(tupleDomain.isAll());
}
 
Example 7
Source Project: presto   Source File: TestParquetPredicateUtils.java    License: Apache License 2.0 6 votes
@Test
public void testParquetTupleDomainMap()
{
    MapType mapType = new MapType(
            INTEGER,
            INTEGER,
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"));

    HiveColumnHandle columnHandle = createBaseColumn("my_map", 0, HiveType.valueOf("map<int,int>"), mapType, REGULAR, Optional.empty());

    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(mapType)));

    MessageType fileSchema = new MessageType("hive_schema",
            new GroupType(OPTIONAL, "my_map",
                    new GroupType(REPEATED, "map",
                            new PrimitiveType(REQUIRED, INT32, "key"),
                            new PrimitiveType(OPTIONAL, INT32, "value"))));

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain, fileSchema, true);
    assertTrue(tupleDomain.isAll());
}
 
Example 8
Source Project: presto   Source File: MessageTypeConverter.java    License: Apache License 2.0 6 votes
private static org.apache.parquet.format.Type getType(PrimitiveType.PrimitiveTypeName type)
{
    switch (type) {
        case INT64:
            return Type.INT64;
        case INT32:
            return Type.INT32;
        case BOOLEAN:
            return Type.BOOLEAN;
        case BINARY:
            return Type.BYTE_ARRAY;
        case FLOAT:
            return Type.FLOAT;
        case DOUBLE:
            return Type.DOUBLE;
        case INT96:
            return Type.INT96;
        case FIXED_LEN_BYTE_ARRAY:
            return Type.FIXED_LEN_BYTE_ARRAY;
        default:
            throw new RuntimeException("Unknown primitive type " + type);
    }
}
 
Example 9
Source Project: pxf   Source File: ParquetResolverTest.java    License: Apache License 2.0 6 votes
private MessageType getParquetSchemaForPrimitiveTypes(Type.Repetition repetition, boolean readCase) {
    List<Type> fields = new ArrayList<>();

    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "s1", OriginalType.UTF8));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "s2", OriginalType.UTF8));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT32, "n1", null));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.DOUBLE, "d1", null));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, 16, "dc1", OriginalType.DECIMAL, new DecimalMetadata(38, 18), null));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT96, "tm", null));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.FLOAT, "f", null));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT64, "bg", null));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BOOLEAN, "b", null));

    // GPDB only has an int16 type and no int8 type, so when writing, tiny int8 numbers are still treated as short int16 values
    OriginalType tinyType = readCase ? OriginalType.INT_8 : OriginalType.INT_16;
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT32, "tn", tinyType));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT32, "sml", OriginalType.INT_16));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "vc1", OriginalType.UTF8));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "c1", OriginalType.UTF8));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "bin", null));

    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT96, "tmtz", null));
    fields.add(new PrimitiveType(repetition, PrimitiveTypeName.INT96, "tmtz2", null));

    return new MessageType("hive_schema", fields);
}
 
Example 10
private Schema addLogicalTypeToSchema(
    Schema schema,
    OriginalType annotation,
    PrimitiveType asPrimitive,
    PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName
) {
  LogicalType logicalType = convertOriginalTypeToLogicalType(
      annotation, asPrimitive.getDecimalMetadata());
  if (logicalType != null && (annotation != DECIMAL ||
      parquetPrimitiveTypeName == BINARY ||
      parquetPrimitiveTypeName == FIXED_LEN_BYTE_ARRAY)) {
    schema = logicalType.addToSchema(schema);
  }

  return schema;
}
 
Example 11
Source Project: presto   Source File: TestMetadataReader.java    License: Apache License 2.0 6 votes
@Test(dataProvider = "allCreatedBy")
public void testReadStatsInt64(Optional<String> fileCreatedBy)
{
    Statistics statistics = new Statistics();
    statistics.setNull_count(13);
    statistics.setMin(fromHex("F6FFFFFFFFFFFFFF"));
    statistics.setMax(fromHex("3AA4000000000000"));
    assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, INT64, "Test column")))
            .isInstanceOfSatisfying(LongStatistics.class, columnStatistics -> {
                assertEquals(columnStatistics.getNumNulls(), 13);
                assertEquals(columnStatistics.getMin(), -10);
                assertEquals(columnStatistics.getMax(), 42042);
                assertEquals(columnStatistics.genericGetMin(), (Long) (long) -10L);
                assertEquals(columnStatistics.genericGetMax(), (Long) 42042L);
            });
}
 
Example 12
Source Project: garmadon   Source File: HiveClientTest.java    License: Apache License 2.0 6 votes
@Test
public void createTableWithoutIssue() throws SQLException {
    PrimitiveType appId = new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BINARY, "app_id");

    MessageType schema = new MessageType("fs", appId);

    String table = "fs";
    String location = "file:" + hdfsTemp + "/garmadon_database/fs";
    HiveClient hiveClient = new HiveClient(driverName, "jdbc:hive2://localhost:" + port, "garmadon",
        hdfsTemp + "/garmadon_database");
    hiveClient.createTableIfNotExist(table, schema, location);

    HashMap<String, String> result = getResultHashTableDesc(hiveClient, table);
    assertEquals(location, result.get("Location"));
    assertEquals("EXTERNAL_TABLE", result.get("Table Type").trim());
    assertEquals("string", result.get("day"));
    assertEquals("string", result.get("app_id"));
}
 
Example 13
Source Project: garmadon   Source File: HiveClientTest.java    License: Apache License 2.0 6 votes
@Test
public void shouldProvideHiveTypeFromParquetType() throws Exception {
    HiveClient hiveClient = new HiveClient(driverName, "jdbc:hive2://localhost:" + port, "garmadon",
        hdfsTemp + "/garmadon_database");

    PrimitiveType string = new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BINARY, "name");
    assertEquals("string", hiveClient.inferHiveType(string));

    PrimitiveType array_string = new PrimitiveType(Type.Repetition.REPEATED, PrimitiveType.PrimitiveTypeName.BINARY, "name");
    assertEquals("array<string>", hiveClient.inferHiveType(array_string));

    PrimitiveType int32 = new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.INT32, "name");
    assertEquals("int", hiveClient.inferHiveType(int32));

    PrimitiveType int64 = new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.INT64, "name");
    assertEquals("bigint", hiveClient.inferHiveType(int64));

    PrimitiveType floatz = new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.FLOAT, "name");
    assertEquals("float", hiveClient.inferHiveType(floatz));

    PrimitiveType doublez = new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.DOUBLE, "name");
    assertEquals("double", hiveClient.inferHiveType(doublez));

    PrimitiveType booleanz = new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BOOLEAN, "name");
    assertEquals("boolean", hiveClient.inferHiveType(booleanz));
}
 
Example 14
Source Project: iceberg   Source File: ParquetMetricsRowGroupFilter.java    License: Apache License 2.0 6 votes
private boolean eval(MessageType fileSchema, BlockMetaData rowGroup) {
  if (rowGroup.getRowCount() <= 0) {
    return ROWS_CANNOT_MATCH;
  }

  this.stats = Maps.newHashMap();
  this.valueCounts = Maps.newHashMap();
  this.conversions = Maps.newHashMap();
  for (ColumnChunkMetaData col : rowGroup.getColumns()) {
    PrimitiveType colType = fileSchema.getType(col.getPath().toArray()).asPrimitiveType();
    if (colType.getId() != null) {
      int id = colType.getId().intValue();
      stats.put(id, col.getStatistics());
      valueCounts.put(id, col.getValueCount());
      conversions.put(id, ParquetConversions.converterFromParquet(colType));
    }
  }

  return ExpressionVisitors.visitEvaluator(expr, this);
}
 
Example 15
Source Project: Bats   Source File: ParquetColumnMetadata.java    License: Apache License 2.0 5 votes
/**
 * Returns the data type length for a given {@link ColumnDescriptor} and its corresponding
 * {@link SchemaElement}. Neither is enough information alone, as the max
 * repetition level (indicating if it is an array type) is in the ColumnDescriptor and
 * the length of a fixed width field is stored at the schema level.
 *
 * @return the length if fixed width, else <tt>UNDEFINED_LENGTH</tt> (-1)
 */
public int getDataTypeLength() {
  if (! isFixedLength()) {
    return UNDEFINED_LENGTH;
  } else if (isRepeated()) {
    return UNDEFINED_LENGTH;
  } else if (column.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    return se.getType_length() * 8;
  } else {
    return getTypeLengthInBits(column.getType());
  }
}
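For comparison, a minimal sketch (assumed usage, not taken from the Bats project, using the same imports as the introductory sketch above) of how the fixed width is exposed directly on PrimitiveType when no Drill ColumnDescriptor/SchemaElement pair is involved:

PrimitiveType dc1 = Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
    .length(16)
    .named("dc1");
int widthInBytes = dc1.getTypeLength(); // 16, as declared at the schema level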
 
Example 16
Source Project: parquet-mr   Source File: TestStatistics.java    License: Apache License 2.0 5 votes
private void testBuilder(PrimitiveType type, Object min, byte[] minBytes, Object max, byte[] maxBytes) {
  Statistics.Builder builder = Statistics.getBuilderForReading(type);
  Statistics<?> stats = builder.build();
  assertTrue(stats.isEmpty());
  assertFalse(stats.isNumNullsSet());
  assertFalse(stats.hasNonNullValue());

  builder = Statistics.getBuilderForReading(type);
  stats = builder.withNumNulls(0).withMin(minBytes).build();
  assertFalse(stats.isEmpty());
  assertTrue(stats.isNumNullsSet());
  assertFalse(stats.hasNonNullValue());
  assertEquals(0, stats.getNumNulls());

  builder = Statistics.getBuilderForReading(type);
  stats = builder.withNumNulls(11).withMax(maxBytes).build();
  assertFalse(stats.isEmpty());
  assertTrue(stats.isNumNullsSet());
  assertFalse(stats.hasNonNullValue());
  assertEquals(11, stats.getNumNulls());

  builder = Statistics.getBuilderForReading(type);
  stats = builder.withNumNulls(42).withMin(minBytes).withMax(maxBytes).build();
  assertFalse(stats.isEmpty());
  assertTrue(stats.isNumNullsSet());
  assertTrue(stats.hasNonNullValue());
  assertEquals(42, stats.getNumNulls());
  assertEquals(min, stats.genericGetMin());
  assertEquals(max, stats.genericGetMax());
}
 
Example 17
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 5 votes
private void testV2StatsEqualMinMax(PrimitiveType type, Object min, Object max) {
  Statistics<?> stats = createStats(type, min, max);
  org.apache.parquet.format.Statistics statistics = ParquetMetadataConverter.toParquetStatistics(stats);
  assertEquals(ByteBuffer.wrap(stats.getMinBytes()), statistics.min);
  assertEquals(ByteBuffer.wrap(stats.getMaxBytes()), statistics.max);
  assertEquals(ByteBuffer.wrap(stats.getMinBytes()), statistics.min_value);
  assertEquals(ByteBuffer.wrap(stats.getMaxBytes()), statistics.max_value);
}
 
Example 18
Source Project: pxf   Source File: ParquetOperatorPrunerAndTransformer.java    License: Apache License 2.0 5 votes
/**
 * Returns the Parquet primitive type for the column referenced by the given operator node.
 *
 * @param operatorNode the operator node
 * @return the Parquet primitive type of the referenced column
 */
private PrimitiveType.PrimitiveTypeName getPrimitiveType(OperatorNode operatorNode) {
    ColumnIndexOperandNode columnIndexOperand = operatorNode.getColumnIndexOperand();
    ColumnDescriptor columnDescriptor = columnDescriptors.get(columnIndexOperand.index());
    String filterColumnName = columnDescriptor.columnName();
    Type type = fields.get(filterColumnName);
    return type.asPrimitiveType().getPrimitiveTypeName();
}
 
Example 19
Source Project: Bats   Source File: Metadata_V1.java    License: Apache License 2.0 5 votes
@JsonProperty(value = "min")
public Object getMin() {
  if (primitiveType == PrimitiveType.PrimitiveTypeName.BINARY && min != null) {
    return new String(((Binary) min).getBytes());
  }
  return min;
}
 
Example 20
Source Project: parquet-mr   Source File: DoubleColumnIndexBuilder.java    License: Apache License 2.0 5 votes
@Override
ColumnIndexBase<Double> createColumnIndex(PrimitiveType type) {
  if (invalid) {
    return null;
  }
  DoubleColumnIndex columnIndex = new DoubleColumnIndex(type);
  columnIndex.minValues = minValues.toDoubleArray();
  columnIndex.maxValues = maxValues.toDoubleArray();
  return columnIndex;
}
 
Example 21
Source Project: parquet-mr   Source File: Statistics.java    License: Apache License 2.0 5 votes
Statistics(PrimitiveType type) {
  this.type = type;
  this.comparator = type.comparator();
  this.stringifier = type.stringifier();
  hasNonNullValue = false;
  num_nulls = 0;
}
 
Example 22
Source Project: flink   Source File: RowConverter.java    License: Apache License 2.0 5 votes
@Override
public void addBinary(Binary value) {
	// in case it is a timestamp type stored as INT96
	if (primitiveTypeName.equals(PrimitiveType.PrimitiveTypeName.INT96)) {
		parentDataHolder.add(pos, new Timestamp(ParquetTimestampUtils.getTimestampMillis(value)));
		return;
	}

	if (originalType != null) {
		switch (originalType) {
			case DECIMAL:
				parentDataHolder.add(pos, new BigDecimal(value.toStringUsingUTF8().toCharArray()));
				break;
			case UTF8:
			case ENUM:
			case JSON:
			case BSON:
				parentDataHolder.add(pos, value.toStringUsingUTF8());
				break;
			default:
				throw new UnsupportedOperationException("Unsupported original type : " + originalType.name()
					+ " for primitive type BINARY");
		}
	} else {
		parentDataHolder.add(pos, value.toStringUsingUTF8());
	}
}
 
Example 23
Source Project: parquet-mr   Source File: ThriftSchemaConvertVisitor.java    License: Apache License 2.0 5 votes
private ConvertedField visitPrimitiveType(PrimitiveTypeName type, LogicalTypeAnnotation orig, State state) {
  PrimitiveBuilder<PrimitiveType> b = primitive(type, state.repetition);

  if (orig != null) {
    b = b.as(orig);
  }

  if (fieldProjectionFilter.keep(state.path)) {
    return new Keep(state.path, b.named(state.name));
  } else {
    return new Drop(state.path);
  }
}
 
Example 24
Source Project: parquet-mr   Source File: DataWritableReadSupport.java    License: Apache License 2.0 5 votes
/**
 * Creates the Parquet-side ReadContext with the requested schema during the init phase.
 *
 * @param configuration needed to get the wanted columns
 * @param keyValueMetaData unused
 * @param fileSchema the Parquet file schema
 * @return the Parquet ReadContext
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(final Configuration configuration,
    final Map<String, String> keyValueMetaData, final MessageType fileSchema) {
  final String columns = configuration.get(IOConstants.COLUMNS);
  final Map<String, String> contextMetadata = new HashMap<String, String>();
  if (columns != null) {
    final List<String> listColumns = getColumns(columns);

    final List<Type> typeListTable = new ArrayList<Type>();
    for (final String col : listColumns) {
      // listColumns contains partition columns which are metadata only
      if (fileSchema.containsField(col)) {
        typeListTable.add(fileSchema.getType(col));
      } else {
        // below allows schema evolution
        typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col));
      }
    }
    MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable);
    contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString());

    MessageType requestedSchemaByUser = tableSchema;
    final List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);

    final List<Type> typeListWanted = new ArrayList<Type>();
    for (final Integer idx : indexColumnsWanted) {
      typeListWanted.add(tableSchema.getType(listColumns.get(idx)));
    }
    requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
            typeListWanted), fileSchema, configuration);

    return new ReadContext(requestedSchemaByUser, contextMetadata);
  } else {
    contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString());
    return new ReadContext(fileSchema, contextMetadata);
  }
}
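As a hypothetical illustration of the schema-evolution trick above (not part of DataWritableReadSupport, using the same imports as the introductory sketch), the requested schema is just another MessageType that reuses fields from the file schema and substitutes an OPTIONAL BINARY placeholder for any column the file does not contain:

MessageType fileSchema = new MessageType("hive_schema",
    new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "id"),
    new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "name"));

// Keep "name" from the file and add a column that only exists in the table definition.
MessageType requested = new MessageType(fileSchema.getName(),
    fileSchema.getType("name"),
    new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "added_later"));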
 
Example 25
Source Project: Bats   Source File: Metadata_V3.java    License: Apache License 2.0 5 votes
public ColumnMetadata_v3(String[] name, PrimitiveType.PrimitiveTypeName primitiveType, Object minValue, Object maxValue, Long nulls) {
  this.name = name;
  this.minValue = minValue;
  this.maxValue = maxValue;
  this.nulls = nulls;
  this.primitiveType = primitiveType;
}
 
Example 26
Source Project: parquet-mr   Source File: ColumnIOFactory.java    License: Apache License 2.0 5 votes
@Override
public void visit(PrimitiveType primitiveType) {
  if (!currentRequestedType.isPrimitive() || 
          (this.strictTypeChecking && currentRequestedType.asPrimitiveType().getPrimitiveTypeName() != primitiveType.getPrimitiveTypeName())) {
    incompatibleSchema(primitiveType, currentRequestedType);
  }
  PrimitiveColumnIO newIO = new PrimitiveColumnIO(primitiveType, current, currentRequestedIndex, leaves.size());
  current.add(newIO);
  leaves.add(newIO);
}
 
Example 27
Source Project: flink   Source File: FixedLenBytesColumnReader.java    License: Apache License 2.0 5 votes vote down vote up
public FixedLenBytesColumnReader(
		ColumnDescriptor descriptor,
		PageReader pageReader,
		int precision) throws IOException {
	super(descriptor, pageReader);
	checkTypeName(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY);
	this.precision = precision;
}
 
Example 28
Source Project: parquet-mr   Source File: MetadataUtils.java    License: Apache License 2.0 5 votes
private static void showDetails(PrettyPrintWriter out, Type type, int depth, MessageType container, List<String> cpath) {
  if (type instanceof GroupType) {
    showDetails(out, type.asGroupType(), depth, container, cpath);
    return;
  } else if (type instanceof PrimitiveType) {
    showDetails(out, type.asPrimitiveType(), depth, container, cpath);
    return;
  }
}
 
Example 29
Source Project: parquet-mr   Source File: TupleConverter.java    License: Apache License 2.0 5 votes
@Override
final public void start() {
  currentTuple = TF.newTuple(schemaSize);
  if (elephantBirdCompatible) {
    try {
      int i = 0;
      for (Type field : parquetSchema.getFields()) {
        if (field.isPrimitive() && field.isRepetition(Repetition.OPTIONAL)) {
          PrimitiveType primitiveType = field.asPrimitiveType();
          switch (primitiveType.getPrimitiveTypeName()) {
          case INT32:
            currentTuple.set(i, I32_ZERO);
            break;
          case INT64:
            currentTuple.set(i, I64_ZERO);
            break;
          case FLOAT:
            currentTuple.set(i, FLOAT_ZERO);
            break;
          case DOUBLE:
            currentTuple.set(i, DOUBLE_ZERO);
            break;
          case BOOLEAN:
            currentTuple.set(i, I32_ZERO);
            break;
          }
        }
        ++ i;
      }
    } catch (ExecException e) {
      throw new RuntimeException(e);
    }
  }
}
 
Example 30
Source Project: parquet-mr   Source File: FloatColumnIndexBuilder.java    License: Apache License 2.0 5 votes
@Override
ColumnIndexBase<Float> createColumnIndex(PrimitiveType type) {
  if (invalid) {
    return null;
  }
  FloatColumnIndex columnIndex = new FloatColumnIndex(type);
  columnIndex.minValues = minValues.toFloatArray();
  columnIndex.maxValues = maxValues.toFloatArray();
  return columnIndex;
}