Java Code Examples for org.apache.iceberg.Schema

The following examples show how to use org.apache.iceberg.Schema. They are extracted from open source projects; the source project and file are noted above each example.
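
Before the project-extracted examples, here is a minimal, self-contained sketch (not taken from any project below) of constructing a Schema and looking up a column; the class name, field names, and IDs are illustrative:

import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

public class SchemaExample {
  public static void main(String[] args) {
    // A schema is an ordered list of named, typed fields, each with a unique field ID.
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));

    // Columns can be looked up by name; the schema prints its full structure.
    System.out.println(schema.findField("id"));
    System.out.println(schema);
  }
}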
Example 1
Source Project: iceberg   Source File: RowDataReader.java    License: Apache License 2.0
private CloseableIterable<InternalRow> newAvroIterable(
    InputFile location,
    FileScanTask task,
    Schema projection,
    Map<Integer, ?> idToConstant) {
  Avro.ReadBuilder builder = Avro.read(location)
      .reuseContainers()
      .project(projection)
      .split(task.start(), task.length())
      .createReaderFunc(readSchema -> new SparkAvroReader(projection, readSchema, idToConstant));

  if (nameMapping != null) {
    builder.withNameMapping(NameMappingParser.fromJson(nameMapping));
  }

  return builder.build();
}
 
Example 2
Source Project: dremio-oss   Source File: TestIcebergSerDe.java    License: Apache License 2.0
@Before
public void setUp() {
  schema = new Schema(
    required(0, "id", Types.LongType.get()),
    required(1, "data", Types.StringType.get()),
    required(2, "b", Types.BooleanType.get()),
    required(3, "i", Types.IntegerType.get()),
    required(4, "l", Types.LongType.get()),
    required(5, "f", Types.FloatType.get()),
    required(6, "d", Types.DoubleType.get()),
    required(7, "date", Types.DateType.get()),
    required(8, "ts", Types.TimestampType.withZone()),
    required(9, "s", Types.StringType.get()),
    required(10, "bytes", Types.BinaryType.get()),
    required(11, "dec_9_0", Types.DecimalType.of(9, 0)),
    required(12, "dec_11_2", Types.DecimalType.of(11, 2)),
    required(13, "dec_38_10", Types.DecimalType.of(38, 10))
  );
}
 
Example 3
Source Project: iceberg   Source File: IcebergStorage.java    License: Apache License 2.0
@Override
public List<String> getPredicateFields(String location, Job job) throws IOException {
  LOG.info("[{}]: getPredicateFields() -> {}", signature, location);
  Schema schema = load(location, job).schema();

  List<String> result = Lists.newArrayList();

  for (Types.NestedField nf : schema.columns()) {
    switch (nf.type().typeId()) {
      case MAP:
      case LIST:
      case STRUCT:
        continue;
      default:
        result.add(nf.name());
    }
  }

  return result;
}
 
Example 4
Source Project: iceberg   Source File: RandomData.java    License: Apache License 2.0
private static Iterable<Record> newIterable(Supplier<RandomDataGenerator> newGenerator,
                                            Schema schema, int numRecords) {
  return () -> new Iterator<Record>() {
    private int count = 0;
    private RandomDataGenerator generator = newGenerator.get();

    @Override
    public boolean hasNext() {
      return count < numRecords;
    }

    @Override
    public Record next() {
      if (count >= numRecords) {
        throw new NoSuchElementException();
      }
      count += 1;
      return (Record) TypeUtil.visit(schema, generator);
    }
  };
}
 
Example 5
Source Project: iceberg   Source File: ORCSchemaUtil.java    License: Apache License 2.0
/**
 * Convert an ORC schema to an Iceberg schema. If the original Iceberg column IDs are present in
 * the ORC column attributes, they are reused; otherwise, column IDs are assigned following ORC's
 * pre-order ID assignment.
 *
 * @param orcSchema the ORC schema to convert
 * @return the Iceberg schema
 */
public static Schema convert(TypeDescription orcSchema) {
  List<TypeDescription> children = orcSchema.getChildren();
  List<String> childrenNames = orcSchema.getFieldNames();
  Preconditions.checkState(children.size() == childrenNames.size(),
      "Error in ORC file, children fields and names do not match.");

  List<Types.NestedField> icebergFields = Lists.newArrayListWithExpectedSize(children.size());
  AtomicInteger lastColumnId = new AtomicInteger(getMaxIcebergId(orcSchema));
  for (int i = 0; i < children.size(); i++) {
    icebergFields.add(convertOrcToIceberg(children.get(i), childrenNames.get(i),
        lastColumnId::incrementAndGet));
  }

  return new Schema(icebergFields);
}
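
As the Javadoc above notes, original Iceberg IDs stored in ORC column attributes are reused on conversion. A hedged sketch (the field name and ID are illustrative, and the attribute key iceberg.id is how this integration is understood to record IDs):

Schema iceberg = new Schema(
    Types.NestedField.required(42, "id", Types.LongType.get()));

// Iceberg -> ORC records each field's ID in an ORC column attribute ("iceberg.id") ...
TypeDescription orc = ORCSchemaUtil.convert(iceberg);

// ... so ORC -> Iceberg recovers field ID 42 instead of assigning a fresh pre-order ID.
Schema back = ORCSchemaUtil.convert(orc);
// back.findField("id").fieldId() == 42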
 
Example 6
Source Project: iceberg   Source File: TestTruncatesProjection.java    License: Apache License 2.0
@Test
public void testIntegerInclusiveLowerBound() {
  Integer value = 100;
  Schema schema = new Schema(optional(1, "value", Types.IntegerType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();

  assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "90");
  assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "100");
  assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100");
  assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "100");
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "100");
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);

  assertProjectionInclusive(spec, in("value", value - 1, value, value + 1),
      Expression.Operation.IN, "[90, 100, 100]");
  assertProjectionInclusiveValue(spec, notIn("value", value, value + 1), Expression.Operation.TRUE);
}
 
Example 7
Source Project: iceberg   Source File: TestBuildOrcProjection.java    License: Apache License 2.0
@Test
public void testProjectionNestedNoOp() {
  Types.StructType nestedStructType = Types.StructType.of(
      optional(2, "b", Types.StringType.get()),
      optional(3, "c", Types.DateType.get())
  );
  Schema originalSchema = new Schema(
      optional(1, "a", nestedStructType)
  );

  // Original mapping (stored in ORC)
  TypeDescription orcSchema = ORCSchemaUtil.convert(originalSchema);

  TypeDescription newOrcSchema = ORCSchemaUtil.buildOrcProjection(originalSchema, orcSchema);
  assertEquals(1, newOrcSchema.getChildren().size());
  assertEquals(TypeDescription.Category.STRUCT, newOrcSchema.findSubtype("a").getCategory());
  TypeDescription nestedCol = newOrcSchema.findSubtype("a");
  assertEquals(2, nestedCol.findSubtype("b").getId());
  assertEquals(TypeDescription.Category.STRING, nestedCol.findSubtype("b").getCategory());
  assertEquals(3, nestedCol.findSubtype("c").getId());
  assertEquals(TypeDescription.Category.DATE, nestedCol.findSubtype("c").getCategory());
}
 
Example 8
Source Project: iceberg   Source File: ParquetSchemaUtil.java    License: Apache License 2.0
/**
 * Prunes columns from a Parquet file schema that was written without field ids.
 * <p>
 * Files that were written without field ids are read assuming that schema evolution preserved
 * column order and that no columns were deleted.
 * <p>
 * The order of columns in the resulting Parquet schema matches the Parquet file.
 *
 * @param fileSchema schema from a Parquet file that does not have field ids.
 * @param expectedSchema expected schema
 * @return a parquet schema pruned using the expected schema
 */
public static MessageType pruneColumnsFallback(MessageType fileSchema, Schema expectedSchema) {
  Set<Integer> selectedIds = Sets.newHashSet();

  for (Types.NestedField field : expectedSchema.columns()) {
    selectedIds.add(field.fieldId());
  }

  MessageTypeBuilder builder = org.apache.parquet.schema.Types.buildMessage();

  int ordinal = 1;
  for (Type type : fileSchema.getFields()) {
    if (selectedIds.contains(ordinal)) {
      builder.addField(type.withId(ordinal));
    }
    ordinal += 1;
  }

  return builder.named(fileSchema.getName());
}
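
In this fallback path, the field IDs in the expected schema are interpreted as 1-based column positions in the file. A hedged sketch (the two-column layout and names are made up for illustration):

MessageType fileSchema = org.apache.parquet.schema.Types.buildMessage()
    .optional(PrimitiveType.PrimitiveTypeName.INT64).named("id")     // position 1
    .optional(PrimitiveType.PrimitiveTypeName.BINARY).named("data")  // position 2
    .named("table");

// Selecting field ID 2 keeps the file's second column, "data".
Schema expected = new Schema(
    Types.NestedField.optional(2, "data", Types.StringType.get()));

MessageType pruned = ParquetSchemaUtil.pruneColumnsFallback(fileSchema, expected);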
 
Example 9
Source Project: iceberg   Source File: TestORCSchemaUtil.java    License: Apache License 2.0
@Test
public void testRoundtripConversionPrimitive() {
  Schema expectedSchema = new Schema(
      optional(1, "intCol", Types.IntegerType.get()),
      optional(3, "longCol", Types.LongType.get()),
      optional(6, "intCol2", Types.IntegerType.get()),
      optional(20, "intCol3", Types.IntegerType.get()),
      required(9, "doubleCol", Types.DoubleType.get()),
      required(10, "uuidCol", Types.UUIDType.get()),
      optional(2, "booleanCol", Types.BooleanType.get()),
      optional(21, "fixedCol", Types.FixedType.ofLength(4096)),
      required(22, "binaryCol", Types.BinaryType.get()),
      required(23, "stringCol", Types.StringType.get()),
      required(24, "decimalCol", Types.DecimalType.of(15, 3)),
      required(25, "floatCol", Types.FloatType.get()),
      optional(30, "dateCol", Types.DateType.get()),
      required(32, "timeCol", Types.TimeType.get()),
      required(34, "timestampCol", Types.TimestampType.withZone())
  );
  TypeDescription orcSchema = ORCSchemaUtil.convert(expectedSchema);
  assertEquals(expectedSchema.asStruct(), ORCSchemaUtil.convert(orcSchema).asStruct());
}
 
Example 10
Source Project: iceberg   Source File: TestBucketingProjection.java    License: Apache License 2.0
@Test
public void testBucketIntegerStrict() {
  Integer value = 100;
  Schema schema = new Schema(optional(1, "value", Types.IntegerType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("value", 10).build();

  // the bucket number of the value (i.e. 100) is 6
  assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "6");
  assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, lessThan("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, lessThanOrEqual("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, greaterThan("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, greaterThanOrEqual("value", value), Expression.Operation.FALSE);

  assertProjectionStrict(spec, notIn("value", value - 1, value, value + 1),
      Expression.Operation.NOT_IN, "[6, 7, 8]");
  assertProjectionStrictValue(spec, in("value", value, value + 1), Expression.Operation.FALSE);
}
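
The bucket number cited in the comment above can be reproduced directly with the transform API; a hedged sketch using the Transforms factory from this era of the library:

Transform<Integer, Integer> bucket = Transforms.bucket(Types.IntegerType.get(), 10);
Integer bucketed = bucket.apply(100);  // 6, matching the comment in the test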
 
Example 11
Source Project: iceberg   Source File: TestReadProjection.java    License: Apache License 2.0
@Test
public void testReorderedProjection() throws Exception {
  Schema schema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get())
  );

  Record record = GenericRecord.create(schema.asStruct());
  record.setField("id", 34L);
  record.setField("data", "test");

  Schema reordered = new Schema(
      Types.NestedField.optional(2, "missing_1", Types.StringType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get()),
      Types.NestedField.optional(3, "missing_2", Types.LongType.get())
  );

  Record projected = writeAndRead("full_projection", schema, reordered, record);

  Assert.assertNull("Should contain the correct 0 value", projected.get(0));
  Assert.assertEquals("Should contain the correct 1 value", "test", projected.get(1).toString());
  Assert.assertNull("Should contain the correct 2 value", projected.get(2));
}
 
Example 12
Source Project: iceberg   Source File: Writer.java    License: Apache License 2.0
Writer(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
       DataSourceOptions options, boolean replacePartitions, String applicationId, String wapId,
       Schema writeSchema, StructType dsSchema) {
  this.table = table;
  this.format = getFileFormat(table.properties(), options);
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.replacePartitions = replacePartitions;
  this.applicationId = applicationId;
  this.wapId = wapId;
  this.writeSchema = writeSchema;
  this.dsSchema = dsSchema;

  long tableTargetFileSize = PropertyUtil.propertyAsLong(
      table.properties(), WRITE_TARGET_FILE_SIZE_BYTES, WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT);
  this.targetFileSize = options.getLong("target-file-size-bytes", tableTargetFileSize);
}
 
Example 13
Source Project: iceberg   Source File: TestParquetReadProjection.java    License: Apache License 2.0
@Override
protected GenericData.Record writeAndRead(String desc,
                                          Schema writeSchema,
                                          Schema readSchema,
                                          GenericData.Record record)
    throws IOException {
  File file = temp.newFile(desc + ".parquet");
  file.delete();

  try (FileAppender<GenericData.Record> appender = Parquet.write(Files.localOutput(file))
      .schema(writeSchema)
      .build()) {
    appender.add(record);
  }

  Iterable<GenericData.Record> records = Parquet.read(Files.localInput(file))
      .project(readSchema)
      .callInit()
      .build();

  return Iterables.getOnlyElement(records);
}
 
Example 14
Source Project: iceberg   Source File: HadoopTables.java    License: Apache License 2.0
/**
 * Create a table using the FileSystem implementation resolved from the location.
 *
 * @param schema iceberg schema used to create the table
 * @param spec partitioning spec, if null the table will be unpartitioned
 * @param properties a string map of table properties, initialized to empty if null
 * @param location a path URI (e.g. hdfs:///warehouse/my_table)
 * @return newly created table implementation
 */
@Override
public Table create(Schema schema, PartitionSpec spec, Map<String, String> properties,
                    String location) {
  Preconditions.checkNotNull(schema, "A table schema is required");

  TableOperations ops = newTableOps(location);
  if (ops.current() != null) {
    throw new AlreadyExistsException("Table already exists at location: " + location);
  }

  Map<String, String> tableProps = properties == null ? ImmutableMap.of() : properties;
  PartitionSpec partitionSpec = spec == null ? PartitionSpec.unpartitioned() : spec;
  TableMetadata metadata = TableMetadata.newTableMetadata(schema, partitionSpec, location, tableProps);
  ops.commit(null, metadata);

  return new BaseTable(ops, location);
}
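
A hedged usage sketch of this method (the configuration, schema, and path are illustrative):

Configuration conf = new Configuration();
HadoopTables tables = new HadoopTables(conf);

Schema schema = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()));

Table table = tables.create(
    schema,
    PartitionSpec.unpartitioned(),   // or null, which also yields an unpartitioned table
    ImmutableMap.of(),               // or null for no table properties
    "file:///tmp/warehouse/my_table");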
 
Example 15
Source Project: iceberg   Source File: TestTruncatesProjection.java    License: Apache License 2.0
@Test
public void testStringInclusive() {
  String value = "abcdefg";
  Schema schema = new Schema(optional(1, "value", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();

  assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "abcde");
  assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "abcde");
  assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "abcde");
  assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "abcde");
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "abcde");
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);

  assertProjectionInclusive(spec, in("value", value, value + "abc"),
      Expression.Operation.IN, "[abcde, abcde]");
  assertProjectionInclusiveValue(spec, notIn("value", value, value + "abc"), Expression.Operation.TRUE);
}
 
Example 16
Source Project: iceberg   Source File: TestTruncatesProjection.java    License: Apache License 2.0
@Test
public void testBinaryInclusive() throws Exception {
  ByteBuffer value = ByteBuffer.wrap("abcdefg".getBytes("UTF-8"));
  Schema schema = new Schema(optional(1, "value", Types.BinaryType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();
  String expectedValue = TransformUtil.base64encode(ByteBuffer.wrap("abcde".getBytes("UTF-8")));

  assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, expectedValue);
  assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, expectedValue);
  assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, expectedValue);
  assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, expectedValue);
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, expectedValue);
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);

  ByteBuffer anotherValue = ByteBuffer.wrap("abcdehij".getBytes("UTF-8"));
  assertProjectionInclusive(spec, in("value", value, anotherValue),
      Expression.Operation.IN, String.format("[%s, %s]", expectedValue, expectedValue));
  assertProjectionInclusiveValue(spec, notIn("value", value, anotherValue), Expression.Operation.TRUE);
}
 
Example 17
Source Project: iceberg   Source File: IcebergSource.java    License: Apache License 2.0
@Override
public StreamWriter createStreamWriter(String runId, StructType dsStruct,
                                       OutputMode mode, DataSourceOptions options) {
  Preconditions.checkArgument(
      mode == OutputMode.Append() || mode == OutputMode.Complete(),
      "Output mode %s is not supported", mode);
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsStruct);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema, checkNullability(options), checkOrdering(options));
  SparkUtil.validatePartitionTransforms(table.spec());
  // Spark 2.4.x passes runId to createStreamWriter instead of real queryId,
  // so we fetch it directly from sparkContext to make writes idempotent
  String queryId = lazySparkSession().sparkContext().getLocalProperty(StreamExecution.QUERY_ID_KEY());
  String appId = lazySparkSession().sparkContext().applicationId();

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new StreamingWriter(table, io, encryptionManager, options, queryId, mode, appId, writeSchema, dsStruct);
}
 
Example 18
Source Project: iceberg   Source File: TestResiduals.java    License: Apache License 2.0
@Test
public void testIn() {
  Schema schema = new Schema(
      Types.NestedField.optional(50, "dateint", Types.IntegerType.get()),
      Types.NestedField.optional(51, "hour", Types.IntegerType.get())
  );

  PartitionSpec spec = PartitionSpec.builderFor(schema)
      .identity("dateint")
      .build();

  ResidualEvaluator resEval = ResidualEvaluator.of(spec,
      in("dateint", 20170815, 20170816, 20170817), true);

  Expression residual = resEval.residualFor(Row.of(20170815));
  Assert.assertEquals("Residual should be alwaysTrue", alwaysTrue(), residual);

  residual = resEval.residualFor(Row.of(20180815));
  Assert.assertEquals("Residual should be alwaysFalse", alwaysFalse(), residual);
}
 
Example 19
Source Project: iceberg   Source File: TestTruncatesProjection.java    License: Apache License 2.0
@Test
public void testDecimalInclusiveUpperBound() {
  Types.DecimalType type = Types.DecimalType.of(9, 2);
  BigDecimal value = (BigDecimal) Literal.of("99.99").to(type).value();
  Schema schema = new Schema(optional(1, "value", type));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();

  assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "99.90");
  assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "99.90");
  assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100.00");
  assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "99.90");
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "99.90");
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);

  BigDecimal delta = new BigDecimal(1);
  assertProjectionInclusive(spec, in("value", value.add(delta), value, value.subtract(delta)),
      Expression.Operation.IN, "[98.90, 99.90, 100.90]");
  assertProjectionInclusiveValue(spec, notIn("value", value, value.subtract(delta)), Expression.Operation.TRUE);
}
 
Example 20
Source Project: iceberg   Source File: TestIcebergInputFormat.java    License: Apache License 2.0
@Test
public void testProjection() throws Exception {
  File location = temp.newFolder(format.name());
  Assert.assertTrue(location.delete());
  Schema projectedSchema = TypeUtil.select(SCHEMA, ImmutableSet.of(1));
  Table table = tables.create(SCHEMA, SPEC,
                              ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()),
                              location.toString());
  List<Record> inputRecords = RandomGenericData.generate(table.schema(), 1, 0L);
  DataFile dataFile = writeFile(table, Row.of("2020-03-20", 0), format, inputRecords);
  table.newAppend()
       .appendFile(dataFile)
       .commit();

  Job job = Job.getInstance(conf);
  IcebergInputFormat.ConfigBuilder configBuilder = IcebergInputFormat.configure(job);
  configBuilder
      .readFrom(location.toString())
      .project(projectedSchema);
  List<Record> outputRecords = readRecords(job.getConfiguration());
  Assert.assertEquals(inputRecords.size(), outputRecords.size());
  Assert.assertEquals(projectedSchema.asStruct(), outputRecords.get(0).struct());
}
 
Example 21
@Override
protected Table initTable() {
  Schema schema = new Schema(
      optional(1, "longCol", Types.LongType.get()),
      optional(2, "intCol", Types.IntegerType.get()),
      optional(3, "floatCol", Types.FloatType.get()),
      optional(4, "doubleCol", Types.DoubleType.get()),
      optional(5, "decimalCol", Types.DecimalType.of(20, 5)),
      optional(6, "dateCol", Types.DateType.get()),
      optional(7, "timestampCol", Types.TimestampType.withZone()),
      optional(8, "stringCol", Types.StringType.get()));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = parquetWriteProps();
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
 
Example 22
Source Project: iceberg   Source File: TestAvroReadProjection.java    License: Apache License 2.0
@Override
protected GenericData.Record writeAndRead(String desc,
                                          Schema writeSchema,
                                          Schema readSchema,
                                          GenericData.Record record)
    throws IOException {
  File file = temp.newFile(desc + ".avro");
  file.delete();

  try (FileAppender<GenericData.Record> appender = Avro.write(Files.localOutput(file))
      .schema(writeSchema)
      .build()) {
    appender.add(record);
  }

  Iterable<GenericData.Record> records = Avro.read(Files.localInput(file))
      .project(readSchema)
      .build();

  return Iterables.getOnlyElement(records);
}
 
Example 23
Source Project: iceberg   Source File: TestGenericRecord.java    License: Apache License 2.0
@Test
public void testGetNullValue() {
  Types.LongType type = Types.LongType.get();
  Schema schema = new Schema(optional(1, "id", type));
  GenericRecord record = GenericRecord.create(schema);
  record.set(0, null);

  Assert.assertNull(record.get(0, type.typeId().javaClass()));
}
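
Besides positional access, GenericRecord also supports access by field name; a short hedged sketch (values are illustrative):

GenericRecord record = GenericRecord.create(schema);
record.setField("id", 42L);
Long id = (Long) record.getField("id");  // 42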
 
Example 24
Source Project: iceberg   Source File: GenericParquetReaders.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
public static ParquetValueReader<Record> buildReader(Schema expectedSchema,
                                                     MessageType fileSchema,
                                                     Map<Integer, ?> idToConstant) {
  if (ParquetSchemaUtil.hasIds(fileSchema)) {
    return (ParquetValueReader<Record>)
        TypeWithSchemaVisitor.visit(expectedSchema.asStruct(), fileSchema,
            new ReadBuilder(fileSchema, idToConstant));
  } else {
    return (ParquetValueReader<Record>)
        TypeWithSchemaVisitor.visit(expectedSchema.asStruct(), fileSchema,
            new FallbackReadBuilder(fileSchema, idToConstant));
  }
}
 
Example 25
Source Project: iceberg   Source File: TestNameMapping.java    License: Apache License 2.0
@Test
public void testFailsDuplicateId() {
  // the schema can be created because ID indexing is lazy
  AssertHelpers.assertThrows("Should fail if IDs are reused",
      IllegalArgumentException.class, "Multiple entries with same",
      () -> new Schema(
          required(1, "id", Types.LongType.get()),
          required(1, "data", Types.StringType.get())));
}
 
Example 26
Source Project: iceberg   Source File: TestTypeUtil.java    License: Apache License 2.0
@Test
public void testReassignIdsDuplicateColumns() {
  Schema schema = new Schema(
      required(0, "a", Types.IntegerType.get()),
      required(1, "A", Types.IntegerType.get())
  );
  Schema sourceSchema = new Schema(
      required(1, "a", Types.IntegerType.get()),
      required(2, "A", Types.IntegerType.get())
  );
  final Schema actualSchema = TypeUtil.reassignIds(schema, sourceSchema);
  Assert.assertEquals(sourceSchema.asStruct(), actualSchema.asStruct());
}
 
Example 27
Source Project: iceberg   Source File: AvroDataTest.java    License: Apache License 2.0
@Test
public void testArrayOfStructs() throws IOException {
  Schema schema = TypeUtil.assignIncreasingFreshIds(new Schema(
      required(0, "id", LongType.get()),
      optional(1, "data", ListType.ofOptional(2, SUPPORTED_PRIMITIVES))));

  writeAndValidate(schema);
}
 
Example 28
Source Project: iceberg   Source File: TestFilteredScan.java    License: Apache License 2.0
private static Record projectFlat(Schema projection, Record record) {
  org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(projection, "test");
  Record result = new Record(avroSchema);
  List<Types.NestedField> fields = projection.asStruct().fields();
  for (int i = 0; i < fields.size(); i += 1) {
    Types.NestedField field = fields.get(i);
    result.put(i, record.get(field.name()));
  }
  return result;
}
 
Example 29
Source Project: presto   Source File: IcebergFileWriterFactory.java    License: Apache License 2.0
private IcebergFileWriter createParquetWriter(
        Path outputPath,
        Schema icebergSchema,
        List<IcebergColumnHandle> columns,
        JobConf jobConf,
        ConnectorSession session)
{
    Properties properties = new Properties();
    properties.setProperty(IOConstants.COLUMNS, columns.stream()
            .map(IcebergColumnHandle::getName)
            .collect(joining(",")));
    properties.setProperty(IOConstants.COLUMNS_TYPES, columns.stream()
            .map(column -> toHiveType(column.getType()).getHiveTypeName().toString())
            .collect(joining(":")));

    setParquetSchema(jobConf, convert(icebergSchema, "table"));
    jobConf.set(ParquetOutputFormat.COMPRESSION, getCompressionCodec(session).getParquetCompressionCodec().name());

    return new IcebergRecordFileWriter(
            outputPath,
            columns.stream()
                    .map(IcebergColumnHandle::getName)
                    .collect(toImmutableList()),
            fromHiveStorageFormat(HiveStorageFormat.PARQUET),
            properties,
            HiveStorageFormat.PARQUET.getEstimatedWriterSystemMemoryUsage(),
            jobConf,
            typeManager,
            session);
}
 
Example 30
Source Project: iceberg   Source File: SchemaUtilTest.java    License: Apache License 2.0
@Test
public void nestedTuples() throws IOException {
  convertToPigSchema(new Schema(
      optional(1, "first", StructType.of(
          optional(2, "second", StructType.of(
              optional(3, "third", StructType.of(
                  optional(4, "val", StringType.get())
              ))
          ))
      ))
  ), "first:(second:(third:(val:chararray)))", "");
}