org.apache.iceberg.PartitionSpec Java Examples

The following examples show how to use org.apache.iceberg.PartitionSpec. Each example is taken from an open source project; the source file, project, and license are listed above the code.
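Before the examples, here is a minimal, self-contained sketch of building a PartitionSpec. The schema, field IDs, and column names are illustrative assumptions; the builder methods shown (identity, bucket, day, truncate, and the unpartitioned() factory) are the ones exercised throughout the examples below.

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

import static org.apache.iceberg.types.Types.NestedField.optional;

public class PartitionSpecSketch {
  public static void main(String[] args) {
    // Hypothetical schema; field IDs and names are assumptions for illustration.
    Schema schema = new Schema(
        optional(1, "category", Types.StringType.get()),
        optional(2, "id", Types.LongType.get()),
        optional(3, "ts", Types.TimestampType.withZone()));

    // Chain transforms: identity on category, 16 hash buckets on id, daily partitions on ts.
    PartitionSpec spec = PartitionSpec.builderFor(schema)
        .identity("category")
        .bucket("id", 16)
        .day("ts")
        .build();
    System.out.println(spec);

    // Tables with no partitioning use the unpartitioned singleton.
    PartitionSpec unpartitioned = PartitionSpec.unpartitioned();
    System.out.println(unpartitioned.isUnpartitioned());
  }
}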
Example #1
Source File: TestTruncatesProjection.java    From iceberg with Apache License 2.0
@Test
public void testDecimalInclusiveLowerBound() {
  Types.DecimalType type = Types.DecimalType.of(9, 2);
  BigDecimal value = (BigDecimal) Literal.of("100.00").to(type).value();
  Schema schema = new Schema(optional(1, "value", type));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();

  assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "99.90");
  assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "100.00");
  assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100.00");
  assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "100.00");
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "100.00");
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);

  BigDecimal delta = new BigDecimal(1);
  assertProjectionInclusive(spec, in("value", value.add(delta), value, value.subtract(delta)),
      Expression.Operation.IN, "[99.00, 100.00, 101.00]");
  assertProjectionInclusiveValue(spec, notIn("value", value, value.add(delta)), Expression.Operation.TRUE);
}
 
Example #2
Source File: TestTimestampsProjection.java    From iceberg with Apache License 2.0
@Test
public void testDayInclusiveLowerBound() {
  Long date = (long) Literal.of("2017-12-01T00:00:00.00000").to(TYPE).value();
  PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("timestamp").build();

  assertProjectionInclusive(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11-30");
  assertProjectionInclusive(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01");
  assertProjectionInclusive(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01");
  assertProjectionInclusive(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01");
  assertProjectionInclusive(spec, equal("timestamp", date), Expression.Operation.EQ, "2017-12-01");
  assertProjectionInclusiveValue(spec, notEqual("timestamp", date), Expression.Operation.TRUE);

  Long anotherDate = (long) Literal.of("2017-12-02T00:00:00.00000").to(TYPE).value();
  assertProjectionInclusive(spec, in("timestamp", date, anotherDate),
      Expression.Operation.IN, "[2017-12-01, 2017-12-02]");
  assertProjectionInclusiveValue(spec, notIn("timestamp", date, anotherDate), Expression.Operation.TRUE);
}
 
Example #3
Source File: TestTimestampsProjection.java    From iceberg with Apache License 2.0
@Test
public void testDayStrictLowerBound() {
  Long date = (long) Literal.of("2017-12-01T00:00:00.00000").to(TYPE).value();
  PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("timestamp").build();

  assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017-12-01");
  assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2017-12-01");
  assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017-12-01");
  assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017-11-30");
  assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12-01");
  assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);

  Long anotherDate = (long) Literal.of("2017-12-02T00:00:00.00000").to(TYPE).value();
  assertProjectionStrict(spec, notIn("timestamp", date, anotherDate),
      Expression.Operation.NOT_IN, "[2017-12-01, 2017-12-02]");
  assertProjectionStrictValue(spec, in("timestamp", date, anotherDate), Expression.Operation.FALSE);
}
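Read together, Examples #2 and #3 show the two projection modes on the same day-partitioned spec: an inclusive projection must select every partition that could contain a matching row, so lessThan on the timestamp widens to LT_EQ on the day, while a strict projection may select only partitions in which every row is guaranteed to match, which is why equal projects to FALSE (a single day can always hold non-matching timestamps).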
 
Example #4
Source File: TestIcebergManifests.java    From dremio-oss with Apache License 2.0
@Test
public void testManifestCount() throws Exception {
  int insertCount = 5;
  int partitionValueSize = 1024;
  int dataFilesCount = 1;
  String columnName = "data";
  String expectedValue = "abc";
  PartitionSpec partitionSpec = PartitionSpec
    .builderFor(schema)
    .identity(columnName)
    .build();

  IcebergPartitionData icebergPartitionData = new IcebergPartitionData(partitionSpec.partitionType());
  icebergPartitionData.setString(0, expectedValue);
  int manifestCount = getManifestFileCount(partitionSpec, partitionValueSize, dataFilesCount, columnName, insertCount);
  Assert.assertTrue(manifestCount < insertCount);
}
 
Example #5
Source File: TestTruncatesProjection.java    From iceberg with Apache License 2.0
@Test
public void testStringStrict() {
  String value = "abcdefg";
  Schema schema = new Schema(optional(1, "value", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();

  assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "abcde");
  assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "abcde");
  assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "abcde");
  assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "abcde");
  assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "abcde");
  assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);

  assertProjectionStrict(spec, notIn("value", value, value + "abc"),
      Expression.Operation.NOT_IN, "[abcde, abcde]");
  assertProjectionStrictValue(spec, in("value", value, value + "abc"), Expression.Operation.FALSE);
}
 
Example #6
Source File: IcebergPageSinkProvider.java    From presto with Apache License 2.0
private ConnectorPageSink createPageSink(ConnectorSession session, IcebergWritableTableHandle tableHandle)
{
    HdfsContext hdfsContext = new HdfsContext(session, tableHandle.getSchemaName(), tableHandle.getTableName());
    Schema schema = SchemaParser.fromJson(tableHandle.getSchemaAsJson());
    PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, tableHandle.getPartitionSpecAsJson());
    return new IcebergPageSink(
            schema,
            partitionSpec,
            tableHandle.getOutputPath(),
            fileWriterFactory,
            pageIndexerFactory,
            hdfsEnvironment,
            hdfsContext,
            tableHandle.getInputColumns(),
            jsonCodec,
            session,
            tableHandle.getFileFormat());
}
 
Example #7
Source File: TestResiduals.java    From iceberg with Apache License 2.0
@Test
public void testUnpartitionedResiduals() {
  Expression[] expressions = new Expression[] {
      Expressions.alwaysTrue(),
      Expressions.alwaysFalse(),
      Expressions.lessThan("a", 5),
      Expressions.greaterThanOrEqual("b", 16),
      Expressions.notNull("c"),
      Expressions.isNull("d"),
      Expressions.in("e", 1, 2, 3),
      Expressions.notIn("f", 1, 2, 3)
  };

  for (Expression expr : expressions) {
    ResidualEvaluator residualEvaluator = ResidualEvaluator.of(PartitionSpec.unpartitioned(), expr, true);
    Assert.assertEquals("Should return expression",
        expr, residualEvaluator.residualFor(Row.of()));
  }
}
 
Example #8
Source File: TestTimestampsProjection.java    From iceberg with Apache License 2.0
@Test
public void testYearStrictUpperBound() {
  Long date = (long) Literal.of("2017-12-31T23:59:59.999999").to(TYPE).value();
  PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("timestamp").build();

  assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017");
  assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2018");
  assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017");
  assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017");
  assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017");
  assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);

  Long anotherDate = (long) Literal.of("2016-12-31T23:59:59.999999").to(TYPE).value();
  assertProjectionStrict(spec, notIn("timestamp", date, anotherDate),
      Expression.Operation.NOT_IN, "[2016, 2017]");
  assertProjectionStrictValue(spec, in("timestamp", date, anotherDate), Expression.Operation.FALSE);
}
 
Example #9
Source File: TestTruncatesProjection.java    From iceberg with Apache License 2.0
@Test
public void testDecimalInclusiveUpperBound() {
  Types.DecimalType type = Types.DecimalType.of(9, 2);
  BigDecimal value = (BigDecimal) Literal.of("99.99").to(type).value();
  Schema schema = new Schema(optional(1, "value", type));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();

  assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "99.90");
  assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "99.90");
  assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100.00");
  assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "99.90");
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "99.90");
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);

  BigDecimal delta = new BigDecimal(1);
  assertProjectionInclusive(spec, in("value", value.add(delta), value, value.subtract(delta)),
      Expression.Operation.IN, "[98.90, 99.90, 100.90]");
  assertProjectionInclusiveValue(spec, notIn("value", value, value.subtract(delta)), Expression.Operation.TRUE);
}
 
Example #10
Source File: TestResiduals.java    From iceberg with Apache License 2.0
@Test
public void testNotIn() {
  Schema schema = new Schema(
      Types.NestedField.optional(50, "dateint", Types.IntegerType.get()),
      Types.NestedField.optional(51, "hour", Types.IntegerType.get())
  );

  PartitionSpec spec = PartitionSpec.builderFor(schema)
      .identity("dateint")
      .build();

  ResidualEvaluator resEval = ResidualEvaluator.of(spec,
      notIn("dateint", 20170815, 20170816, 20170817), true);

  Expression residual = resEval.residualFor(Row.of(20180815));
  Assert.assertEquals("Residual should be alwaysTrue", alwaysTrue(), residual);

  residual = resEval.residualFor(Row.of(20170815));
  Assert.assertEquals("Residual should be alwaysFalse", alwaysFalse(), residual);
}
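The residual is the part of the filter that still has to be checked row by row inside a matching partition. For compound filters the evaluator keeps only the part that the partition value cannot prove; a minimal sketch against the same dateint-partitioned spec as above (the hour predicate, and static imports of and, equal, and lessThan from Expressions, are assumptions for illustration):

ResidualEvaluator eval = ResidualEvaluator.of(spec,
    and(equal("dateint", 20170815), lessThan("hour", 12)), true);

// The partition value proves equal("dateint", 20170815), so only the
// hour predicate should remain as the residual for this partition.
Expression residual = eval.residualFor(Row.of(20170815));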
 
Example #11
Source File: TestBucketingProjection.java    From iceberg with Apache License 2.0
@Test
public void testBucketByteBufferStrict() throws Exception {
  ByteBuffer value = ByteBuffer.wrap("abcdefg".getBytes("UTF-8"));
  Schema schema = new Schema(optional(1, "value", Types.BinaryType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("value", 10).build();

  // the bucket number of the value (i.e. "abcdefg") is 4
  assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "4");
  assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, lessThan("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, lessThanOrEqual("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, greaterThan("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, greaterThanOrEqual("value", value), Expression.Operation.FALSE);

  ByteBuffer anotherValue = ByteBuffer.wrap("abcdehij".getBytes("UTF-8"));
  assertProjectionStrict(spec, notIn("value", value, anotherValue),
      Expression.Operation.NOT_IN, "[4, 6]");
  assertProjectionStrictValue(spec, in("value", value, anotherValue), Expression.Operation.FALSE);
}
 
Example #12
Source File: TestTruncatesProjection.java    From iceberg with Apache License 2.0
public void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter,
                                      Expression.Operation expectedOp, String expectedLiteral) {
  Expression projection = Projections.inclusive(spec).project(filter);
  UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);

  Assert.assertEquals(expectedOp, predicate.op());

  Assert.assertNotEquals("Inclusive projection never runs for NOT_IN", Expression.Operation.NOT_IN, predicate.op());

  Truncate transform = (Truncate) spec.getFieldsBySourceId(1).get(0).transform();
  if (predicate.op() == Expression.Operation.IN) {
    Iterable<?> values = Iterables.transform(predicate.literals(), Literal::value);
    String actual = Lists.newArrayList(values).stream().sorted()
        .map(v -> transform.toHumanString(v)).collect(Collectors.toList()).toString();
    Assert.assertEquals(expectedLiteral, actual);
  } else {
    Literal literal = predicate.literal();
    String output = transform.toHumanString(literal.value());
    Assert.assertEquals(expectedLiteral, output);
  }
}
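Outside of an assertion helper like the one above, the same Projections API can be called directly. A minimal sketch, assuming the static imports used elsewhere in these examples (optional, lessThan, notEqual):

Schema schema = new Schema(optional(1, "value", Types.StringType.get()));
PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();

// Inclusive projection: may keep extra partitions, but never drops a matching row.
Expression inclusivePart = Projections.inclusive(spec).project(lessThan("value", "abcdefg"));

// Strict projection: keeps only partitions in which every row matches.
Expression strictPart = Projections.strict(spec).project(notEqual("value", "abcdefg"));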
 
Example #13
Source File: TestBucketingProjection.java    From iceberg with Apache License 2.0
@Test
public void testBucketUUIDInclusive() {
  UUID value = new UUID(123L, 456L);
  Schema schema = new Schema(optional(1, "value", Types.UUIDType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("value", 10).build();

  // the bucket number of the value (i.e. UUID(123L, 456L)) is 4
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "4");
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
  assertProjectionInclusiveValue(spec, lessThan("value", value), Expression.Operation.TRUE);
  assertProjectionInclusiveValue(spec, lessThanOrEqual("value", value), Expression.Operation.TRUE);
  assertProjectionInclusiveValue(spec, greaterThan("value", value), Expression.Operation.TRUE);
  assertProjectionInclusiveValue(spec, greaterThanOrEqual("value", value), Expression.Operation.TRUE);

  UUID anotherValue = new UUID(456L, 123L);
  assertProjectionInclusive(spec, in("value", value, anotherValue),
      Expression.Operation.IN, "[4, 6]");
  assertProjectionInclusiveValue(spec, notIn("value", value, anotherValue), Expression.Operation.TRUE);
}
 
Example #14
Source File: TestTimestampsProjection.java    From iceberg with Apache License 2.0
@Test
public void testMonthStrictLowerBound() {
  Long date = (long) Literal.of("2017-12-01T00:00:00.00000").to(TYPE).value();
  PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("timestamp").build();

  assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017-12");
  assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2017-12");
  assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017-12");
  assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017-11");
  assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12");
  assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);

  Long anotherDate = (long) Literal.of("2017-12-02T00:00:00.00000").to(TYPE).value();
  assertProjectionStrict(spec, notIn("timestamp", anotherDate, date),
      Expression.Operation.NOT_IN, "[2017-12, 2017-12]");
  assertProjectionStrictValue(spec, in("timestamp", anotherDate, date), Expression.Operation.FALSE);
}
 
Example #15
Source File: TestBucketingProjection.java    From iceberg with Apache License 2.0
public void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter,
                                      Expression.Operation expectedOp, String expectedLiteral) {
  Expression projection = Projections.inclusive(spec).project(filter);
  UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);

  Assert.assertEquals(expectedOp, predicate.op());

  Assert.assertNotEquals("Inclusive projection never runs for NOT_IN", Expression.Operation.NOT_IN, predicate.op());

  Bucket transform = (Bucket) spec.getFieldsBySourceId(1).get(0).transform();
  if (predicate.op() == Expression.Operation.IN) {
    Iterable<?> values = Iterables.transform(predicate.literals(), Literal::value);
    String actual = Lists.newArrayList(values).stream().sorted()
        .map(v -> transform.toHumanString(v)).collect(Collectors.toList()).toString();
    Assert.assertEquals(expectedLiteral, actual);
  } else {
    Literal literal = predicate.literal();
    String output = transform.toHumanString(literal.value());
    Assert.assertEquals(expectedLiteral, output);
  }
}
 
Example #16
Source File: TestBucketingProjection.java    From iceberg with Apache License 2.0
@Test
public void testBucketIntegerInclusive() {
  Integer value = 100;
  Schema schema = new Schema(optional(1, "value", Types.IntegerType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("value", 10).build();

  // the bucket number of the value (i.e. 100) is 6
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "6");
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
  assertProjectionInclusiveValue(spec, lessThan("value", value), Expression.Operation.TRUE);
  assertProjectionInclusiveValue(spec, lessThanOrEqual("value", value), Expression.Operation.TRUE);
  assertProjectionInclusiveValue(spec, greaterThan("value", value), Expression.Operation.TRUE);
  assertProjectionInclusiveValue(spec, greaterThanOrEqual("value", value), Expression.Operation.TRUE);

  assertProjectionInclusive(spec, in("value", value - 1, value, value + 1),
      Expression.Operation.IN, "[6, 7, 8]");
  assertProjectionInclusiveValue(spec, notIn("value", value, value + 1), Expression.Operation.TRUE);
}
 
Example #17
Source File: TestHadoopCatalog.java    From iceberg with Apache License 2.0
@Test
public void testCreateAndDropTableWithoutNamespace() throws Exception {
  Configuration conf = new Configuration();
  String warehousePath = temp.newFolder().getAbsolutePath();
  HadoopCatalog catalog = new HadoopCatalog(conf, warehousePath);

  TableIdentifier testTable = TableIdentifier.of("tbl");
  Table table = catalog.createTable(testTable, SCHEMA, PartitionSpec.unpartitioned());

  Assert.assertEquals(TABLE_SCHEMA.toString(), table.schema().toString());
  Assert.assertEquals("hadoop.tbl", table.toString());
  String metaLocation = catalog.defaultWarehouseLocation(testTable);

  FileSystem fs = Util.getFs(new Path(metaLocation), conf);
  Assert.assertTrue(fs.isDirectory(new Path(metaLocation)));

  catalog.dropTable(testTable);
  Assert.assertFalse(fs.isDirectory(new Path(metaLocation)));
}
 
Example #18
Source File: VectorizedReadFlatParquetDataBenchmark.java    From iceberg with Apache License 2.0
@Override
protected Table initTable() {
  Schema schema = new Schema(
      optional(1, "longCol", Types.LongType.get()),
      optional(2, "intCol", Types.IntegerType.get()),
      optional(3, "floatCol", Types.FloatType.get()),
      optional(4, "doubleCol", Types.DoubleType.get()),
      optional(5, "decimalCol", Types.DecimalType.of(20, 5)),
      optional(6, "dateCol", Types.DateType.get()),
      optional(7, "timestampCol", Types.TimestampType.withZone()),
      optional(8, "stringCol", Types.StringType.get()));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = parquetWriteProps();
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
 
Example #19
Source File: IcebergSourceNestedDataBenchmark.java    From iceberg with Apache License 2.0
@Override
protected final Table initTable() {
  Schema schema = new Schema(
      required(0, "id", Types.LongType.get()),
      optional(4, "nested", Types.StructType.of(
          required(1, "col1", Types.StringType.get()),
          required(2, "col2", Types.DoubleType.get()),
          required(3, "col3", Types.LongType.get())
      ))
  );
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.METADATA_COMPRESSION, "gzip");
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
 
Example #20
Source File: IcebergSourceFlatDataBenchmark.java    From iceberg with Apache License 2.0
@Override
protected final Table initTable() {
  Schema schema = new Schema(
      required(1, "longCol", Types.LongType.get()),
      required(2, "intCol", Types.IntegerType.get()),
      required(3, "floatCol", Types.FloatType.get()),
      optional(4, "doubleCol", Types.DoubleType.get()),
      optional(5, "decimalCol", Types.DecimalType.of(20, 5)),
      optional(6, "dateCol", Types.DateType.get()),
      optional(7, "timestampCol", Types.TimestampType.withZone()),
      optional(8, "stringCol", Types.StringType.get()));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.METADATA_COMPRESSION, "gzip");
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
 
Example #21
Source File: IcebergSourceFlatORCDataBenchmark.java    From iceberg with Apache License 2.0
@Override
protected final Table initTable() {
  Schema schema = new Schema(
      required(1, "longCol", Types.LongType.get()),
      required(2, "intCol", Types.IntegerType.get()),
      required(3, "floatCol", Types.FloatType.get()),
      optional(4, "doubleCol", Types.DoubleType.get()),
      optional(5, "decimalCol", Types.DecimalType.of(20, 5)),
      optional(6, "dateCol", Types.DateType.get()),
      // Disable timestamp column for ORC performance tests as Spark native reader does not support ORC's
      // TIMESTAMP_INSTANT type
      // optional(7, "timestampCol", Types.TimestampType.withZone()),
      optional(8, "stringCol", Types.StringType.get()));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.METADATA_COMPRESSION, "gzip");
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
 
Example #22
Source File: TestSnapshotSelection.java    From iceberg with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testSnapshotSelectionByInvalidTimestamp() throws IOException {
  long timestamp = System.currentTimeMillis();

  String tableLocation = temp.newFolder("iceberg-table").toString();
  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, tableLocation);

  Dataset<Row> df = spark.read()
      .format("iceberg")
      .option("as-of-timestamp", timestamp)
      .load(tableLocation);

  df.collectAsList();
}
 
Example #23
Source File: TestTruncatesProjection.java    From iceberg with Apache License 2.0
@Test
public void testDecimalStrictLowerBound() {
  Types.DecimalType type = Types.DecimalType.of(9, 2);
  BigDecimal value = (BigDecimal) Literal.of("100.00").to(type).value();
  Schema schema = new Schema(optional(1, "value", type));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();

  assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "100.00");
  assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100.00");
  assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "100.00");
  assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "99.90");
  assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "100.00");
  assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);

  BigDecimal delta = new BigDecimal(1);
  assertProjectionStrict(spec, notIn("value", value.add(delta), value, value.subtract(delta)),
          Expression.Operation.NOT_IN, "[99.00, 100.00, 101.00]");
  assertProjectionStrictValue(spec, in("value", value, value.add(delta)), Expression.Operation.FALSE);
}
 
Example #24
Source File: TestWriteMetricsConfig.java    From iceberg with Apache License 2.0
@Test
public void testFullMetricsCollectionForParquet() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.DEFAULT_WRITE_METRICS_MODE, "full");
  Table table = tables.create(SIMPLE_SCHEMA, spec, properties, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );
  Dataset<Row> df = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  df.select("id", "data")
      .coalesce(1)
      .write()
      .format("iceberg")
      .option("write-format", "parquet")
      .mode("append")
      .save(tableLocation);

  for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) {
    DataFile file = task.file();
    Assert.assertEquals(2, file.nullValueCounts().size());
    Assert.assertEquals(2, file.valueCounts().size());
    Assert.assertEquals(2, file.lowerBounds().size());
    Assert.assertEquals(2, file.upperBounds().size());
  }
}
 
Example #25
Source File: TestStartsWith.java    From iceberg with Apache License 2.0
private void assertProjection(PartitionSpec spec, String expectedLiteral, Expression projection,
                              Expression.Operation expectedOp) {
  UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
  Literal literal = predicate.literal();
  Truncate<CharSequence> transform = (Truncate<CharSequence>) spec.getFieldsBySourceId(1).get(0).transform();
  String output = transform.toHumanString((String) literal.value());

  Assert.assertEquals(expectedOp, predicate.op());
  Assert.assertEquals(expectedLiteral, output);
}
 
Example #26
Source File: TestDataSourceOptions.java    From iceberg with Apache License 2.0
@Test
public void testWriteFormatOptionOverridesTableProperties() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> options = Maps.newHashMap();
  options.put(TableProperties.DEFAULT_FILE_FORMAT, "avro");
  Table table = tables.create(SCHEMA, spec, options, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );
  Dataset<Row> df = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  df.select("id", "data").write()
      .format("iceberg")
      .option("write-format", "parquet")
      .mode("append")
      .save(tableLocation);

  try (CloseableIterable<FileScanTask> tasks = table.newScan().planFiles()) {
    tasks.forEach(task -> {
      FileFormat fileFormat = FileFormat.fromFileName(task.file().path());
      Assert.assertEquals(FileFormat.PARQUET, fileFormat);
    });
  }
}
 
Example #27
Source File: TestSnapshotSelection.java    From iceberg with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testSnapshotSelectionByInvalidSnapshotId() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, tableLocation);

  Dataset<Row> df = spark.read()
      .format("iceberg")
      .option("snapshot-id", -10)
      .load(tableLocation);

  df.collectAsList();
}
 
Example #28
Source File: IcebergCatalog.java    From dremio-oss with Apache License 2.0
public static PartitionSpec getIcebergPartitionSpec(BatchSchema batchSchema,
                                                    List<String> partitionColumns) {
  SchemaConverter schemaConverter = new SchemaConverter();
  Schema schema;

  // match partition column name with name in schema
  List<String> partitionColumnsInSchemaCase = new ArrayList<>();
  if (partitionColumns != null) {
    List<String> invalidPartitionColumns = new ArrayList<>();
    for (String partitionColumn : partitionColumns) {
      Optional<Field> fieldFromSchema = batchSchema.findFieldIgnoreCase(partitionColumn);
      if (fieldFromSchema.isPresent()) {
        if (fieldFromSchema.get().getType().getTypeID() == ArrowType.ArrowTypeID.Time) {
          throw UserException.validationError().message("Partition column %s of type time is not supported", fieldFromSchema.get().getName()).buildSilently();
        }
        partitionColumnsInSchemaCase.add(fieldFromSchema.get().getName());
      } else {
        invalidPartitionColumns.add(partitionColumn);
      }
    }
    if (!invalidPartitionColumns.isEmpty()) {
      throw UserException.validationError().message("Partition column(s) %s are not found in table.", invalidPartitionColumns).buildSilently();
    }
  }

  try {
    schema = schemaConverter.toIceberg(batchSchema);
    PartitionSpec.Builder partitionSpecBuilder = PartitionSpec.builderFor(schema);
    for (String column : partitionColumnsInSchemaCase) {
      partitionSpecBuilder.identity(column);
    }
    return partitionSpecBuilder.build();
  } catch (Exception ex) {
    throw UserException.validationError(ex).buildSilently();
  }
}
 
Example #29
Source File: TestSparkDataWrite.java    From iceberg with Apache License 2.0
@Test
public void testUnpartitionedOverwrite() throws IOException {
  File parent = temp.newFolder(format.toString());
  File location = new File(parent, "test");

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Table table = tables.create(SCHEMA, spec, location.toString());

  List<SimpleRecord> expected = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );

  Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);

  df.select("id", "data").write()
      .format("iceberg")
      .option("write-format", format.toString())
      .mode("append")
      .save(location.toString());

  // overwrite with the same data; should not produce two copies
  df.select("id", "data").write()
      .format("iceberg")
      .option("write-format", format.toString())
      .mode("overwrite")
      .save(location.toString());

  table.refresh();

  Dataset<Row> result = spark.read()
      .format("iceberg")
      .load(location.toString());

  List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
  Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
  Assert.assertEquals("Result rows should match", expected, actual);
}
 
Example #30
Source File: TestDataSourceOptions.java    From iceberg with Apache License 2.0
@Test
public void testDefaultMetadataSplitSize() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> options = Maps.newHashMap();
  tables.create(SCHEMA, spec, options, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b")
  );
  Dataset<Row> originalDf = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  originalDf.select("id", "data").write()
      .format("iceberg")
      .mode("append")
      .save(tableLocation);

  int splitSize = (int) TableProperties.METADATA_SPLIT_SIZE_DEFAULT; // 32MB split size

  int expectedSplits = ((int) tables.load(tableLocation + "#entries")
      .currentSnapshot().allManifests().get(0).length() + splitSize - 1) / splitSize;

  Dataset<Row> metadataDf = spark.read()
      .format("iceberg")
      .load(tableLocation + "#entries");

  int partitionNum = metadataDf.javaRDD().getNumPartitions();
  Assert.assertEquals("Spark partitions should match", expectedSplits, partitionNum);
}