Java Code Examples for org.apache.iceberg.Table#newTransaction()

The following examples show how to use org.apache.iceberg.Table#newTransaction(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestIcebergPartitions.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an Iceberg table partitioned by {@code bucket(NAME, 2)}, appends five data files in
 * a single transaction, and then expects reading the table metadata through
 * {@code IcebergTableWrapper} to fail with a {@code UserException} whose error type is
 * {@code UNSUPPORTED_OPERATION} and whose original message names the {@code [name]} column.
 */
@Test
public void testNonIdentityPartitions() throws Exception {
  File root = tempDir.newFolder();
  HadoopTables tables = new HadoopTables(conf);
  PartitionSpec partitionSpec = PartitionSpec
      .builderFor(schema)
      .bucket(NAME, 2)
      .build();
  Table table = tables.create(schema, partitionSpec, root.getAbsolutePath());

  // Append some data files.
  Transaction transaction = table.newTransaction();
  AppendFiles appendFiles = transaction.newAppend();
  appendFiles.appendFile(createDataFile(root, "d1", 1, "jack", 100));
  appendFiles.appendFile(createDataFile(root, "d2", 1, "jack", 200));
  appendFiles.appendFile(createDataFile(root, "d3", 2, "jill", 300));
  appendFiles.appendFile(createDataFile(root, "d4", 2, "jill", 400));
  appendFiles.appendFile(createDataFile(root, "d5", 2, "jill", 500));
  appendFiles.commit();
  transaction.commitTransaction();

  try {
    // Only the failure is of interest here, so the returned table info is not kept.
    new IcebergTableWrapper(getSabotContext(),
        HadoopFileSystem.get(fs), conf, root.getAbsolutePath()).getTableInfo();
    // NOTE(review): assuming this is JUnit's fail(), it raises an AssertionError, which the
    // catch (Exception) below does not intercept.
    fail("Expected error while reading metadata of iceberg table with non-identity partition field");
  } catch (Exception ex) {
    Assert.assertTrue("UserException expected", ex instanceof UserException);
    UserException uex = (UserException) ex;
    Assert.assertEquals("Invalid ErrorType. Expected " + UserBitShared.DremioPBError.ErrorType.UNSUPPORTED_OPERATION
            + " but got " + uex.getErrorType(), UserBitShared.DremioPBError.ErrorType.UNSUPPORTED_OPERATION, uex.getErrorType());
    String expectedErrorMsg = "Column values and partition values are not same for [name] column";
    Assert.assertTrue("Expected message to contain " + expectedErrorMsg + " but was "
        + uex.getOriginalMessage() + " instead", uex.getOriginalMessage().contains(expectedErrorMsg));
  }
}
 
Example 2
Source File: TestCreateTable.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an Iceberg table whose schema has a map column ({@code col1}) and an integer column
 * ({@code col2}), registers a pre-built parquet file with it, and checks that
 * {@code select *} yields only {@code col2} both before and after an SQL insert.
 */
@Test
public void testDroppingOfMapTypeColumn() throws Exception{
  String icebergTableName = "iceberg_map_test";
  try {
    File tableDir = new File(getDfsTestTmpSchemaLocation(), icebergTableName);

    Schema mapSchema = new Schema(
      Types.NestedField.optional(1, "col1", Types.MapType.ofOptional(1, 2, Types.IntegerType.get(), Types.StringType.get())),
      Types.NestedField.optional(2, "col2", Types.IntegerType.get())
    );
    // No partition fields are added to the builder before build().
    PartitionSpec emptySpec = PartitionSpec.builderFor(mapSchema).build();
    Table icebergTable =
      new HadoopTables(new Configuration()).create(mapSchema, emptySpec, tableDir.getPath());

    // Register the existing parquet resource as the table's single data file.
    Transaction txn = icebergTable.newTransaction();
    AppendFiles append = txn.newAppend();
    File parquetData = new File(
      TestTools.getWorkingPath() + "/src/test/resources/iceberg/mapTest",
      "iceberg_map_test.parquet");
    append.appendFile(
      DataFiles.builder(emptySpec)
        .withInputFile(Files.localInput(parquetData))
        .withRecordCount(1)
        .withFormat(FileFormat.PARQUET)
        .build());
    append.commit();
    txn.commitTransaction();

    String selectAll = "select * from dfs_test.iceberg_map_test";

    // Before the insert: a single row, and only col2 appears in the baseline.
    testBuilder()
      .sqlQuery(selectAll)
      .unOrdered()
      .baselineColumns("col2")
      .baselineValues(1)
      .build()
      .run();

    // NOTE(review): the pauses around the insert presumably let a timestamp change become
    // observable - confirm the reason before altering them.
    Thread.sleep(1001);
    test("insert into  dfs_test.iceberg_map_test select * from (values(2))");
    Thread.sleep(1001);

    // After the insert: both the original and the inserted value are expected.
    testBuilder()
      .sqlQuery(selectAll)
      .unOrdered()
      .baselineColumns("col2")
      .baselineValues(1)
      .baselineValues(2)
      .build()
      .run();
  }
  finally {
    // Remove the table directory regardless of the test outcome.
    FileUtils.deleteQuietly(new File(getDfsTestTmpSchemaLocation(), icebergTableName));
  }
}
 
Example 3
Source File: TestIcebergTableDrop.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an Iceberg table under the dfs_test schema location, appends one data file, checks
 * the row count through SQL, drops the table with {@code DROP TABLE}, and finally verifies
 * that querying the dropped table reports it as not found.
 */
@Test
public void testDropTable() throws Exception {
  try (AutoCloseable c = enableIcebergTables()) {
    Path rootPath = Paths.get(getDfsTestTmpSchemaLocation(), "iceberg", "nation");
    Files.createDirectories(rootPath);
    String root = rootPath.toString();

    String tableName = "dfs_test.iceberg.nation";

    HadoopTables tables = new HadoopTables(conf);
    Table table = tables.create(schema, null, root);
    IcebergTableInfo tableInfo =
        new IcebergTableWrapper(getSabotContext(), HadoopFileSystem.get(fs), conf, root)
            .getTableInfo();
    // A freshly created table has no records. Expected value goes first so that a failure
    // message reports expected/actual the right way round.
    assertEquals(0, tableInfo.getRecordCount());

    // Append some data files.
    Transaction transaction = table.newTransaction();
    AppendFiles appendFiles = transaction.newAppend();
    appendFiles.appendFile(createDataFile(rootPath.toFile(), "d1"));
    appendFiles.commit();
    transaction.commitTransaction();

    // The appended file is expected to contribute 25 rows.
    testBuilder()
        .sqlQuery("select count(*) c from " + tableName)
        .unOrdered()
        .baselineColumns("c")
        .baselineValues(25L)
        .build()
        .run();

    testBuilder()
        .sqlQuery("DROP TABLE " + tableName)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, String.format("Table [%s] dropped", tableName))
        .build()
        .run();

    // After the drop, the same query must fail with a "not found" message.
    errorMsgTestHelper(
        "select count(*) c from " + tableName, "Table '" + tableName + "' not found");
  }
}
 
Example 4
Source File: TestRefresh.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises {@code ALTER TABLE ... REFRESH METADATA} on an Iceberg table: first with no
 * change since the last read (no refresh is expected), then after a second append (a refresh
 * is expected and the row count doubles from 25 to 50).
 */
@Test
public void testRefresh() throws Exception {
  try (AutoCloseable c = enableIcebergTables()) {
    Path rootPath = Paths.get(getDfsTestTmpSchemaLocation(), "iceberg", "metadata_refresh");
    Files.createDirectories(rootPath);
    String root = rootPath.toString();
    String tableName = "dfs_test.iceberg.metadata_refresh";

    HadoopTables tables = new HadoopTables(conf);
    Table table = tables.create(schema, null, root);

    IcebergTableInfo tableInfo =
        new IcebergTableWrapper(getSabotContext(), HadoopFileSystem.get(fs), conf, root)
            .getTableInfo();
    // A freshly created table has no records. Expected value goes first so that a failure
    // message reports expected/actual the right way round.
    assertEquals(0, tableInfo.getRecordCount());

    // Append some data files.
    Transaction transaction = table.newTransaction();
    AppendFiles appendFiles = transaction.newAppend();
    appendFiles.appendFile(createDataFile(rootPath.toFile(), "d1"));
    appendFiles.commit();
    transaction.commitTransaction();

    testBuilder()
        .sqlQuery("select count(*) c from " + tableName)
        .unOrdered()
        .baselineColumns("c")
        .baselineValues(25L)
        .build()
        .run();

    // to detect an mtime change.
    Thread.sleep(1000);

    // refresh without an update
    testBuilder()
        .sqlQuery("ALTER TABLE " + tableName + " REFRESH METADATA")
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(
            true,
            String.format(
                "Table '%s' read signature reviewed but source stated metadata is unchanged, no refresh occurred.",
                tableName))
        .build()
        .run();

    // Do another append
    transaction = table.newTransaction();
    appendFiles = transaction.newAppend();
    appendFiles.appendFile(createDataFile(rootPath.toFile(), "d2"));
    appendFiles.commit();
    transaction.commitTransaction();

    // refresh
    testBuilder()
        .sqlQuery("ALTER TABLE " + tableName + " REFRESH METADATA")
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, String.format("Metadata for table '%s' refreshed.", tableName))
        .build()
        .run();

    // validate increased row count
    testBuilder()
        .sqlQuery("select count(*) c from " + tableName)
        .unOrdered()
        .baselineColumns("c")
        .baselineValues(50L)
        .build()
        .run();
  }
}
 
Example 5
Source File: TestIcebergPartitions.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the partition information reported for an Iceberg table created with {@code spec}:
 * an empty table has zero records and no partition chunks, and after appending five data
 * files the table reports 1500 records split across two partition chunks
 * ({@code 1/"jack"} with two splits and {@code 2/"jill"} with three splits).
 */
@Test
public void testPartitions() throws Exception {
  File root = tempDir.newFolder();
  HadoopTables tables = new HadoopTables(conf);
  Table table = tables.create(schema, spec, root.getAbsolutePath());

  // test empty table.
  IcebergTableInfo tableInfo = new IcebergTableWrapper(getSabotContext(),
    HadoopFileSystem.get(fs), conf, root.getAbsolutePath()).getTableInfo();
  // Expected value goes first, matching the other assertions in this method.
  assertEquals(0, tableInfo.getRecordCount());

  List<String> expectedColumns = Arrays.asList(ID, NAME);
  assertEquals(expectedColumns, tableInfo.getPartitionColumns());

  assertEquals(0, ImmutableList.copyOf(tableInfo.getPartitionChunkListing().iterator()).size());

  // Append some data files.
  Transaction transaction = table.newTransaction();
  AppendFiles appendFiles = transaction.newAppend();
  appendFiles.appendFile(createDataFile(root, "d1", 1, "jack", 100));
  appendFiles.appendFile(createDataFile(root, "d2", 1, "jack", 200));
  appendFiles.appendFile(createDataFile(root, "d3", 2, "jill", 300));
  appendFiles.appendFile(createDataFile(root, "d4", 2, "jill", 400));
  appendFiles.appendFile(createDataFile(root, "d5", 2, "jill", 500));
  appendFiles.commit();
  transaction.commitTransaction();

  // Re-read the table info so it reflects the committed append.
  tableInfo = new IcebergTableWrapper(getSabotContext(),
    HadoopFileSystem.get(fs), conf, root.getAbsolutePath()).getTableInfo();
  assertEquals(1500, tableInfo.getRecordCount());
  assertEquals(2, ImmutableList.copyOf(tableInfo.getPartitionChunkListing().iterator()).size());

  // validate first partition
  // An AtomicLong is used as the accumulator because it is updated from inside a lambda.
  final AtomicLong recordCount = new AtomicLong(0);
  PartitionChunk p1 = findPartition(ImmutableList.copyOf(tableInfo.getPartitionChunkListing().iterator()), 1, "jack");
  assertNotNull(p1);
  assertEquals(2, p1.getSplitCount());
  p1.getSplits().iterator().forEachRemaining(x -> recordCount.addAndGet(x.getRecordCount()));
  assertEquals(300, recordCount.intValue());

  // validate second partition
  PartitionChunk p2 = findPartition(ImmutableList.copyOf(tableInfo.getPartitionChunkListing().iterator()), 2, "jill");
  assertNotNull(p2);

  assertEquals(3, p2.getSplitCount());
  // Reset the shared accumulator before summing the second partition's splits.
  recordCount.set(0);
  p2.getSplits().iterator().forEachRemaining(x -> recordCount.addAndGet(x.getRecordCount()));
  assertEquals(1200, recordCount.intValue());
}