Java Code Examples for org.apache.iceberg.PartitionSpec#unpartitioned()

The following examples show how to use org.apache.iceberg.PartitionSpec#unpartitioned(). All of them are taken from the Apache Iceberg project; the source file and license are noted above each example.
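PartitionSpec.unpartitioned() returns the spec that has no partition fields; passing it when creating a table produces an unpartitioned table. Before the project examples, here is a minimal standalone sketch contrasting it with a spec built from a schema. The schema, field ids, and printed values are illustrative and not taken from any of the files below.

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

public class UnpartitionedSpecSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.IntegerType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));

    // the spec with no partition fields
    PartitionSpec unpartitioned = PartitionSpec.unpartitioned();
    System.out.println(unpartitioned.isUnpartitioned()); // true
    System.out.println(unpartitioned.fields().size());   // 0

    // by contrast, an identity-partitioned spec built from the schema
    PartitionSpec byData = PartitionSpec.builderFor(schema).identity("data").build();
    System.out.println(byData.isUnpartitioned());        // false
  }
}
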
Example 1
Source File: TestSnapshotSelection.java    From iceberg with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testSnapshotSelectionBySnapshotIdAndTimestamp() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Table table = tables.create(SCHEMA, spec, tableLocation);

  List<SimpleRecord> firstBatchRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );
  Dataset<Row> firstDf = spark.createDataFrame(firstBatchRecords, SimpleRecord.class);
  firstDf.select("id", "data").write().format("iceberg").mode("append").save(tableLocation);

  long timestamp = System.currentTimeMillis();
  long snapshotId = table.currentSnapshot().snapshotId();
  Dataset<Row> df = spark.read()
      .format("iceberg")
      .option("snapshot-id", snapshotId)
      .option("as-of-timestamp", timestamp)
      .load(tableLocation);

  df.collectAsList();
}
 
Example 2
Source File: IcebergSourceNestedDataBenchmark.java    From iceberg with Apache License 2.0
@Override
protected final Table initTable() {
  Schema schema = new Schema(
      required(0, "id", Types.LongType.get()),
      optional(4, "nested", Types.StructType.of(
          required(1, "col1", Types.StringType.get()),
          required(2, "col2", Types.DoubleType.get()),
          required(3, "col3", Types.LongType.get())
      ))
  );
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.METADATA_COMPRESSION, "gzip");
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
 
Example 3
Source File: VectorizedReadFlatParquetDataBenchmark.java    From iceberg with Apache License 2.0
@Override
protected Table initTable() {
  Schema schema = new Schema(
      optional(1, "longCol", Types.LongType.get()),
      optional(2, "intCol", Types.IntegerType.get()),
      optional(3, "floatCol", Types.FloatType.get()),
      optional(4, "doubleCol", Types.DoubleType.get()),
      optional(5, "decimalCol", Types.DecimalType.of(20, 5)),
      optional(6, "dateCol", Types.DateType.get()),
      optional(7, "timestampCol", Types.TimestampType.withZone()),
      optional(8, "stringCol", Types.StringType.get()));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = parquetWriteProps();
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
 
Example 4
Source File: TestRewriteManifestsAction.java    From iceberg with Apache License 2.0
@Test
public void testRewriteManifestsEmptyTable() throws IOException {
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> options = Maps.newHashMap();
  options.put(TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED, snapshotIdInheritanceEnabled);
  Table table = TABLES.create(SCHEMA, spec, options, tableLocation);

  Assert.assertNull("Table must be empty", table.currentSnapshot());

  Actions actions = Actions.forTable(table);

  actions.rewriteManifests()
      .rewriteIf(manifest -> true)
      .stagingLocation(temp.newFolder().toString())
      .execute();

  Assert.assertNull("Table must stay empty", table.currentSnapshot());
}
 
Example 5
Source File: HadoopTables.java    From iceberg with Apache License 2.0
/**
 * Create a table using the FileSystem implementation resolved from
 * location.
 *
 * @param schema iceberg schema used to create the table
 * @param spec partitioning spec, if null the table will be unpartitioned
 * @param properties a string map of table properties, initialized to empty if null
 * @param location a path URI (e.g. hdfs:///warehouse/my_table)
 * @return newly created table implementation
 */
@Override
public Table create(Schema schema, PartitionSpec spec, Map<String, String> properties,
                    String location) {
  Preconditions.checkNotNull(schema, "A table schema is required");

  TableOperations ops = newTableOps(location);
  if (ops.current() != null) {
    throw new AlreadyExistsException("Table already exists at location: " + location);
  }

  Map<String, String> tableProps = properties == null ? ImmutableMap.of() : properties;
  PartitionSpec partitionSpec = spec == null ? PartitionSpec.unpartitioned() : spec;
  TableMetadata metadata = TableMetadata.newTableMetadata(schema, partitionSpec, location, tableProps);
  ops.commit(null, metadata);

  return new BaseTable(ops, location);
}
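As the Javadoc above states, both spec and properties may be null; a null spec falls back to PartitionSpec.unpartitioned(). Below is a minimal usage sketch, assuming iceberg-core and Hadoop are on the classpath; the schema and warehouse path are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;

public class CreateUnpartitionedTableSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));

    HadoopTables tables = new HadoopTables(new Configuration());
    // null spec and null properties rely on the defaults documented above
    Table table = tables.create(schema, null, null, "file:///tmp/warehouse/my_table");
    System.out.println(table.spec().isUnpartitioned()); // true
  }
}
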
 
Example 6
Source File: TestSparkSchema.java    From iceberg with Apache License 2.0
@Test
public void testSparkReadSchemaCombinedWithProjection() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, null, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a")
  );
  Dataset<Row> originalDf = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  originalDf.select("id", "data").write()
      .format("iceberg")
      .mode("append")
      .save(tableLocation);

  StructType sparkReadSchema =
      new StructType(
          new StructField[] {
              new StructField("id", DataTypes.IntegerType, true, Metadata.empty()),
              new StructField("data", DataTypes.StringType, true, Metadata.empty())
          }
      );

  Dataset<Row> resultDf = spark.read()
      .schema(sparkReadSchema)
      .format("iceberg")
      .load(tableLocation)
      .select("id");

  Row[] results = (Row[]) resultDf.collect();

  Assert.assertEquals("Result size matches", 1, results.length);
  Assert.assertEquals("Row length matches with sparkReadSchema", 1, results[0].length());
  Assert.assertEquals("Row content matches data", 1, results[0].getInt(0));
}
 
Example 7
Source File: TestSparkSchema.java    From iceberg with Apache License 2.0
@Test
public void testSparkReadSchemaIsHonored() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, null, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a")
  );
  Dataset<Row> originalDf = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  originalDf.select("id", "data").write()
      .format("iceberg")
      .mode("append")
      .save(tableLocation);

  StructType sparkReadSchema =
      new StructType(
          new StructField[] {
              new StructField("id", DataTypes.IntegerType, true, Metadata.empty())
          }
      );

  Dataset<Row> resultDf = spark.read()
      .schema(sparkReadSchema)
      .format("iceberg")
      .load(tableLocation);

  Row[] results = (Row[]) resultDf.collect();

  Assert.assertEquals("Result size matches", 1, results.length);
  Assert.assertEquals("Row length matches with sparkReadSchema", 1, results[0].length());
  Assert.assertEquals("Row content matches data", 1, results[0].getInt(0));
}
 
Example 8
Source File: TestDataSourceOptions.java    From iceberg with Apache License 2.0
@Test
public void testDefaultMetadataSplitSize() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> options = Maps.newHashMap();
  tables.create(SCHEMA, spec, options, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b")
  );
  Dataset<Row> originalDf = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  originalDf.select("id", "data").write()
      .format("iceberg")
      .mode("append")
      .save(tableLocation);

  int splitSize = (int) TableProperties.METADATA_SPLIT_SIZE_DEFAULT; // 32MB split size

  int expectedSplits = ((int) tables.load(tableLocation + "#entries")
      .currentSnapshot().allManifests().get(0).length() + splitSize - 1) / splitSize;

  Dataset<Row> metadataDf = spark.read()
      .format("iceberg")
      .load(tableLocation + "#entries");

  int partitionNum = metadataDf.javaRDD().getNumPartitions();
  Assert.assertEquals("Spark partitions should match", expectedSplits, partitionNum);
}
 
Example 9
Source File: TestSparkDataWrite.java    From iceberg with Apache License 2.0
@Test
public void testUnpartitionedOverwrite() throws IOException {
  File parent = temp.newFolder(format.toString());
  File location = new File(parent, "test");

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Table table = tables.create(SCHEMA, spec, location.toString());

  List<SimpleRecord> expected = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );

  Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);

  df.select("id", "data").write()
      .format("iceberg")
      .option("write-format", format.toString())
      .mode("append")
      .save(location.toString());

  // overwrite with the same data; should not produce two copies
  df.select("id", "data").write()
      .format("iceberg")
      .option("write-format", format.toString())
      .mode("overwrite")
      .save(location.toString());

  table.refresh();

  Dataset<Row> result = spark.read()
      .format("iceberg")
      .load(location.toString());

  List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
  Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
  Assert.assertEquals("Result rows should match", expected, actual);
}
 
Example 10
Source File: TestSparkSchema.java    From iceberg with Apache License 2.0
@Test
public void testFailIfSparkReadSchemaIsOff() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, null, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a")
  );
  Dataset<Row> originalDf = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  originalDf.select("id", "data").write()
      .format("iceberg")
      .mode("append")
      .save(tableLocation);

  StructType sparkReadSchema =
      new StructType(
          new StructField[] {
              new StructField("idd", DataTypes.IntegerType, true, Metadata.empty()) // wrong field name
          }
      );

  AssertHelpers.assertThrows("Iceberg should not allow a projection that contains unknown fields",
      java.lang.IllegalArgumentException.class, "Field idd not found in source schema",
      () ->
          spark.read()
              .schema(sparkReadSchema)
              .format("iceberg")
              .load(tableLocation)
  );
}
 
Example 11
Source File: TestDataSourceOptions.java    From iceberg with Apache License 2.0
@Test
public void testWriteFormatOptionOverridesTableProperties() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> options = Maps.newHashMap();
  options.put(TableProperties.DEFAULT_FILE_FORMAT, "avro");
  Table table = tables.create(SCHEMA, spec, options, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );
  Dataset<Row> df = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  df.select("id", "data").write()
      .format("iceberg")
      .option("write-format", "parquet")
      .mode("append")
      .save(tableLocation);

  try (CloseableIterable<FileScanTask> tasks = table.newScan().planFiles()) {
    tasks.forEach(task -> {
      FileFormat fileFormat = FileFormat.fromFileName(task.file().path());
      Assert.assertEquals(FileFormat.PARQUET, fileFormat);
    });
  }
}
 
Example 12
Source File: TestSparkDataWrite.java    From iceberg with Apache License 2.0
@Test
public void testWriteProjection() throws IOException {
  Assume.assumeTrue(
      "Not supported in Spark 3.0; analysis requires all columns are present",
      spark.version().startsWith("2"));

  File parent = temp.newFolder(format.toString());
  File location = new File(parent, "test");

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Table table = tables.create(SCHEMA, spec, location.toString());

  List<SimpleRecord> expected = Lists.newArrayList(
      new SimpleRecord(1, null),
      new SimpleRecord(2, null),
      new SimpleRecord(3, null)
  );

  Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);

  df.select("id").write() // select only id column
      .format("iceberg")
      .option("write-format", format.toString())
      .mode("append")
      .save(location.toString());

  table.refresh();

  Dataset<Row> result = spark.read()
      .format("iceberg")
      .load(location.toString());

  List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
  Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
  Assert.assertEquals("Result rows should match", expected, actual);
}
 
Example 13
Source File: TestWriteMetricsConfig.java    From iceberg with Apache License 2.0
@Test
public void testNoMetricsCollectionForParquet() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.DEFAULT_WRITE_METRICS_MODE, "none");
  Table table = tables.create(SIMPLE_SCHEMA, spec, properties, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );
  Dataset<Row> df = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  df.select("id", "data")
      .coalesce(1)
      .write()
      .format("iceberg")
      .option("write-format", "parquet")
      .mode("append")
      .save(tableLocation);

  for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) {
    DataFile file = task.file();
    Assert.assertTrue(file.nullValueCounts().isEmpty());
    Assert.assertTrue(file.valueCounts().isEmpty());
    Assert.assertTrue(file.lowerBounds().isEmpty());
    Assert.assertTrue(file.upperBounds().isEmpty());
  }
}
 
Example 14
Source File: TestWriteMetricsConfig.java    From iceberg with Apache License 2.0
@Test
public void testCustomMetricCollectionForParquet() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.DEFAULT_WRITE_METRICS_MODE, "counts");
  properties.put("write.metadata.metrics.column.id", "full");
  Table table = tables.create(SIMPLE_SCHEMA, spec, properties, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );
  Dataset<Row> df = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  df.select("id", "data")
      .coalesce(1)
      .write()
      .format("iceberg")
      .option("write-format", "parquet")
      .mode("append")
      .save(tableLocation);

  Schema schema = table.schema();
  Types.NestedField id = schema.findField("id");
  for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) {
    DataFile file = task.file();
    Assert.assertEquals(2, file.nullValueCounts().size());
    Assert.assertEquals(2, file.valueCounts().size());
    Assert.assertEquals(1, file.lowerBounds().size());
    Assert.assertTrue(file.lowerBounds().containsKey(id.fieldId()));
    Assert.assertEquals(1, file.upperBounds().size());
    Assert.assertTrue(file.upperBounds().containsKey(id.fieldId()));
  }
}
 
Example 15
Source File: TestSnapshotSelection.java    From iceberg with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testSnapshotSelectionByInvalidSnapshotId() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, tableLocation);

  Dataset<Row> df = spark.read()
      .format("iceberg")
      .option("snapshot-id", -10)
      .load(tableLocation);

  df.collectAsList();
}
 
Example 16
Source File: ResidualEvaluator.java    From iceberg with Apache License 2.0
UnpartitionedResidualEvaluator(Expression expr) {
  super(PartitionSpec.unpartitioned(), expr, false);
  this.expr = expr;
}
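For context, callers normally reach this constructor through the static factory ResidualEvaluator.unpartitioned(expr); because there is no partition data to bind, the residual is simply the original filter expression. A hedged sketch, with an illustrative filter:

import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.ResidualEvaluator;

public class UnpartitionedResidualSketch {
  public static void main(String[] args) {
    Expression filter = Expressions.greaterThan("id", 5);
    ResidualEvaluator evaluator = ResidualEvaluator.unpartitioned(filter);
    // with no partition values to substitute, the residual is the filter itself
    Expression residual = evaluator.residualFor(null);
    System.out.println(residual);
  }
}
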
 
Example 17
Source File: TestRewriteDataFilesAction.java    From iceberg with Apache License 2.0
@Test
public void testRewriteDataFilesUnpartitionedTable() {
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> options = Maps.newHashMap();
  Table table = TABLES.create(SCHEMA, spec, options, tableLocation);

  List<ThreeColumnRecord> records1 = Lists.newArrayList(
      new ThreeColumnRecord(1, null, "AAAA"),
      new ThreeColumnRecord(1, "BBBBBBBBBB", "BBBB")
  );
  writeRecords(records1);

  List<ThreeColumnRecord> records2 = Lists.newArrayList(
      new ThreeColumnRecord(2, "CCCCCCCCCC", "CCCC"),
      new ThreeColumnRecord(2, "DDDDDDDDDD", "DDDD")
  );
  writeRecords(records2);

  table.refresh();

  CloseableIterable<FileScanTask> tasks = table.newScan().planFiles();
  List<DataFile> dataFiles = Lists.newArrayList(CloseableIterable.transform(tasks, FileScanTask::file));
  Assert.assertEquals("Should have 4 data files before rewrite", 4, dataFiles.size());

  Actions actions = Actions.forTable(table);

  RewriteDataFilesActionResult result = actions.rewriteDataFiles().execute();
  Assert.assertEquals("Action should rewrite 4 data files", 4, result.deletedDataFiles().size());
  Assert.assertEquals("Action should add 1 data file", 1, result.addedDataFiles().size());

  table.refresh();

  CloseableIterable<FileScanTask> tasks1 = table.newScan().planFiles();
  List<DataFile> dataFiles1 = Lists.newArrayList(CloseableIterable.transform(tasks1, FileScanTask::file));
  Assert.assertEquals("Should have 1 data files before rewrite", 1, dataFiles1.size());

  List<ThreeColumnRecord> expectedRecords = Lists.newArrayList();
  expectedRecords.addAll(records1);
  expectedRecords.addAll(records2);

  Dataset<Row> resultDF = spark.read().format("iceberg").load(tableLocation);
  List<ThreeColumnRecord> actualRecords = resultDF.sort("c1", "c2")
      .as(Encoders.bean(ThreeColumnRecord.class))
      .collectAsList();

  Assert.assertEquals("Rows must match", expectedRecords, actualRecords);
}
 
Example 18
Source File: TestDataSourceOptions.java    From iceberg with Apache License 2.0
@Test
public void testMetadataSplitSizeOptionOverrideTableProperties() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> options = Maps.newHashMap();
  Table table = tables.create(SCHEMA, spec, options, tableLocation);

  List<SimpleRecord> expectedRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b")
  );
  Dataset<Row> originalDf = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  // produce 1st manifest
  originalDf.select("id", "data").write()
      .format("iceberg")
      .mode("append")
      .save(tableLocation);
  // produce 2nd manifest
  originalDf.select("id", "data").write()
      .format("iceberg")
      .mode("append")
      .save(tableLocation);

  List<ManifestFile> manifests = table.currentSnapshot().allManifests();

  Assert.assertEquals("Must be 2 manifests", 2, manifests.size());

  // set the target metadata split size so each manifest ends up in a separate split
  table.updateProperties()
      .set(TableProperties.METADATA_SPLIT_SIZE, String.valueOf(manifests.get(0).length()))
      .commit();

  Dataset<Row> entriesDf = spark.read()
      .format("iceberg")
      .load(tableLocation + "#entries");
  Assert.assertEquals("Num partitions must match", 2, entriesDf.javaRDD().getNumPartitions());

  // override the table property using options
  entriesDf = spark.read()
      .format("iceberg")
      .option("split-size", String.valueOf(128 * 1024 * 1024))
      .load(tableLocation + "#entries");
  Assert.assertEquals("Num partitions must match", 1, entriesDf.javaRDD().getNumPartitions());
}
 
Example 19
Source File: TestSparkDataWrite.java    From iceberg with Apache License 2.0
@Test
public void testWriteProjectionWithMiddle() throws IOException {
  Assume.assumeTrue(
      "Not supported in Spark 3.0; analysis requires all columns are present",
      spark.version().startsWith("2"));

  File parent = temp.newFolder(format.toString());
  File location = new File(parent, "test");

  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Schema schema = new Schema(
      optional(1, "c1", Types.IntegerType.get()),
      optional(2, "c2", Types.StringType.get()),
      optional(3, "c3", Types.StringType.get())
  );
  Table table = tables.create(schema, spec, location.toString());

  List<ThreeColumnRecord> expected = Lists.newArrayList(
      new ThreeColumnRecord(1, null, "hello"),
      new ThreeColumnRecord(2, null, "world"),
      new ThreeColumnRecord(3, null, null)
  );

  Dataset<Row> df = spark.createDataFrame(expected, ThreeColumnRecord.class);

  df.select("c1", "c3").write()
      .format("iceberg")
      .option("write-format", format.toString())
      .mode("append")
      .save(location.toString());

  table.refresh();

  Dataset<Row> result = spark.read()
      .format("iceberg")
      .load(location.toString());

  List<ThreeColumnRecord> actual = result.orderBy("c1").as(Encoders.bean(ThreeColumnRecord.class)).collectAsList();
  Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
  Assert.assertEquals("Result rows should match", expected, actual);
}
 
Example 20
Source File: Spark3Util.java    From iceberg with Apache License 2.0
/**
 * Converts Spark transforms into a {@link PartitionSpec}.
 *
 * @param schema the table schema
 * @param partitioning Spark Transforms
 * @return a PartitionSpec
 */
public static PartitionSpec toPartitionSpec(Schema schema, Transform[] partitioning) {
  if (partitioning == null || partitioning.length == 0) {
    return PartitionSpec.unpartitioned();
  }

  PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
  for (Transform transform : partitioning) {
    Preconditions.checkArgument(transform.references().length == 1,
        "Cannot convert transform with more than one column reference: %s", transform);
    String colName = DOT.join(transform.references()[0].fieldNames());
    switch (transform.name()) {
      case "identity":
        builder.identity(colName);
        break;
      case "bucket":
        builder.bucket(colName, findWidth(transform));
        break;
      case "years":
        builder.year(colName);
        break;
      case "months":
        builder.month(colName);
        break;
      case "date":
      case "days":
        builder.day(colName);
        break;
      case "date_hour":
      case "hours":
        builder.hour(colName);
        break;
      case "truncate":
        builder.truncate(colName, findWidth(transform));
        break;
      default:
        throw new UnsupportedOperationException("Transform is not supported: " + transform);
    }
  }

  return builder.build();
}
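A hedged usage sketch for the conversion above, assuming Spark 3's connector expressions API and the public Spark3Util class from the Iceberg Spark module; the schema and transform choices are illustrative:

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.iceberg.types.Types;
import org.apache.spark.sql.connector.expressions.Expressions;
import org.apache.spark.sql.connector.expressions.Transform;

public class ToPartitionSpecSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "ts", Types.TimestampType.withZone()),
        Types.NestedField.optional(3, "category", Types.StringType.get()));

    // identity partitioning on "category" plus daily partitioning on "ts"
    Transform[] partitioning = new Transform[] {
        Expressions.identity("category"),
        Expressions.days("ts")
    };
    PartitionSpec spec = Spark3Util.toPartitionSpec(schema, partitioning);
    System.out.println(spec.fields().size()); // 2

    // an empty (or null) transform array falls back to PartitionSpec.unpartitioned()
    PartitionSpec empty = Spark3Util.toPartitionSpec(schema, new Transform[0]);
    System.out.println(empty.isUnpartitioned()); // true
  }
}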