package org.apache.iceberg;

import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.util.ThreadPools;

public class DataTableScan extends BaseTableScan {
  static final ImmutableList<String> SCAN_COLUMNS = ImmutableList.of(
      "snapshot_id", "file_path", "file_ordinal", "file_format", "block_size_in_bytes",
      "file_size_in_bytes", "record_count", "partition", "key_metadata"
  static final ImmutableList<String> SCAN_WITH_STATS_COLUMNS = ImmutableList.<String>builder()
      .add("value_counts", "null_value_counts", "lower_bounds", "upper_bounds", "column_sizes")
  /**
   * Flag consulted by {@code planFiles} before handing planning to the worker pool. Read once, at
   * class initialization, from {@code SystemProperties.SCAN_THREAD_POOL_ENABLED}; {@code true} is
   * passed as the second argument (presumably the fallback when the property is unset).
   */
  static final boolean PLAN_SCANS_WITH_WORKER_POOL = SystemProperties.getBoolean(
      SystemProperties.SCAN_THREAD_POOL_ENABLED,
      true);

  /**
   * Creates a scan of {@code table} that uses the schema returned by {@code table.schema()}.
   *
   * @param ops table operations passed through to the base scan
   * @param table the table to scan
   */
  public DataTableScan(TableOperations ops, Table table) {
    super(ops, table, table.schema());
  }

  /**
   * Creates a scan with an explicit schema and scan context; used by {@code newRefinedScan} to
   * build refined copies of a scan.
   *
   * @param ops table operations passed through to the base scan
   * @param table the table to scan
   * @param schema the schema passed through to the base scan
   * @param context the scan context passed through to the base scan
   */
  protected DataTableScan(TableOperations ops, Table table, Schema schema, TableScanContext context) {
    super(ops, table, schema, context);
  }

  /**
   * Returns an {@code IncrementalDataTableScan} built from this scan's operations, table and schema.
   * <p>
   * Only allowed while this scan has no snapshot id set, i.e. {@code snapshotId()} returns null.
   *
   * @param fromSnapshotId start of the range; not referenced in the visible lines, presumably
   *                       carried by the truncated constructor argument (TODO confirm)
   * @param toSnapshotId end of the range; same caveat as {@code fromSnapshotId}
   * @return the incremental scan
   * @throws IllegalStateException if this scan already has a snapshot id set
   */
  public TableScan appendsBetween(long fromSnapshotId, long toSnapshotId) {
    Long scanSnapshotId = snapshotId();
    // an incremental scan cannot be layered on a scan that already targets a specific snapshot
    Preconditions.checkState(scanSnapshotId == null,
        "Cannot enable incremental scan, scan-snapshot set to id=%s", scanSnapshotId);
    // NOTE(review): the statement below is cut off after schema() -- the remaining constructor
    // argument(s), the closing ");" and the method's closing brace are missing from this file.
    // Restore them from version control rather than guessing the argument.
    return new IncrementalDataTableScan(tableOps(), table(), schema(),

  public TableScan appendsAfter(long fromSnapshotId) {
    Snapshot currentSnapshot = table().currentSnapshot();
    Preconditions.checkState(currentSnapshot != null, "Cannot scan appends after %s, there is no current snapshot",
    return appendsBetween(fromSnapshotId, currentSnapshot.snapshotId());

  protected TableScan newRefinedScan(TableOperations ops, Table table, Schema schema, TableScanContext context) {
    return new DataTableScan(ops, table, schema, context);

  /**
   * Plans file scan tasks by running a {@code ManifestGroup} over the snapshot's data manifests.
   *
   * @param ops table operations; supplies the {@code FileIO} used to read manifests
   * @param snapshot snapshot whose {@code dataManifests()} are planned
   * @param rowFilter row filter; not referenced in the visible lines (see review note below)
   * @param ignoreResiduals when true, {@code ignoreResiduals()} is applied to the manifest group
   * @param caseSensitive not referenced in the visible lines (see review note below)
   * @param colStats when true, {@code SCAN_WITH_STATS_COLUMNS} is selected instead of
   *                 {@code SCAN_COLUMNS}
   * @return the result of {@code ManifestGroup.planFiles()}
   */
  public CloseableIterable<FileScanTask> planFiles(TableOperations ops, Snapshot snapshot,
                                                   Expression rowFilter, boolean ignoreResiduals,
                                                   boolean caseSensitive, boolean colStats) {
    // NOTE(review): this builder chain has no terminating ';' and never uses rowFilter or
    // caseSensitive, so chained calls appear to be missing from this file. The two if-blocks and
    // the method body are also missing their closing braces. Restore from version control.
    ManifestGroup manifestGroup = new ManifestGroup(ops.io(), snapshot.dataManifests())
        .select(colStats ? SCAN_WITH_STATS_COLUMNS : SCAN_COLUMNS)

    if (ignoreResiduals) {
      manifestGroup = manifestGroup.ignoreResiduals();

    // use the shared worker pool only when it is enabled and there is more than one data manifest
    if (PLAN_SCANS_WITH_WORKER_POOL && snapshot.dataManifests().size() > 1) {
      manifestGroup = manifestGroup.planWith(ThreadPools.getWorkerPool());

    return manifestGroup.planFiles();

  protected long targetSplitSize(TableOperations ops) {
    return ops.current().propertyAsLong(
        TableProperties.SPLIT_SIZE, TableProperties.SPLIT_SIZE_DEFAULT);