Java Code Examples for org.apache.iceberg.io.InputFile#location()

The following examples show how to use org.apache.iceberg.io.InputFile#location(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OrcIterable.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a row-batch iterator over an ORC file using options derived from the given reader.
 *
 * @param file input file being read; its location is handed to the iterator and reported on failure
 * @param readerSchema schema applied to the reader options and passed to the iterator
 * @param start start of the range to read, or {@code null} to leave the range option untouched
 * @param length length of the range; only read when {@code start} is non-null
 * @param orcFileReader reader that supplies the options and produces the rows
 * @param sarg search argument applied to the reader options (with an empty column-name array)
 * @return an iterator wrapping the rows returned by {@code orcFileReader}
 * @throws RuntimeIOException if the reader throws an {@link IOException} while creating the rows
 */
private static VectorizedRowBatchIterator newOrcIterator(InputFile file,
                                                         TypeDescription readerSchema,
                                                         Long start, Long length,
                                                         Reader orcFileReader, SearchArgument sarg) {
  final Reader.Options options = orcFileReader.options();
  if (start != null) {
    options.range(start, length);
  }
  options.schema(readerSchema);
  options.searchArgument(sarg, new String[]{});

  try {
    return new VectorizedRowBatchIterator(file.location(), readerSchema, orcFileReader.rows(options));
  } catch (IOException ioe) {
    // Report the location explicitly rather than relying on the InputFile implementation's toString().
    throw new RuntimeIOException(ioe, "Failed to get ORC rows for file: %s", file.location());
  }
}
 
Example 2
Source File: ManifestLists.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Reads all manifest file entries from a manifest list.
 *
 * @param manifestList the manifest list file to read
 * @return a linked list holding every entry produced by the Avro reader
 * @throws RuntimeIOException if an {@link IOException} is raised while reading or closing the file
 */
static List<ManifestFile> read(InputFile manifestList) {
  String manifestFileClass = GenericManifestFile.class.getName();
  String fieldSummaryClass = GenericPartitionFieldSummary.class.getName();

  try (CloseableIterable<ManifestFile> entries = Avro.read(manifestList)
      .rename("manifest_file", manifestFileClass)
      .rename("partitions", fieldSummaryClass)
      .rename("r508", fieldSummaryClass)
      .classLoader(GenericManifestFile.class.getClassLoader())
      .project(ManifestFile.schema())
      .reuseContainers(false)
      .build()) {
    // copy the entries out before the reader is closed
    return Lists.newLinkedList(entries);
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Cannot read manifest list file: %s", manifestList.location());
  }
}
 
Example 3
Source File: GenericManifestFile.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a manifest file description backed by an input file.
 *
 * <p>Only the file, its location and the spec id are known here. Sequence numbers start at 0 and
 * every other field is left {@code null}.
 *
 * @param file the manifest's input file; its location becomes the manifest path
 * @param specId id of the partition spec for this manifest
 */
GenericManifestFile(InputFile file, int specId) {
  // values supplied by the caller
  this.file = file;
  this.manifestPath = file.location();
  this.specId = specId;
  this.avroSchema = AVRO_SCHEMA;

  this.length = null; // lazily loaded from file

  // sequence numbers default to 0
  this.sequenceNumber = 0;
  this.minSequenceNumber = 0;

  // nothing else is known yet
  this.snapshotId = null;
  this.partitions = null;
  this.fromProjectionPos = null;

  // file and row counts are unset
  this.addedFilesCount = null;
  this.existingFilesCount = null;
  this.deletedFilesCount = null;
  this.addedRowsCount = null;
  this.existingRowsCount = null;
  this.deletedRowsCount = null;
}
 
Example 4
Source File: OrcMetrics.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds metrics for an ORC file from its row count, schema and statistics.
 *
 * @param file the ORC file to inspect
 * @param config configuration used to open the ORC reader
 * @return metrics built from the values reported by the reader
 * @throws RuntimeIOException if an {@link IOException} is raised while opening or closing the reader
 */
static Metrics fromInputFile(InputFile file, Configuration config) {
  try (Reader reader = ORC.newFileReader(file, config)) {
    return buildOrcMetrics(
        reader.getNumberOfRows(),
        reader.getSchema(),
        reader.getStatistics());
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Failed to open file: %s", file.location());
  }
}
 
Example 5
Source File: ReadConf.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a Parquet reader for the given input file.
 *
 * @param file the file to open
 * @param options read options passed to the Parquet reader
 * @return the opened reader
 * @throws RuntimeIOException if opening the file raises an {@link IOException}
 */
private static ParquetFileReader newReader(InputFile file, ParquetReadOptions options) {
  final ParquetFileReader reader;
  try {
    reader = ParquetFileReader.open(ParquetIO.file(file), options);
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to open Parquet file: %s", file.location());
  }
  return reader;
}
 
Example 6
Source File: FileMetadata.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the file path and size on this builder from an input file.
 *
 * <p>A {@code HadoopInputFile} is delegated to {@code withStatus} using its file status. Any other
 * implementation supplies its location and length directly.
 *
 * @param file the input file to take the path and size from
 * @return this builder, or whatever {@code withStatus} returns for Hadoop files
 */
public Builder withInputFile(InputFile file) {
  if (!(file instanceof HadoopInputFile)) {
    this.filePath = file.location();
    this.fileSizeInBytes = file.getLength();
    return this;
  }

  HadoopInputFile hadoopFile = (HadoopInputFile) file;
  return withStatus(hadoopFile.getStat());
}
 
Example 7
Source File: DataFiles.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a data file description for a partitioned input file.
 *
 * <p>A {@code HadoopInputFile} is delegated to {@code fromStat} using its file status. Otherwise
 * the format is derived from the file's location and the size from its length.
 *
 * @param file the input file to describe
 * @param partition partition data for the file
 * @param rowCount number of rows in the file
 * @return a data file built from the given values
 */
public static DataFile fromInputFile(InputFile file, PartitionData partition, long rowCount) {
  if (!(file instanceof HadoopInputFile)) {
    String path = file.location();
    FileFormat fileFormat = FileFormat.fromFileName(path);
    long sizeInBytes = file.getLength();
    return new GenericDataFile(path, fileFormat, partition, rowCount, sizeInBytes);
  }

  HadoopInputFile hadoopFile = (HadoopInputFile) file;
  return fromStat(hadoopFile.getStat(), partition, rowCount);
}
 
Example 8
Source File: DataFiles.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a data file description for an input file without partition data.
 *
 * <p>A {@code HadoopInputFile} is delegated to {@code fromStat} using its file status. Otherwise
 * the format is derived from the file's location and the size from its length.
 *
 * @param file the input file to describe
 * @param rowCount number of rows in the file
 * @return a data file built from the given values
 */
public static DataFile fromInputFile(InputFile file, long rowCount) {
  if (!(file instanceof HadoopInputFile)) {
    String path = file.location();
    FileFormat fileFormat = FileFormat.fromFileName(path);
    long sizeInBytes = file.getLength();
    return new GenericDataFile(path, fileFormat, rowCount, sizeInBytes);
  }

  HadoopInputFile hadoopFile = (HadoopInputFile) file;
  return fromStat(hadoopFile.getStat(), rowCount);
}
 
Example 9
Source File: DataFiles.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a data file description for a file written through an encrypted output file.
 *
 * <p>The input file is obtained from the encrypting output file. When that input file is a
 * {@code HadoopInputFile}, construction is delegated to {@code fromStat} using its file status.
 * Otherwise the format is derived from the file's location and the size from its length.
 *
 * @param encryptedFile the encrypted output file that was written
 * @param partition partition data for the file
 * @param metrics metrics for the file
 * @param splitOffsets split offsets for the file
 * @return a data file built from the given values and the file's key metadata
 */
public static DataFile fromEncryptedOutputFile(EncryptedOutputFile encryptedFile, PartitionData partition,
                                              Metrics metrics, List<Long> splitOffsets) {
  EncryptionKeyMetadata keyMetadata = encryptedFile.keyMetadata();
  InputFile file = encryptedFile.encryptingOutputFile().toInputFile();
  // The type check must be on the same object that is cast below; checking encryptedFile here
  // would either skip this path or make the cast of file fail.
  if (file instanceof HadoopInputFile) {
    return fromStat(((HadoopInputFile) file).getStat(), partition, metrics, keyMetadata, splitOffsets);
  }

  String location = file.location();
  FileFormat format = FileFormat.fromFileName(location);
  return new GenericDataFile(
      location, format, partition, file.getLength(), metrics, keyMetadata.buffer(), splitOffsets);
}
 
Example 10
Source File: DataFiles.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the file path and size on this builder from an input file.
 *
 * <p>A {@code HadoopInputFile} is delegated to {@code withStatus} using its file status. Any other
 * implementation supplies its location and length directly.
 *
 * @param file the input file to take the path and size from
 * @return this builder, or whatever {@code withStatus} returns for Hadoop files
 */
public Builder withInputFile(InputFile file) {
  boolean isHadoopFile = file instanceof HadoopInputFile;
  if (isHadoopFile) {
    HadoopInputFile hadoopFile = (HadoopInputFile) file;
    return withStatus(hadoopFile.getStat());
  } else {
    this.filePath = file.location();
    this.fileSizeInBytes = file.getLength();
    return this;
  }
}
 
Example 11
Source File: ManifestFiles.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Copies a manifest into a new output file, marking its entries as {@code ADDED}.
 *
 * @param formatVersion format version passed to the copy
 * @param toCopy the manifest to read
 * @param specsById partition specs by id, used to read the manifest
 * @param outputFile destination for the copied manifest
 * @param snapshotId snapshot id used for inherited metadata and for the copy
 * @param summaryBuilder summary builder passed to the copy
 * @return the manifest file produced by the copy
 * @throws RuntimeIOException if an {@link IOException} is raised, reported as a failure to close
 */
static ManifestFile copyAppendManifest(int formatVersion,
                                       InputFile toCopy, Map<Integer, PartitionSpec> specsById,
                                       OutputFile outputFile, long snapshotId,
                                       SnapshotSummary.Builder summaryBuilder) {
  // use metadata that will add the current snapshot's ID for the rewrite
  InheritableMetadata copyMetadata = InheritableMetadataFactory.forCopy(snapshotId);
  try (ManifestReader<DataFile> source =
           new ManifestReader<>(toCopy, specsById, copyMetadata, FileType.DATA_FILES)) {
    ManifestFile copied = copyManifestInternal(
        formatVersion, source, outputFile, snapshotId, summaryBuilder, ManifestEntry.Status.ADDED);
    return copied;
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to close manifest: %s", toCopy.location());
  }
}
 
Example 12
Source File: ManifestFiles.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Copies a rewritten manifest into a new output file, marking its entries as {@code EXISTING}.
 *
 * @param formatVersion format version passed to the copy
 * @param toCopy the manifest to read
 * @param specsById partition specs by id, used to read the manifest
 * @param outputFile destination for the copied manifest
 * @param snapshotId snapshot id passed to the copy
 * @param summaryBuilder summary builder passed to the copy
 * @return the manifest file produced by the copy
 * @throws RuntimeIOException if an {@link IOException} is raised, reported as a failure to close
 */
static ManifestFile copyRewriteManifest(int formatVersion,
                                        InputFile toCopy, Map<Integer, PartitionSpec> specsById,
                                        OutputFile outputFile, long snapshotId,
                                        SnapshotSummary.Builder summaryBuilder) {
  // for a rewritten manifest all snapshot ids should be set. use empty metadata to throw an exception if it is not
  InheritableMetadata emptyMetadata = InheritableMetadataFactory.empty();
  try (ManifestReader<DataFile> source =
           new ManifestReader<>(toCopy, specsById, emptyMetadata, FileType.DATA_FILES)) {
    ManifestFile rewritten = copyManifestInternal(
        formatVersion, source, outputFile, snapshotId, summaryBuilder, ManifestEntry.Status.EXISTING);
    return rewritten;
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to close manifest: %s", toCopy.location());
  }
}
 
Example 13
Source File: TableMetadata.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Creates table metadata and validates that its pieces are consistent with each other.
 *
 * <p>The constructor checks the arguments, stores them, indexes snapshots and partition specs by
 * id, and then verifies that the snapshot log and the previous-metadata log are in timestamp
 * order and not newer than {@code lastUpdatedMillis}, each within a tolerance of
 * {@code ONE_MINUTE}. Finally it checks that the current snapshot id is either negative or
 * refers to a known snapshot.
 *
 * @param file the metadata file this was read from, or {@code null}; its location is recorded when present
 * @param formatVersion table format version; must not exceed {@code SUPPORTED_TABLE_FORMAT_VERSION}
 * @param uuid table UUID; required unless {@code formatVersion} is 1
 * @param location table location
 * @param lastSequenceNumber last sequence number; must be 0 when {@code formatVersion} is 1 or lower
 * @param lastUpdatedMillis timestamp of this update in milliseconds
 * @param lastColumnId last column id
 * @param schema table schema
 * @param defaultSpecId id of the default partition spec
 * @param specs partition specs; must not be null or empty
 * @param properties table properties
 * @param currentSnapshotId id of the current snapshot, or a negative value
 * @param snapshots known snapshots
 * @param snapshotLog snapshot history entries, expected in timestamp order
 * @param previousFiles previous metadata log entries, expected in timestamp order
 * @throws IllegalArgumentException if any of the precondition checks fails
 */
TableMetadata(InputFile file,
              int formatVersion,
              String uuid,
              String location,
              long lastSequenceNumber,
              long lastUpdatedMillis,
              int lastColumnId,
              Schema schema,
              int defaultSpecId,
              List<PartitionSpec> specs,
              Map<String, String> properties,
              long currentSnapshotId,
              List<Snapshot> snapshots,
              List<HistoryEntry> snapshotLog,
              List<MetadataLogEntry> previousFiles) {
  Preconditions.checkArgument(specs != null && !specs.isEmpty(), "Partition specs cannot be null or empty");
  Preconditions.checkArgument(formatVersion <= SUPPORTED_TABLE_FORMAT_VERSION,
      "Unsupported format version: v%s", formatVersion);
  Preconditions.checkArgument(formatVersion == 1 || uuid != null,
      "UUID is required in format v%s", formatVersion);
  Preconditions.checkArgument(formatVersion > 1 || lastSequenceNumber == 0,
      "Sequence number must be 0 in v1: %s", lastSequenceNumber);

  this.formatVersion = formatVersion;
  this.file = file;
  this.metadataFileLocation = file != null ? file.location() : null;
  this.uuid = uuid;
  this.location = location;
  this.lastSequenceNumber = lastSequenceNumber;
  this.lastUpdatedMillis = lastUpdatedMillis;
  this.lastColumnId = lastColumnId;
  this.schema = schema;
  this.specs = specs;
  this.defaultSpecId = defaultSpecId;
  this.properties = properties;
  this.currentSnapshotId = currentSnapshotId;
  this.snapshots = snapshots;
  this.snapshotLog = snapshotLog;
  this.previousFiles = previousFiles;

  this.snapshotsById = indexAndValidateSnapshots(snapshots, lastSequenceNumber);
  this.specsById = indexSpecs(specs);

  // Snapshot log entries must be in timestamp order.
  HistoryEntry last = null;
  for (HistoryEntry logEntry : snapshotLog) {
    if (last != null) {
      Preconditions.checkArgument(
          // commits can happen concurrently from different machines.
          // A tolerance helps us avoid failure for small clock skew
          (logEntry.timestampMillis() - last.timestampMillis()) >= -ONE_MINUTE,
          "[BUG] Expected sorted snapshot log entries.");
    }
    last = logEntry;
  }
  // Make sure that this update's lastUpdatedMillis is not before the last snapshot log entry
  if (last != null) {
    Preconditions.checkArgument(
        // commits can happen concurrently from different machines.
        // A tolerance helps us avoid failure for small clock skew
        lastUpdatedMillis - last.timestampMillis() >= -ONE_MINUTE,
        "Invalid update timestamp %s: before last snapshot log entry at %s",
        lastUpdatedMillis, last.timestampMillis());
  }

  // Previous metadata log entries must be in timestamp order.
  MetadataLogEntry previous = null;
  for (MetadataLogEntry metadataEntry : previousFiles) {
    if (previous != null) {
      Preconditions.checkArgument(
          // commits can happen concurrently from different machines.
          // A tolerance helps us avoid failure for small clock skew
          (metadataEntry.timestampMillis() - previous.timestampMillis()) >= -ONE_MINUTE,
          "[BUG] Expected sorted previous metadata log entries.");
    }
    previous = metadataEntry;
  }
  // Make sure that this update's lastUpdatedMillis is > max(previousFile's timestamp)
  if (previous != null) {
    Preconditions.checkArgument(
        // commits can happen concurrently from different machines.
        // A tolerance helps us avoid failure for small clock skew
        lastUpdatedMillis - previous.timestampMillis() >= -ONE_MINUTE,
        "Invalid update timestamp %s: before the latest metadata log entry timestamp %s",
        lastUpdatedMillis, previous.timestampMillis());
  }

  Preconditions.checkArgument(
      currentSnapshotId < 0 || snapshotsById.containsKey(currentSnapshotId),
      "Invalid table metadata: Cannot find current version");
}
 
Example 14
Source File: DataFiles.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Describes a manifest list as an Avro data file with a record count of 1.
 *
 * @param manifestList the manifest list file to describe
 * @return a data file using the manifest list's location and length
 */
public static DataFile fromManifestList(InputFile manifestList) {
  String path = manifestList.location();
  long sizeInBytes = manifestList.getLength();
  return new GenericDataFile(path, FileFormat.AVRO, 1, sizeInBytes);
}