/*
 * Copyright 2017 Netflix, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netflix.iceberg.spark.source;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.netflix.iceberg.AppendFiles;
import com.netflix.iceberg.DataFile;
import com.netflix.iceberg.DataFiles;
import com.netflix.iceberg.FileFormat;
import com.netflix.iceberg.Metrics;
import com.netflix.iceberg.PartitionSpec;
import com.netflix.iceberg.Schema;
import com.netflix.iceberg.Table;
import com.netflix.iceberg.TableProperties;
import com.netflix.iceberg.avro.Avro;
import com.netflix.iceberg.exceptions.RuntimeIOException;
import com.netflix.iceberg.hadoop.HadoopInputFile;
import com.netflix.iceberg.hadoop.HadoopOutputFile;
import com.netflix.iceberg.io.FileAppender;
import com.netflix.iceberg.io.InputFile;
import com.netflix.iceberg.io.OutputFile;
import com.netflix.iceberg.orc.ORC;
import com.netflix.iceberg.parquet.Parquet;
import com.netflix.iceberg.spark.data.SparkAvroWriter;
import com.netflix.iceberg.spark.data.SparkOrcWriter;
import com.netflix.iceberg.transforms.Transform;
import com.netflix.iceberg.transforms.Transforms;
import com.netflix.iceberg.types.Types.StringType;
import com.netflix.iceberg.util.Tasks;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport;
import org.apache.spark.sql.sources.v2.writer.DataSourceWriter;
import org.apache.spark.sql.sources.v2.writer.DataWriter;
import org.apache.spark.sql.sources.v2.writer.DataWriterFactory;
import org.apache.spark.sql.sources.v2.writer.SupportsWriteInternalRow;
import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;
import org.apache.spark.util.SerializableConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.function.Function;

import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.transform;
import static com.netflix.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS;
import static com.netflix.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS_DEFAULT;
import static com.netflix.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS;
import static com.netflix.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS_DEFAULT;
import static com.netflix.iceberg.TableProperties.COMMIT_NUM_RETRIES;
import static com.netflix.iceberg.TableProperties.COMMIT_NUM_RETRIES_DEFAULT;
import static com.netflix.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS;
import static com.netflix.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT;
import static com.netflix.iceberg.TableProperties.OBJECT_STORE_ENABLED;
import static com.netflix.iceberg.TableProperties.OBJECT_STORE_ENABLED_DEFAULT;
import static com.netflix.iceberg.TableProperties.OBJECT_STORE_PATH;
import static com.netflix.iceberg.spark.SparkSchemaUtil.convert;
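// Spark DataSource V2 writer for Iceberg tables: write tasks create data files through
// WriterFactory, the driver appends the completed DataFiles to the table in a single Iceberg
// append in commit(), and abort() deletes the files referenced by the tasks' commit messages.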
// TODO: parameterize DataSourceWriter with subclass of WriterCommitMessage
class Writer implements DataSourceWriter, SupportsWriteInternalRow {
  private static final Transform<String, Integer> HASH_FUNC = Transforms
      .bucket(StringType.get(), Integer.MAX_VALUE);
  private static final Logger LOG = LoggerFactory.getLogger(Writer.class);

  private final Table table;
  private final Configuration conf;
  private final FileFormat format;

  Writer(Table table, Configuration conf, FileFormat format) {
    this.table = table;
    this.conf = conf;
    this.format = format;
  }

  @Override
  public DataWriterFactory<InternalRow> createInternalRowWriterFactory() {
    return new WriterFactory(table.spec(), format, dataLocation(), table.properties(), conf);
  }

  @Override
  public void commit(WriterCommitMessage[] messages) {
    AppendFiles append = table.newAppend();

    int numFiles = 0;
    for (DataFile file : files(messages)) {
      numFiles += 1;
      append.appendFile(file);
    }

    LOG.info("Appending {} files to {}", numFiles, table);
    long start = System.currentTimeMillis();
    append.commit(); // abort is automatically called if this fails
    long duration = System.currentTimeMillis() - start;
    LOG.info("Committed in {} ms", duration);
  }

  @Override
  public void abort(WriterCommitMessage[] messages) {
    FileSystem fs;
    try {
      fs = new Path(table.location()).getFileSystem(conf);
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }

    Tasks.foreach(files(messages))
        .retry(propertyAsInt(COMMIT_NUM_RETRIES, COMMIT_NUM_RETRIES_DEFAULT))
        .exponentialBackoff(
            propertyAsInt(COMMIT_MIN_RETRY_WAIT_MS, COMMIT_MIN_RETRY_WAIT_MS_DEFAULT),
            propertyAsInt(COMMIT_MAX_RETRY_WAIT_MS, COMMIT_MAX_RETRY_WAIT_MS_DEFAULT),
            propertyAsInt(COMMIT_TOTAL_RETRY_TIME_MS, COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT),
            2.0 /* exponential */)
        .throwFailureWhenFinished()
        .run(file -> {
          try {
            fs.delete(new Path(file.path().toString()), false /* not recursive */);
          } catch (IOException e) {
            throw new RuntimeIOException(e);
          }
        });
  }

  private Iterable<DataFile> files(WriterCommitMessage[] messages) {
    if (messages.length > 0) {
      return concat(transform(Arrays.asList(messages), message -> message != null
          ? ImmutableList.copyOf(((TaskCommit) message).files())
          : ImmutableList.of()));
    }
    return ImmutableList.of();
  }

  private int propertyAsInt(String property, int defaultValue) {
    Map<String, String> properties = table.properties();
    String value = properties.get(property);
    if (value != null) {
      return Integer.parseInt(value);
    }
    return defaultValue;
  }

  private String dataLocation() {
    return table.properties().getOrDefault(
        TableProperties.WRITE_NEW_DATA_LOCATION,
        new Path(new Path(table.location()), "data").toString());
  }

  @Override
  public String toString() {
    return String.format("IcebergWrite(table=%s, type=%s, format=%s)",
        table, table.schema().asStruct(), format);
  }
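  /**
   * Commit message produced by each write task. It carries the {@link DataFile}s the task wrote
   * so that the driver can append them to the table on commit, or delete them on abort.
   */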
  private static class TaskCommit implements WriterCommitMessage {
    private final DataFile[] files;

    TaskCommit() {
      this.files = new DataFile[0];
    }

    TaskCommit(DataFile file) {
      this.files = new DataFile[] { file };
    }

    TaskCommit(List<DataFile> files) {
      this.files = files.toArray(new DataFile[files.size()]);
    }

    DataFile[] files() {
      return files;
    }
  }
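  /**
   * Factory for the per-task writers. It is created on the driver and serialized to executors,
   * which call {@link #createDataWriter(int, int)} once per task. File names embed the Spark
   * partition id, the attempt number, and a per-factory UUID so retried or speculative attempts
   * do not collide. When {@code OBJECT_STORE_ENABLED} is set in the table properties, output
   * paths are placed under {@code OBJECT_STORE_PATH} and prefixed with a bucket hash of the
   * partition path and file name, producing paths roughly of the form
   * {@code <object-store-path>/<8-hex-digit-hash>/db/table/<partition>/<file>}, to spread writes
   * across object store key prefixes.
   */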
  private static class WriterFactory implements DataWriterFactory<InternalRow> {
    private final PartitionSpec spec;
    private final FileFormat format;
    private final String dataLocation;
    private final Map<String, String> properties;
    private final SerializableConfiguration conf;
    private final String uuid = UUID.randomUUID().toString();

    private transient Path dataPath = null;

    WriterFactory(PartitionSpec spec, FileFormat format, String dataLocation,
                  Map<String, String> properties, Configuration conf) {
      this.spec = spec;
      this.format = format;
      this.dataLocation = dataLocation;
      this.properties = properties;
      this.conf = new SerializableConfiguration(conf);
    }

    @Override
    public DataWriter<InternalRow> createDataWriter(int partitionId, int attemptNumber) {
      String filename = format.addExtension(
          String.format("%05d-%d-%s", partitionId, attemptNumber, uuid));
      AppenderFactory<InternalRow> factory = new SparkAppenderFactory();
      if (spec.fields().isEmpty()) {
        return new UnpartitionedWriter(lazyDataPath(), filename, format, conf.value(), factory);

      } else {
        Path baseDataPath = lazyDataPath(); // avoid calling this in the output path function
        Function<PartitionKey, Path> outputPathFunc = key ->
            new Path(new Path(baseDataPath, key.toPath()), filename);

        boolean useObjectStorage = (
            Boolean.parseBoolean(properties.get(OBJECT_STORE_ENABLED)) ||
            OBJECT_STORE_ENABLED_DEFAULT
        );

        if (useObjectStorage) {
          // try to get db and table portions of the path for context in the object store
          String context = pathContext(baseDataPath);
          String objectStore = properties.get(OBJECT_STORE_PATH);
          Preconditions.checkNotNull(objectStore,
              "Cannot use object storage, missing location: " + OBJECT_STORE_PATH);
          Path objectStorePath = new Path(objectStore);

          outputPathFunc = key -> {
            String partitionAndFilename = key.toPath() + "/" + filename;
            int hash = HASH_FUNC.apply(partitionAndFilename);
            return new Path(objectStorePath,
                String.format("%08x/%s/%s", hash, context, partitionAndFilename));
          };
        }

        return new PartitionedWriter(spec, format, conf.value(), factory, outputPathFunc);
      }
    }

    private static String pathContext(Path dataPath) {
      Path parent = dataPath.getParent();
      if (parent != null) {
        // remove the data folder
        if (dataPath.getName().equals("data")) {
          return pathContext(parent);
        }

        return parent.getName() + "/" + dataPath.getName();
      }

      return dataPath.getName();
    }

    private Path lazyDataPath() {
      if (dataPath == null) {
        this.dataPath = new Path(dataLocation);
      }
      return dataPath;
    }

    private class SparkAppenderFactory implements AppenderFactory<InternalRow> {
      public FileAppender<InternalRow> newAppender(OutputFile file, FileFormat format) {
        Schema schema = spec.schema();
        try {
          switch (format) {
            case PARQUET:
              String jsonSchema = convert(schema).json();
              return Parquet.write(file)
                  .writeSupport(new ParquetWriteSupport())
                  .set("org.apache.spark.sql.parquet.row.attributes", jsonSchema)
                  .set("spark.sql.parquet.writeLegacyFormat", "false")
                  .set("spark.sql.parquet.binaryAsString", "false")
                  .set("spark.sql.parquet.int96AsTimestamp", "false")
                  .set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MICROS")
                  .setAll(properties)
                  .schema(schema)
                  .build();

            case AVRO:
              return Avro.write(file)
                  .createWriterFunc(ignored -> new SparkAvroWriter(schema))
                  .setAll(properties)
                  .schema(schema)
                  .build();

            case ORC: {
              @SuppressWarnings("unchecked")
              SparkOrcWriter writer = new SparkOrcWriter(ORC.write(file)
                  .schema(schema)
                  .build());
              return writer;
            }

            default:
              throw new UnsupportedOperationException("Cannot write unknown format: " + format);
          }
        } catch (IOException e) {
          throw new RuntimeIOException(e);
        }
      }
    }
  }

  private interface AppenderFactory<T> {
    FileAppender<T> newAppender(OutputFile file, FileFormat format);
  }

  private static class UnpartitionedWriter implements DataWriter<InternalRow>, Closeable {
    private final Path file;
    private final Configuration conf;
    private FileAppender<InternalRow> appender = null;
    private Metrics metrics = null;

    UnpartitionedWriter(Path dataPath, String filename, FileFormat format,
                        Configuration conf, AppenderFactory<InternalRow> factory) {
      this.file = new Path(dataPath, filename);
      this.appender = factory.newAppender(HadoopOutputFile.fromPath(file, conf), format);
      this.conf = conf;
    }

    @Override
    public void write(InternalRow record) {
      appender.add(record);
    }

    @Override
    public WriterCommitMessage commit() throws IOException {
      Preconditions.checkArgument(appender != null, "Commit called on a closed writer: %s", this);

      close();

      if (metrics.recordCount() == 0L) {
        FileSystem fs = file.getFileSystem(conf);
        fs.delete(file, false);
        return new TaskCommit();
      }

      InputFile inFile = HadoopInputFile.fromPath(file, conf);
      DataFile dataFile = DataFiles.fromInputFile(inFile, null, metrics);

      return new TaskCommit(dataFile);
    }

    @Override
    public void abort() throws IOException {
      Preconditions.checkArgument(appender != null, "Abort called on a closed writer: %s", this);

      close();

      FileSystem fs = file.getFileSystem(conf);
      fs.delete(file, false);
    }

    @Override
    public void close() throws IOException {
      if (this.appender != null) {
        this.appender.close();
        this.metrics = appender.metrics();
        this.appender = null;
      }
    }
  }
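  /**
   * Writer for partitioned tables that produces one file per partition key and expects incoming
   * rows to be clustered by partition: when a row's key differs from the current key the current
   * file is closed, and if that key was already completed the write fails instead of producing a
   * second file for the same partition.
   */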
  private static class PartitionedWriter implements DataWriter<InternalRow> {
    private final Set<PartitionKey> completedPartitions = Sets.newHashSet();
    private final List<DataFile> completedFiles = Lists.newArrayList();
    private final PartitionSpec spec;
    private final FileFormat format;
    private final Configuration conf;
    private final AppenderFactory<InternalRow> factory;
    private final Function<PartitionKey, Path> outputPathFunc;
    private final PartitionKey key;

    private PartitionKey currentKey = null;
    private FileAppender<InternalRow> currentAppender = null;
    private Path currentPath = null;

    PartitionedWriter(PartitionSpec spec, FileFormat format, Configuration conf,
                      AppenderFactory<InternalRow> factory,
                      Function<PartitionKey, Path> outputPathFunc) {
      this.spec = spec;
      this.format = format;
      this.conf = conf;
      this.factory = factory;
      this.outputPathFunc = outputPathFunc;
      this.key = new PartitionKey(spec);
    }

    @Override
    public void write(InternalRow row) throws IOException {
      key.partition(row);

      if (!key.equals(currentKey)) {
        closeCurrent();

        if (completedPartitions.contains(key)) {
          // if rows are not correctly grouped, detect and fail the write
          PartitionKey existingKey = Iterables.find(completedPartitions, key::equals, null);
          LOG.warn("Duplicate key: {} == {}", existingKey, key);
          throw new IllegalStateException("Already closed file for partition: " + key.toPath());
        }

        this.currentKey = key.copy();
        this.currentPath = outputPathFunc.apply(currentKey);
        OutputFile file = HadoopOutputFile.fromPath(currentPath, conf);
        this.currentAppender = factory.newAppender(file, format);
      }

      currentAppender.add(row);
    }

    @Override
    public WriterCommitMessage commit() throws IOException {
      closeCurrent();
      return new TaskCommit(completedFiles);
    }

    @Override
    public void abort() throws IOException {
      FileSystem fs = currentPath.getFileSystem(conf);

      // clean up files created by this writer
      Tasks.foreach(completedFiles)
          .throwFailureWhenFinished()
          .noRetry()
          .run(file -> fs.delete(new Path(file.path().toString())), IOException.class);

      if (currentAppender != null) {
        currentAppender.close();
        this.currentAppender = null;
        fs.delete(currentPath);
      }
    }

    private void closeCurrent() throws IOException {
      if (currentAppender != null) {
        currentAppender.close();
        // metrics are only valid after the appender is closed
        Metrics metrics = currentAppender.metrics();
        this.currentAppender = null;

        InputFile inFile = HadoopInputFile.fromPath(currentPath, conf);
        DataFile dataFile = DataFiles.builder(spec)
            .withInputFile(inFile)
            .withPartition(currentKey)
            .withMetrics(metrics)
            .build();

        completedPartitions.add(currentKey);
        completedFiles.add(dataFile);
      }
    }
  }
}