/*
 * Copyright 2017 Netflix, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netflix.iceberg.spark.source;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.netflix.iceberg.CombinedScanTask;
import com.netflix.iceberg.DataFile;
import com.netflix.iceberg.FileScanTask;
import com.netflix.iceberg.PartitionField;
import com.netflix.iceberg.PartitionSpec;
import com.netflix.iceberg.Schema;
import com.netflix.iceberg.SchemaParser;
import com.netflix.iceberg.StructLike;
import com.netflix.iceberg.Table;
import com.netflix.iceberg.TableScan;
import com.netflix.iceberg.avro.Avro;
import com.netflix.iceberg.common.DynMethods;
import com.netflix.iceberg.exceptions.RuntimeIOException;
import com.netflix.iceberg.expressions.Expression;
import com.netflix.iceberg.hadoop.HadoopInputFile;
import com.netflix.iceberg.io.CloseableIterable;
import com.netflix.iceberg.io.InputFile;
import com.netflix.iceberg.parquet.Parquet;
import com.netflix.iceberg.spark.SparkExpressions;
import com.netflix.iceberg.spark.SparkSchemaUtil;
import com.netflix.iceberg.spark.data.SparkAvroReader;
import com.netflix.iceberg.spark.data.SparkOrcReader;
import com.netflix.iceberg.spark.data.SparkParquetReaders;
import com.netflix.iceberg.types.TypeUtil;
import com.netflix.iceberg.types.Types;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.Attribute;
import org.apache.spark.sql.catalyst.expressions.AttributeReference;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.sql.catalyst.expressions.JoinedRow;
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection;
import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
import org.apache.spark.sql.sources.v2.reader.DataReader;
import org.apache.spark.sql.sources.v2.reader.DataReaderFactory;
import org.apache.spark.sql.sources.v2.reader.DataSourceReader;
import org.apache.spark.sql.sources.v2.reader.Statistics;
import org.apache.spark.sql.sources.v2.reader.SupportsPushDownCatalystFilters;
import org.apache.spark.sql.sources.v2.reader.SupportsPushDownRequiredColumns;
import org.apache.spark.sql.sources.v2.reader.SupportsReportStatistics;
import org.apache.spark.sql.sources.v2.reader.SupportsScanUnsafeRow;
import org.apache.spark.sql.types.BinaryType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.Decimal;
import org.apache.spark.sql.types.DecimalType;
import org.apache.spark.sql.types.StringType;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.unsafe.types.UTF8String;
import org.apache.spark.util.SerializableConfiguration;

import java.io.Closeable;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.Function;

import static com.google.common.collect.Iterators.transform;
import static com.netflix.iceberg.spark.SparkSchemaUtil.convert;
import static com.netflix.iceberg.spark.SparkSchemaUtil.prune;
import static scala.collection.JavaConverters.asScalaBufferConverter;
import static scala.collection.JavaConverters.seqAsJavaListConverter;

class Reader implements DataSourceReader, SupportsScanUnsafeRow, SupportsPushDownCatalystFilters,
    SupportsPushDownRequiredColumns, SupportsReportStatistics {
  private static final org.apache.spark.sql.catalyst.expressions.Expression[] NO_EXPRS =
      new org.apache.spark.sql.catalyst.expressions.Expression[0];

  private final Table table;
  private final SerializableConfiguration conf;
  private StructType requestedSchema = null;
  private List<Expression> filterExpressions = null;
  private org.apache.spark.sql.catalyst.expressions.Expression[] pushedExprs = NO_EXPRS;

  // lazy variables
  private Schema schema = null;
  private StructType type = null; // cached because Spark accesses it multiple times
  private List<CombinedScanTask> tasks = null; // lazy cache of tasks

  Reader(Table table, Configuration conf) {
    this.table = table;
    this.conf = new SerializableConfiguration(conf);
    this.schema = table.schema();
  }

  private Schema lazySchema() {
    if (schema == null) {
      if (requestedSchema != null) {
        this.schema = prune(table.schema(), requestedSchema);
      } else {
        this.schema = table.schema();
      }
    }
    return schema;
  }

  private StructType lazyType() {
    if (type == null) {
      this.type = convert(lazySchema());
    }
    return type;
  }

  @Override
  public StructType readSchema() {
    return lazyType();
  }

  @Override
  public List<DataReaderFactory<UnsafeRow>> createUnsafeRowReaderFactories() {
    String tableSchemaString = SchemaParser.toJson(table.schema());
    String expectedSchemaString = SchemaParser.toJson(lazySchema());

    List<DataReaderFactory<UnsafeRow>> readTasks = Lists.newArrayList();
    for (CombinedScanTask task : tasks()) {
      readTasks.add(new ReadTask(task, tableSchemaString, expectedSchemaString, conf));
    }

    return readTasks;
  }

  @Override
  public org.apache.spark.sql.catalyst.expressions.Expression[] pushCatalystFilters(
      org.apache.spark.sql.catalyst.expressions.Expression[] filters) {
    this.tasks = null; // invalidate cached tasks, if present

    List<Expression> expressions = Lists.newArrayListWithExpectedSize(filters.length);
    List<org.apache.spark.sql.catalyst.expressions.Expression> pushed =
        Lists.newArrayListWithExpectedSize(filters.length);

    for (org.apache.spark.sql.catalyst.expressions.Expression filter : filters) {
      Expression expr = SparkExpressions.convert(filter);
      if (expr != null) {
        expressions.add(expr);
        pushed.add(filter);
      }
    }

    this.filterExpressions = expressions;
    this.pushedExprs = pushed.toArray(new org.apache.spark.sql.catalyst.expressions.Expression[0]);

    // invalidate the schema that will be projected
    this.schema = null;
    this.type = null;

    // Spark doesn't support residuals per task, so return all filters
    // to get Spark to handle record-level filtering
    return filters;
  }

  @Override
  public org.apache.spark.sql.catalyst.expressions.Expression[] pushedCatalystFilters() {
    return pushedExprs;
  }

  @Override
  public void pruneColumns(StructType requestedSchema) {
    this.requestedSchema = requestedSchema;

    // invalidate the schema that will be projected
    this.schema = null;
    this.type = null;
  }

  @Override
  public Statistics getStatistics() {
    long sizeInBytes = 0L;
    long numRows = 0L;

    for (CombinedScanTask task : tasks()) {
      for (FileScanTask file : task.files()) {
        sizeInBytes += file.length();
        numRows += file.file().recordCount();
      }
    }

    return new Stats(sizeInBytes, numRows);
  }
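
  /**
   * Lazily plans this scan and caches the resulting combined tasks.
   * <p>
   * The scan projects the (possibly pruned) expected schema and applies any pushed filter
   * expressions before planning, so the returned tasks already reflect pruning and pushdown.
   */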
  private List<CombinedScanTask> tasks() {
    if (tasks == null) {
      TableScan scan = table.newScan().project(lazySchema());

      if (filterExpressions != null) {
        for (Expression filter : filterExpressions) {
          scan = scan.filter(filter);
        }
      }

      try (CloseableIterable<CombinedScanTask> tasksIterable = scan.planTasks()) {
        this.tasks = Lists.newArrayList(tasksIterable);
      } catch (IOException e) {
        throw new RuntimeIOException(e, "Failed to close table scan: %s", scan);
      }
    }

    return tasks;
  }

  @Override
  public String toString() {
    return String.format(
        "IcebergScan(table=%s, type=%s, filters=%s)",
        table, lazySchema().asStruct(), filterExpressions);
  }

  /**
   * A serializable read-task factory that re-creates the table and expected schemas from JSON on
   * executors and builds a {@link TaskDataReader} for one {@link CombinedScanTask}.
   */
  private static class ReadTask implements DataReaderFactory<UnsafeRow>, Serializable {
    private final CombinedScanTask task;
    private final String tableSchemaString;
    private final String expectedSchemaString;
    private final SerializableConfiguration conf;

    private transient Schema tableSchema = null;
    private transient Schema expectedSchema = null;

    private ReadTask(CombinedScanTask task, String tableSchemaString, String expectedSchemaString,
                     SerializableConfiguration conf) {
      this.task = task;
      this.tableSchemaString = tableSchemaString;
      this.expectedSchemaString = expectedSchemaString;
      this.conf = conf;
    }

    @Override
    public DataReader<UnsafeRow> createDataReader() {
      return new TaskDataReader(task, lazyTableSchema(), lazyExpectedSchema(), conf.value());
    }

    private Schema lazyTableSchema() {
      if (tableSchema == null) {
        this.tableSchema = SchemaParser.fromJson(tableSchemaString);
      }
      return tableSchema;
    }

    private Schema lazyExpectedSchema() {
      if (expectedSchema == null) {
        this.expectedSchema = SchemaParser.fromJson(expectedSchemaString);
      }
      return expectedSchema;
    }
  }

  /**
   * Reads the files of one {@link CombinedScanTask} and returns their rows as {@link UnsafeRow},
   * projecting each file's rows to the expected schema.
   */
  private static class TaskDataReader implements DataReader<UnsafeRow> {
    // for some reason, the apply method can't be called from Java without reflection
    private static final DynMethods.UnboundMethod APPLY_PROJECTION = DynMethods.builder("apply")
        .impl(UnsafeProjection.class, InternalRow.class)
        .build();

    private final Iterator<FileScanTask> tasks;
    private final Schema tableSchema;
    private final Schema expectedSchema;
    private final Configuration conf;

    private Iterator<UnsafeRow> currentIterator = null;
    private Closeable currentCloseable = null;
    private UnsafeRow current = null;

    public TaskDataReader(CombinedScanTask task, Schema tableSchema, Schema expectedSchema,
                          Configuration conf) {
      this.tasks = task.files().iterator();
      this.tableSchema = tableSchema;
      this.expectedSchema = expectedSchema;
      this.conf = conf;
      // open last because the schemas and conf must be set
      this.currentIterator = open(tasks.next());
    }

    @Override
    public boolean next() throws IOException {
      while (true) {
        if (currentIterator.hasNext()) {
          this.current = currentIterator.next();
          return true;

        } else if (tasks.hasNext()) {
          this.currentCloseable.close();
          this.currentIterator = open(tasks.next());

        } else {
          return false;
        }
      }
    }

    @Override
    public UnsafeRow get() {
      return current;
    }

    @Override
    public void close() throws IOException {
      // close the current iterator
      this.currentCloseable.close();

      // exhaust the task iterator
      while (tasks.hasNext()) {
        tasks.next();
      }
    }

    private Iterator<UnsafeRow> open(FileScanTask task) {
      DataFile file = task.file();

      // schema of rows returned by readers
      Schema finalSchema = expectedSchema;
      PartitionSpec spec = task.spec();
      Set<Integer> idColumns = identitySourceIds(spec);

      // schema needed for the projection and filtering
      Schema requiredSchema = prune(tableSchema, convert(finalSchema), task.residual());
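
      // Pick one of three read paths: join identity partition values onto rows read from the
      // data file, read extra columns that are needed only for residual filtering, or read the
      // final schema directly. In every case the rows are projected to the final schema below.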
      boolean hasJoinedPartitionColumns = !idColumns.isEmpty();
      boolean hasExtraFilterColumns = requiredSchema.columns().size() != finalSchema.columns().size();

      Schema iterSchema;
      Iterator<InternalRow> iter;
      if (hasJoinedPartitionColumns) {
        // schema used to read data files
        Schema readSchema = TypeUtil.selectNot(requiredSchema, idColumns);
        Schema partitionSchema = TypeUtil.select(requiredSchema, idColumns);
        PartitionRowConverter convertToRow = new PartitionRowConverter(partitionSchema, spec);
        JoinedRow joined = new JoinedRow();

        InternalRow partition = convertToRow.apply(file.partition());
        joined.withRight(partition);

        // create joined rows and project from the joined schema to the final schema
        iterSchema = TypeUtil.join(readSchema, partitionSchema);
        iter = transform(open(task, readSchema, conf), joined::withLeft);

      } else if (hasExtraFilterColumns) {
        // add projection to the final schema
        iterSchema = requiredSchema;
        iter = open(task, requiredSchema, conf);

      } else {
        // return the base iterator
        iterSchema = finalSchema;
        iter = open(task, finalSchema, conf);
      }

      return transform(iter,
          APPLY_PROJECTION.bind(projection(finalSchema, iterSchema))::invoke);
    }

    private static UnsafeProjection projection(Schema finalSchema, Schema readSchema) {
      StructType struct = convert(readSchema);

      List<AttributeReference> refs = seqAsJavaListConverter(struct.toAttributes()).asJava();
      List<Attribute> attrs = Lists.newArrayListWithExpectedSize(struct.fields().length);
      List<org.apache.spark.sql.catalyst.expressions.Expression> exprs =
          Lists.newArrayListWithExpectedSize(struct.fields().length);

      for (AttributeReference ref : refs) {
        attrs.add(ref.toAttribute());
      }

      for (Types.NestedField field : finalSchema.columns()) {
        int indexInReadSchema = struct.fieldIndex(field.name());
        exprs.add(refs.get(indexInReadSchema));
      }

      return UnsafeProjection.create(
          asScalaBufferConverter(exprs).asScala().toSeq(),
          asScalaBufferConverter(attrs).asScala().toSeq());
    }

    private static Set<Integer> identitySourceIds(PartitionSpec spec) {
      Set<Integer> sourceIds = Sets.newHashSet();
      List<PartitionField> fields = spec.fields();
      for (int i = 0; i < fields.size(); i += 1) {
        PartitionField field = fields.get(i);
        if ("identity".equals(field.transform().toString())) {
          sourceIds.add(field.sourceId());
        }
      }

      return sourceIds;
    }

    private Iterator<InternalRow> open(FileScanTask task, Schema readSchema, Configuration conf) {
      InputFile location = HadoopInputFile.fromLocation(task.file().path(), conf);
      CloseableIterable<InternalRow> iter;
      switch (task.file().format()) {
        case ORC:
          SparkOrcReader reader = new SparkOrcReader(location, task, readSchema);
          this.currentCloseable = reader;
          return reader;

        case PARQUET:
          iter = newParquetIterable(location, task, readSchema);
          break;

        case AVRO:
          iter = newAvroIterable(location, task, readSchema);
          break;

        default:
          throw new UnsupportedOperationException(
              "Cannot read unknown format: " + task.file().format());
      }

      this.currentCloseable = iter;

      return iter.iterator();
    }

    private CloseableIterable<InternalRow> newAvroIterable(InputFile location,
                                                           FileScanTask task,
                                                           Schema readSchema) {
      return Avro.read(location)
          .reuseContainers()
          .project(readSchema)
          .split(task.start(), task.length())
          .createReaderFunc(SparkAvroReader::new)
          .build();
    }

    private CloseableIterable<InternalRow> newParquetIterable(InputFile location,
                                                              FileScanTask task,
                                                              Schema readSchema) {
      return Parquet.read(location)
          .project(readSchema)
          .split(task.start(), task.length())
          .createReaderFunc(fileSchema -> SparkParquetReaders.buildReader(readSchema, fileSchema))
          .filter(task.residual())
          .build();
    }
  }
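
  /**
   * Converts an Iceberg partition tuple ({@link StructLike}) into a Spark {@link InternalRow}
   * holding the values of identity-partition source columns, reusing a single row instance.
   */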
  private static class PartitionRowConverter implements Function<StructLike, InternalRow> {
    private final DataType[] types;
    private final int[] positions;
    private final Class<?>[] javaTypes;
    private final GenericInternalRow reusedRow;

    PartitionRowConverter(Schema partitionSchema, PartitionSpec spec) {
      StructType partitionType = SparkSchemaUtil.convert(partitionSchema);
      StructField[] fields = partitionType.fields();

      this.types = new DataType[fields.length];
      this.positions = new int[types.length];
      this.javaTypes = new Class<?>[types.length];
      this.reusedRow = new GenericInternalRow(types.length);

      List<PartitionField> partitionFields = spec.fields();
      for (int rowIndex = 0; rowIndex < fields.length; rowIndex += 1) {
        this.types[rowIndex] = fields[rowIndex].dataType();

        int sourceId = partitionSchema.columns().get(rowIndex).fieldId();
        for (int specIndex = 0; specIndex < partitionFields.size(); specIndex += 1) {
          PartitionField field = spec.fields().get(specIndex);
          if (field.sourceId() == sourceId && "identity".equals(field.transform().toString())) {
            positions[rowIndex] = specIndex;
            javaTypes[rowIndex] = spec.javaClasses()[specIndex];
            break;
          }
        }
      }
    }

    @Override
    public InternalRow apply(StructLike tuple) {
      for (int i = 0; i < types.length; i += 1) {
        reusedRow.update(i, convert(tuple.get(positions[i], javaTypes[i]), types[i]));
      }

      return reusedRow;
    }

    /**
     * Converts the objects into instances used by Spark's InternalRow.
     *
     * @param value a data value
     * @param type the Spark data type
     * @return the value converted to the representation expected by Spark's InternalRow
     */
    private static Object convert(Object value, DataType type) {
      if (type instanceof StringType) {
        return UTF8String.fromString(value.toString());
      } else if (type instanceof BinaryType) {
        // copy the buffer's remaining bytes and return the array; ByteBuffer.get(byte[]) returns
        // the buffer itself, so returning its result would hand Spark a ByteBuffer, not a byte[]
        ByteBuffer buffer = (ByteBuffer) value;
        byte[] bytes = new byte[buffer.remaining()];
        buffer.get(bytes);
        return bytes;
      } else if (type instanceof DecimalType) {
        return Decimal.fromDecimal(value);
      }
      return value;
    }
  }

  /**
   * Wraps a Spark {@link InternalRow} to expose it through Iceberg's {@link StructLike} interface.
   */
  private static class StructLikeInternalRow implements StructLike {
    private final DataType[] types;
    private InternalRow row = null;

    StructLikeInternalRow(StructType struct) {
      this.types = new DataType[struct.size()];
      StructField[] fields = struct.fields();
      for (int i = 0; i < fields.length; i += 1) {
        types[i] = fields[i].dataType();
      }
    }

    public StructLikeInternalRow setRow(InternalRow row) {
      this.row = row;
      return this;
    }

    @Override
    public int size() {
      return types.length;
    }

    @Override
    @SuppressWarnings("unchecked")
    public <T> T get(int pos, Class<T> javaClass) {
      return javaClass.cast(row.get(pos, types[pos]));
    }

    @Override
    public <T> void set(int pos, T value) {
      throw new UnsupportedOperationException("Not implemented: set");
    }
  }
}
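
// A minimal sketch of how Spark drives this reader. It assumes a DataSourceV2 implementation in
// this package (not shown here; the surrounding class and variable names below are illustrative
// assumptions, only the Reader methods are real):
//
//   Reader reader = new Reader(table, hadoopConf);
//   reader.pruneColumns(requestedSparkSchema);        // optional: column pruning
//   reader.pushCatalystFilters(catalystFilters);      // optional: filter pushdown
//   for (DataReaderFactory<UnsafeRow> factory : reader.createUnsafeRowReaderFactories()) {
//     // each factory is serialized to an executor, where createDataReader() opens the task's files
//   }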