/*
 * Copyright 2017 Netflix, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netflix.iceberg.parquet;

import com.netflix.iceberg.Files;
import com.netflix.iceberg.Schema;
import com.netflix.iceberg.TestHelpers;
import com.netflix.iceberg.exceptions.ValidationException;
import com.netflix.iceberg.expressions.Expression;
import com.netflix.iceberg.io.FileAppender;
import com.netflix.iceberg.io.InputFile;
import com.netflix.iceberg.io.OutputFile;
import com.netflix.iceberg.types.Types.FloatType;
import com.netflix.iceberg.types.Types.IntegerType;
import com.netflix.iceberg.types.Types.LongType;
import com.netflix.iceberg.types.Types.StringType;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.schema.MessageType;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.util.UUID;

import static com.netflix.iceberg.avro.AvroSchemaUtil.convert;
import static com.netflix.iceberg.expressions.Expressions.and;
import static com.netflix.iceberg.expressions.Expressions.equal;
import static com.netflix.iceberg.expressions.Expressions.greaterThan;
import static com.netflix.iceberg.expressions.Expressions.greaterThanOrEqual;
import static com.netflix.iceberg.expressions.Expressions.isNull;
import static com.netflix.iceberg.expressions.Expressions.lessThan;
import static com.netflix.iceberg.expressions.Expressions.lessThanOrEqual;
import static com.netflix.iceberg.expressions.Expressions.not;
import static com.netflix.iceberg.expressions.Expressions.notEqual;
import static com.netflix.iceberg.expressions.Expressions.notNull;
import static com.netflix.iceberg.expressions.Expressions.or;
import static com.netflix.iceberg.types.Types.NestedField.optional;
import static com.netflix.iceberg.types.Types.NestedField.required;

public class TestMetricsRowGroupFilter {
  private static final Schema SCHEMA = new Schema(
      required(1, "id", IntegerType.get()),
      optional(2, "no_stats", StringType.get()),
      required(3, "required", StringType.get()),
      optional(4, "all_nulls", LongType.get()),
      optional(5, "some_nulls", StringType.get()),
      optional(6, "no_nulls", StringType.get()),
      optional(7, "not_in_file", FloatType.get())
  );

  private static final Schema FILE_SCHEMA = new Schema(
      required(1, "_id", IntegerType.get()),
      optional(2, "_no_stats", StringType.get()),
      required(3, "_required", StringType.get()),
      optional(4, "_all_nulls", LongType.get()),
      optional(5, "_some_nulls", StringType.get()),
      optional(6, "_no_nulls", StringType.get())
  );

  private static final String TOO_LONG_FOR_STATS;
  static {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < 200; i += 1) {
      sb.append(UUID.randomUUID().toString());
    }
    TOO_LONG_FOR_STATS = sb.toString();
  }

  private static final File PARQUET_FILE = new File("/tmp/stats-row-group-filter-test.parquet");
  private static MessageType PARQUET_SCHEMA = null;
  private static BlockMetaData ROW_GROUP_METADATA = null;

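  // Writes a single Parquet file containing one row group and caches its schema and
  // row group metadata for use by the filter tests below.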
  @BeforeClass
  public static void createInputFile() throws IOException {
    if (PARQUET_FILE.exists()) {
      Assert.assertTrue(PARQUET_FILE.delete());
    }

    OutputFile outFile = Files.localOutput(PARQUET_FILE);
    try (FileAppender<Record> appender = Parquet.write(outFile)
        .schema(FILE_SCHEMA)
        .build()) {
      GenericRecordBuilder builder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table"));
      // create 50 records
      for (int i = 0; i < 50; i += 1) {
        builder.set("_id", 30 + i); // min=30, max=79, num-nulls=0
        builder.set("_no_stats", TOO_LONG_FOR_STATS); // value longer than 4k will produce no stats
        builder.set("_required", "req"); // required, always non-null
        builder.set("_all_nulls", null); // never non-null
        builder.set("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values
        builder.set("_no_nulls", ""); // optional, but always non-null
        appender.add(builder.build());
      }
    }

    InputFile inFile = Files.localInput(PARQUET_FILE);
    try (ParquetFileReader reader = ParquetFileReader.open(ParquetIO.file(inFile))) {
      Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size());
      ROW_GROUP_METADATA = reader.getRowGroups().get(0);
      PARQUET_SCHEMA = reader.getFileMetaData().getSchema();
    }

    PARQUET_FILE.deleteOnExit();
  }

  @Test
  public void testAllNulls() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notNull("all_nulls"))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should skip: no non-null value in all null column", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notNull("some_nulls"))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: column with some nulls contains a non-null value", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notNull("no_nulls"))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: non-null column contains a non-null value", shouldRead);
  }

  @Test
  public void testNoNulls() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, isNull("all_nulls"))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: at least one null value in all null column", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, isNull("some_nulls"))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: column with some nulls contains a null value", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, isNull("no_nulls"))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should skip: non-null column contains no null values", shouldRead);
  }

  @Test
  public void testRequiredColumn() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notNull("required"))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: required columns are always non-null", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, isNull("required"))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should skip: required columns are always non-null", shouldRead);
  }

  @Test
  public void testMissingColumn() {
    TestHelpers.assertThrows("Should complain about missing column in expression",
        ValidationException.class, "Cannot find field 'missing'",
        () -> new ParquetMetricsRowGroupFilter(SCHEMA, lessThan("missing", 5))
            .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA));
  }

  @Test
  public void testColumnNotInFile() {
    Expression[] cannotMatch = new Expression[] {
        lessThan("not_in_file", 1.0f), lessThanOrEqual("not_in_file", 1.0f),
        equal("not_in_file", 1.0f), greaterThan("not_in_file", 1.0f),
        greaterThanOrEqual("not_in_file", 1.0f), notNull("not_in_file")
    };

    for (Expression expr : cannotMatch) {
      boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, expr)
          .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
      Assert.assertFalse("Should skip when column is not in file (all nulls): " + expr, shouldRead);
    }

    Expression[] canMatch = new Expression[] {
        isNull("not_in_file"), notEqual("not_in_file", 1.0f)
    };

    for (Expression expr : canMatch) {
      boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, expr)
          .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
      Assert.assertTrue("Should read when column is not in file (all nulls): " + expr, shouldRead);
    }
  }

  @Test
  public void testMissingStats() {
    Expression[] exprs = new Expression[] {
        lessThan("no_stats", "a"), lessThanOrEqual("no_stats", "b"), equal("no_stats", "c"),
        greaterThan("no_stats", "d"), greaterThanOrEqual("no_stats", "e"),
        notEqual("no_stats", "f"), isNull("no_stats"), notNull("no_stats")
    };

    for (Expression expr : exprs) {
      boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, expr)
          .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
      Assert.assertTrue("Should read when missing stats for expr: " + expr, shouldRead);
    }
  }

  @Test
  public void testZeroRecordFile() {
    BlockMetaData emptyBlock = new BlockMetaData();
    emptyBlock.setRowCount(0);

    Expression[] exprs = new Expression[] {
        lessThan("id", 5), lessThanOrEqual("id", 30), equal("id", 70), greaterThan("id", 78),
        greaterThanOrEqual("id", 90), notEqual("id", 101), isNull("some_nulls"),
        notNull("some_nulls")
    };

    for (Expression expr : exprs) {
      boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, expr)
          .shouldRead(PARQUET_SCHEMA, emptyBlock);
      Assert.assertFalse("Should never read 0-record file: " + expr, shouldRead);
    }
  }

  @Test
  public void testNot() {
    // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(lessThan("id", 5)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: not(false)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(greaterThan("id", 5)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should skip: not(true)", shouldRead);
  }

  @Test
  public void testAnd() {
    // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA,
        and(lessThan("id", 5), greaterThanOrEqual("id", 0)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should skip: and(false, false)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA,
        and(greaterThan("id", 5), lessThanOrEqual("id", 30)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: and(true, true)", shouldRead);
  }

  @Test
  public void testOr() {
    // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA,
        or(lessThan("id", 5), greaterThanOrEqual("id", 80)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should skip: or(false, false)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA,
        or(lessThan("id", 5), greaterThanOrEqual("id", 60)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: or(false, true)", shouldRead);
  }

  @Test
  public void testIntegerLt() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, lessThan("id", 5))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id range below lower bound (5 < 30)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, lessThan("id", 30))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id range below lower bound (30 is not < 30)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, lessThan("id", 31))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: one possible id", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, lessThan("id", 79))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: may possible ids", shouldRead);
  }

  @Test
  public void testIntegerLtEq() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, lessThanOrEqual("id", 5))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id range below lower bound (5 < 30)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, lessThanOrEqual("id", 29))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id range below lower bound (29 < 30)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, lessThanOrEqual("id", 30))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: one possible id", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, lessThanOrEqual("id", 79))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: many possible ids", shouldRead);
  }

  @Test
  public void testIntegerGt() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, greaterThan("id", 85))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id range above upper bound (85 < 79)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, greaterThan("id", 79))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id range above upper bound (79 is not > 79)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, greaterThan("id", 78))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: one possible id", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, greaterThan("id", 75))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: many possible ids", shouldRead);
  }

  @Test
  public void testIntegerGtEq() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, greaterThanOrEqual("id", 85))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id range above upper bound (85 < 79)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, greaterThanOrEqual("id", 80))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id range above upper bound (80 > 79)", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, greaterThanOrEqual("id", 79))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: one possible id", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, greaterThanOrEqual("id", 75))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: may possible ids", shouldRead);
  }

  @Test
  public void testIntegerEq() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, equal("id", 5))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id below lower bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, equal("id", 29))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id below lower bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, equal("id", 30))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id equal to lower bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, equal("id", 75))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id between lower and upper bounds", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, equal("id", 79))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id equal to upper bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, equal("id", 80))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id above upper bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, equal("id", 85))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertFalse("Should not read: id above upper bound", shouldRead);
  }

  @Test
  public void testIntegerNotEq() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notEqual("id", 5))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id below lower bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notEqual("id", 29))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id below lower bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notEqual("id", 30))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id equal to lower bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notEqual("id", 75))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id between lower and upper bounds", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notEqual("id", 79))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id equal to upper bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notEqual("id", 80))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id above upper bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, notEqual("id", 85))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id above upper bound", shouldRead);
  }

  @Test
  public void testIntegerNotEqRewritten() {
    boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(equal("id", 5)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id below lower bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(equal("id", 29)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id below lower bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(equal("id", 30)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id equal to lower bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(equal("id", 75)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id between lower and upper bounds", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(equal("id", 79)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id equal to upper bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(equal("id", 80)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id above upper bound", shouldRead);

    shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(equal("id", 85)))
        .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read: id above upper bound", shouldRead);
  }
}