/*
 * Aiven Kafka GCS Connector
 * Copyright (c) 2019 Aiven Oy
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package io.aiven.kafka.connect.gcs;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;

import io.aiven.kafka.connect.gcs.config.CompressionType;
import io.aiven.kafka.connect.gcs.config.GcsSinkConfig;
import io.aiven.kafka.connect.gcs.testutils.BucketAccessor;

import com.google.cloud.storage.Storage;
import com.google.cloud.storage.contrib.nio.testing.LocalStorageHelper;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;

/**
 * Unit tests for {@link GcsSinkTask}.
 *
 * <p>Each test builds a task from the per-test configuration map and the {@link Storage} obtained
 * from {@link LocalStorageHelper}, feeds it records through {@code put}/{@code flush}, and then
 * inspects the blobs of the test bucket through {@link BucketAccessor}.
 */
final class GcsSinkTaskTest {

    private static final String TEST_BUCKET = "test-bucket";

    // Per-test fixtures. They are (re)assigned by setUp(), which is an instance method, so they are
    // kept as instance fields rather than statics that are written from instance code.
    private Storage storage;
    private BucketAccessor testBucketAccessor;
    private Map<String, String> properties;

    // Two records for each of five topic-partitions. The second group continues the offsets of the
    // first one (10 -> 11, 20 -> 21, ...).
    private final List<SinkRecord> basicRecords = Arrays.asList(
        createRecord("topic0", 0, "key0", "value0", 10, 1000),
        createRecord("topic0", 1, "key1", "value1", 20, 1001),
        createRecord("topic1", 0, "key2", "value2", 30, 1002),
        createRecord("topic1", 1, "key3", "value3", 40, 1003),
        createRecord("topic0", 2, "key4", "value4", 50, 1004),

        createRecord("topic0", 0, "key5", "value5", 11, 1005),
        createRecord("topic0", 1, "key6", "value6", 21, 1006),
        createRecord("topic1", 0, "key7", "value7", 31, 1007),
        createRecord("topic1", 1, "key8", "value8", 41, 1008),
        createRecord("topic0", 2, "key9", "value9", 51, 1009)
    );

    /**
     * Obtains the test storage, wraps the test bucket in an accessor and resets the connector
     * configuration to the bucket name only.
     */
    @BeforeEach
    final void setUp() {
        storage = LocalStorageHelper.getOptions().getService();
        testBucketAccessor = new BucketAccessor(storage, TEST_BUCKET);

        properties = new HashMap<>();
        properties.put(GcsSinkConfig.GCS_BUCKET_NAME_CONFIG, TEST_BUCKET);
    }

    /** Checks the version string reported by the task. */
    @Test
    final void version() {
        final GcsSinkTask task = new GcsSinkTask(properties, storage);
        assertEquals("test-version", task.version());
    }

    /**
     * Writes {@link #basicRecords} with the given compression and checks the resulting blob names
     * (order-insensitive) and their values, decoding field 0 of every line.
     */
    @ParameterizedTest
    @ValueSource(strings = {"none", "gzip", "snappy", "zstd"})
    final void basic(final String compression) {
        properties.put(GcsSinkConfig.FILE_COMPRESSION_TYPE_CONFIG, compression);
        final GcsSinkTask task = new GcsSinkTask(properties, storage);

        task.put(basicRecords);
        task.flush(null);

        assertBasicRecordsBlobs(compression, 0);
    }

    /**
     * Same as {@link #basic(String)} but with the value encoding set to {@code "none"}, so no field
     * is decoded when the lines are read back.
     */
    @ParameterizedTest
    @ValueSource(strings = {"none", "gzip", "snappy", "zstd"})
    final void basicValuesPlain(final String compression) {
        properties.put(GcsSinkConfig.FILE_COMPRESSION_TYPE_CONFIG, compression);
        properties.put(GcsSinkConfig.FORMAT_OUTPUT_FIELDS_VALUE_ENCODING_CONFIG, "none");
        final GcsSinkTask task = new GcsSinkTask(properties, storage);

        task.put(basicRecords);
        task.flush(null);

        assertBasicRecordsBlobs(compression);
    }

    /**
     * Writes {@link #basicRecords} with the given compression and checks the exact, ordered list of
     * blob names (including the compression extension) and the values stored in every blob.
     */
    @ParameterizedTest
    @ValueSource(strings = {"none", "gzip", "snappy", "zstd"})
    final void compression(final String compression) {
        properties.put(GcsSinkConfig.FILE_COMPRESSION_TYPE_CONFIG, compression);
        final GcsSinkTask task = new GcsSinkTask(properties, storage);

        task.put(basicRecords);
        task.flush(null);

        final String extension = CompressionType.forName(compression).extension();

        final List<String> expectedBlobNames = Lists.newArrayList(
                "topic0-0-10", "topic0-1-20", "topic0-2-50", "topic1-0-30", "topic1-1-40")
            .stream()
            .map(name -> name + extension)
            .collect(Collectors.toList());

        assertIterableEquals(expectedBlobNames, testBucketAccessor.getBlobNames());
        assertIterableEquals(
            toCollectionOfLists("value0", "value5"),
            readSplittedAndDecodedLinesFromBlob("topic0-0-10" + extension, compression, 0));
        assertIterableEquals(
            toCollectionOfLists("value1", "value6"),
            readSplittedAndDecodedLinesFromBlob("topic0-1-20" + extension, compression, 0));
        assertIterableEquals(
            toCollectionOfLists("value4", "value9"),
            readSplittedAndDecodedLinesFromBlob("topic0-2-50" + extension, compression, 0));
        assertIterableEquals(
            toCollectionOfLists("value2", "value7"),
            readSplittedAndDecodedLinesFromBlob("topic1-0-30" + extension, compression, 0));
        assertIterableEquals(
            toCollectionOfLists("value3", "value8"),
            readSplittedAndDecodedLinesFromBlob("topic1-1-40" + extension, compression, 0));
    }

    /**
     * Configures the output fields {@code key,value,timestamp,offset} and checks that every line
     * carries all four of them. Fields 0 and 1 (key and value) are decoded when reading back.
     */
    @ParameterizedTest
    @ValueSource(strings = {"none", "gzip", "snappy", "zstd"})
    final void allFields(final String compression) {
        properties.put(GcsSinkConfig.FILE_COMPRESSION_TYPE_CONFIG, compression);
        properties.put(GcsSinkConfig.FORMAT_OUTPUT_FIELDS_CONFIG, "key,value,timestamp,offset");
        final GcsSinkTask task = new GcsSinkTask(properties, storage);

        task.put(basicRecords);
        task.flush(null);

        final String extension = CompressionType.forName(compression).extension();

        assertIterableEquals(
            Lists.newArrayList(
                "topic0-0-10" + extension,
                "topic0-1-20" + extension,
                "topic0-2-50" + extension,
                "topic1-0-30" + extension,
                "topic1-1-40" + extension),
            testBucketAccessor.getBlobNames());
        assertIterableEquals(
            Lists.newArrayList(
                Arrays.asList("key0", "value0", "1000", "10"),
                Arrays.asList("key5", "value5", "1005", "11")),
            readSplittedAndDecodedLinesFromBlob("topic0-0-10" + extension, compression, 0, 1));
        assertIterableEquals(
            Lists.newArrayList(
                Arrays.asList("key1", "value1", "1001", "20"),
                Arrays.asList("key6", "value6", "1006", "21")),
            readSplittedAndDecodedLinesFromBlob("topic0-1-20" + extension, compression, 0, 1));
        assertIterableEquals(
            Lists.newArrayList(
                Arrays.asList("key4", "value4", "1004", "50"),
                Arrays.asList("key9", "value9", "1009", "51")),
            readSplittedAndDecodedLinesFromBlob("topic0-2-50" + extension, compression, 0, 1));
        assertIterableEquals(
            Lists.newArrayList(
                Arrays.asList("key2", "value2", "1002", "30"),
                Arrays.asList("key7", "value7", "1007", "31")),
            readSplittedAndDecodedLinesFromBlob("topic1-0-30" + extension, compression, 0, 1));
        assertIterableEquals(
            Lists.newArrayList(
                Arrays.asList("key3", "value3", "1003", "40"),
                Arrays.asList("key8", "value8", "1008", "41")),
            readSplittedAndDecodedLinesFromBlob("topic1-1-40" + extension, compression, 0, 1));
    }

    /**
     * Writes records whose key, value and timestamp are all {@code null} and checks, on the raw
     * (undecoded) lines, that only the offset column is populated.
     */
    @ParameterizedTest
    @ValueSource(strings = {"none", "gzip", "snappy", "zstd"})
    final void nullKeyValueAndTimestamp(final String compression) {
        properties.put(GcsSinkConfig.FILE_COMPRESSION_TYPE_CONFIG, compression);
        properties.put(GcsSinkConfig.FORMAT_OUTPUT_FIELDS_CONFIG, "key,value,timestamp,offset");
        final GcsSinkTask task = new GcsSinkTask(properties, storage);

        final List<SinkRecord> records = Arrays.asList(
            createNullRecord("topic0", 0, 10),
            createNullRecord("topic0", 0, 11),
            createNullRecord("topic0", 0, 12)
        );
        task.put(records);
        task.flush(null);

        final String blobName = "topic0-0-10" + CompressionType.forName(compression).extension();

        assertIterableEquals(
            Lists.newArrayList(blobName),
            testBucketAccessor.getBlobNames());
        assertIterableEquals(
            Lists.newArrayList(",,,10", ",,,11", ",,,12"),
            readRawLinesFromBlob(blobName, compression));
    }

    /**
     * Puts two records before each of three flushes. The assertions expect one blob per flush,
     * named after the offset of the first record put since the previous flush, and verify the
     * content of all three blobs.
     */
    @ParameterizedTest
    @ValueSource(strings = {"none", "gzip", "snappy", "zstd"})
    final void multipleFlush(final String compression) {
        properties.put(GcsSinkConfig.FILE_COMPRESSION_TYPE_CONFIG, compression);
        final GcsSinkTask task = new GcsSinkTask(properties, storage);

        task.put(Collections.singletonList(
            createRecord("topic0", 0, "key0", "value0", 100, 1000)));
        task.put(Collections.singletonList(
            createRecord("topic0", 0, "key1", "value1", 101, 1001)));
        task.flush(null);
        task.put(Collections.singletonList(
            createRecord("topic0", 0, "key2", "value2", 102, 1002)));
        task.put(Collections.singletonList(
            createRecord("topic0", 0, "key3", "value3", 103, 1003)));
        task.flush(null);
        task.put(Collections.singletonList(
            createRecord("topic0", 0, "key4", "value4", 104, 1004)));
        task.put(Collections.singletonList(
            createRecord("topic0", 0, "key5", "value5", 105, 1005)));
        task.flush(null);

        final String extension = CompressionType.forName(compression).extension();

        assertIterableEquals(
            Lists.newArrayList(
                "topic0-0-100" + extension,
                "topic0-0-102" + extension,
                "topic0-0-104" + extension),
            testBucketAccessor.getBlobNames());
        assertIterableEquals(
            toCollectionOfLists("value0", "value1"),
            readSplittedAndDecodedLinesFromBlob("topic0-0-100" + extension, compression, 0));
        assertIterableEquals(
            toCollectionOfLists("value2", "value3"),
            readSplittedAndDecodedLinesFromBlob("topic0-0-102" + extension, compression, 0));
        // The blob produced by the last flush must be verified as well, not only listed by name.
        assertIterableEquals(
            toCollectionOfLists("value4", "value5"),
            readSplittedAndDecodedLinesFromBlob("topic0-0-104" + extension, compression, 0));
    }

    /**
     * Limits files to one record each and checks that 100 records put before a single flush end up
     * in 100 blobs, each holding exactly one value.
     */
    @ParameterizedTest
    @ValueSource(strings = {"none", "gzip", "snappy", "zstd"})
    final void maxRecordPerFile(final String compression) {
        properties.put(GcsSinkConfig.FILE_COMPRESSION_TYPE_CONFIG, compression);
        properties.put(GcsSinkConfig.FILE_MAX_RECORDS, "1");
        final GcsSinkTask task = new GcsSinkTask(properties, storage);

        final int recordNum = 100;

        for (int i = 0; i < recordNum; i++) {
            final SinkRecord record = createRecord("topic0", 0, "key" + i, "value" + i, i, i);
            task.put(Collections.singletonList(record));
        }
        task.flush(null);

        final String extension = CompressionType.forName(compression).extension();

        // The expected names are sorted as strings ("topic0-0-10" precedes "topic0-0-2"); that is
        // the order this assertion requires from getBlobNames().
        final List<String> expectedBlobNames = IntStream.range(0, recordNum)
            .mapToObj(i -> "topic0-0-" + i + extension)
            .sorted()
            .collect(Collectors.toList());
        assertIterableEquals(expectedBlobNames, testBucketAccessor.getBlobNames());

        for (int i = 0; i < recordNum; i++) {
            assertIterableEquals(
                Collections.singletonList(Collections.singletonList("value" + i)),
                readSplittedAndDecodedLinesFromBlob("topic0-0-" + i + extension, compression, 0)
            );
        }
    }

    /** Checks that the configured file name prefix is prepended to every blob name. */
    @Test
    final void prefix() {
        properties.put(GcsSinkConfig.FILE_NAME_PREFIX_CONFIG, "prefix-");
        final GcsSinkTask task = new GcsSinkTask(properties, storage);

        task.put(basicRecords);
        task.flush(null);

        assertIterableEquals(
            Lists.newArrayList(
                "prefix-topic0-0-10",
                "prefix-topic0-1-20",
                "prefix-topic0-2-50",
                "prefix-topic1-0-30",
                "prefix-topic1-1-40"),
            testBucketAccessor.getBlobNames()
        );
    }

    /**
     * Uses the file name template <code>{{key}}</code> and checks that there is one blob per record
     * key (a {@code null} key yields the blob {@code "null"}). The assertions expect every blob to
     * contain only the last record that was put for its key.
     */
    @ParameterizedTest
    @ValueSource(strings = {"none", "gzip", "snappy", "zstd"})
    final void groupByKey(final String compression) {
        properties.put(GcsSinkConfig.FILE_COMPRESSION_TYPE_CONFIG, compression);
        properties.put(GcsSinkConfig.FORMAT_OUTPUT_FIELDS_CONFIG, "key,value");
        properties.put("file.name.template", "{{key}}");

        final GcsSinkTask task = new GcsSinkTask(properties, storage);

        final List<SinkRecord> records = Arrays.asList(
            createRecordStringKey("topic0", 0, "key0", "value0", 10, 1000),
            createRecordStringKey("topic0", 1, "key1", "value1", 20, 1001),
            createRecordStringKey("topic1", 0, "key2", "value2", 30, 1002),

            createRecordStringKey("topic0", 0, "key1", "value3", 11, 1005),
            createRecordStringKey("topic0", 1, "key1", "value4", 21, 1006),
            createRecordStringKey("topic1", 0, null, "value5", 31, 1007),

            createRecordStringKey("topic0", 0, "key0", "value6", 12, 1009),
            createRecordStringKey("topic0", 1, "key1", "value7", 22, 1010),
            createRecordStringKey("topic1", 0, "key1", "value8", 32, 1011)
        );

        task.put(records);
        task.flush(null);

        assertIterableEquals(
            Lists.newArrayList("key0", "key1", "key2", "null"),
            testBucketAccessor.getBlobNames());

        assertIterableEquals(
            Collections.singletonList(Arrays.asList("key0", "value6")),
            readSplittedAndDecodedLinesFromBlob("key0", compression, 0, 1));
        assertIterableEquals(
            Collections.singletonList(Arrays.asList("key1", "value8")),
            readSplittedAndDecodedLinesFromBlob("key1", compression, 0, 1));
        assertIterableEquals(
            Collections.singletonList(Arrays.asList("key2", "value2")),
            readSplittedAndDecodedLinesFromBlob("key2", compression, 0, 1));
        assertIterableEquals(
            Collections.singletonList(Arrays.asList("", "value5")), // null is written as an empty string to files
            readSplittedAndDecodedLinesFromBlob("null", compression, 0, 1));
    }

    /**
     * Verifies the blobs expected after {@link #basicRecords} has been put and flushed: the set of
     * blob names (order-insensitive) and the lines of every blob.
     *
     * @param compression    compression type name the task was configured with
     * @param fieldsToDecode indices of the line fields to decode while reading; may be empty
     */
    private void assertBasicRecordsBlobs(final String compression, final int... fieldsToDecode) {
        final Map<String, Collection<List<String>>> expectedBlobs = buildBlobNameValuesMap(compression);

        assertEquals(expectedBlobs.keySet(), Sets.newHashSet(testBucketAccessor.getBlobNames()));

        expectedBlobs.forEach((blobName, expectedLines) ->
            assertIterableEquals(
                expectedLines,
                readSplittedAndDecodedLinesFromBlob(blobName, compression, fieldsToDecode)));
    }

    /**
     * Creates a record whose key and value are the UTF-8 bytes of the given strings, both with
     * {@link Schema#BYTES_SCHEMA}.
     *
     * @param key   record key; must not be {@code null}
     * @param value record value; must not be {@code null}
     */
    private SinkRecord createRecord(final String topic,
                                    final int partition,
                                    final String key,
                                    final String value,
                                    final int offset,
                                    final long timestamp) {
        return new SinkRecord(
            topic,
            partition,
            Schema.BYTES_SCHEMA,
            key.getBytes(StandardCharsets.UTF_8),
            Schema.BYTES_SCHEMA,
            value.getBytes(StandardCharsets.UTF_8),
            offset,
            timestamp,
            TimestampType.CREATE_TIME);
    }

    /**
     * Creates a record with a string key ({@link Schema#OPTIONAL_STRING_SCHEMA}) and a value made
     * of the UTF-8 bytes of {@code value}.
     *
     * @param key   record key; may be {@code null}
     * @param value record value; must not be {@code null}
     */
    private SinkRecord createRecordStringKey(final String topic,
                                             final int partition,
                                             final String key,
                                             final String value,
                                             final int offset,
                                             final long timestamp) {
        return new SinkRecord(
            topic,
            partition,
            Schema.OPTIONAL_STRING_SCHEMA,
            key,
            Schema.BYTES_SCHEMA,
            value.getBytes(StandardCharsets.UTF_8),
            offset,
            timestamp,
            TimestampType.CREATE_TIME);
    }

    /** Creates a record whose key, value and timestamp are all {@code null}. */
    private SinkRecord createNullRecord(final String topic,
                                        final int partition,
                                        final int offset) {
        return new SinkRecord(
            topic,
            partition,
            Schema.BYTES_SCHEMA,
            null,
            Schema.BYTES_SCHEMA,
            null,
            offset,
            null,
            TimestampType.NO_TIMESTAMP_TYPE);
    }

    /** Reads the lines of a blob via {@link BucketAccessor#readLines}, without field decoding. */
    private Collection<String> readRawLinesFromBlob(
        final String blobName,
        final String compression) {
        return testBucketAccessor.readLines(blobName, compression);
    }

    /**
     * Reads a blob via {@link BucketAccessor#readAndDecodeLines}, which returns one list of fields
     * per line.
     *
     * @param fieldsToDecode indices of the fields to decode (presumably Base64; confirm against
     *                       {@code BucketAccessor}); may be empty
     */
    private Collection<List<String>> readSplittedAndDecodedLinesFromBlob(
        final String blobName,
        final String compression,
        final int... fieldsToDecode) {
        return testBucketAccessor.readAndDecodeLines(blobName, compression, fieldsToDecode);
    }

    /**
     * Builds the blobs expected for {@link #basicRecords}: blob name (with the extension of the
     * given compression type) mapped to the expected single-value lines.
     */
    private Map<String, Collection<List<String>>> buildBlobNameValuesMap(final String compression) {
        final String extension = CompressionType.forName(compression).extension();
        final Map<String, Collection<List<String>>> blobNameValuesMap = new HashMap<>();
        blobNameValuesMap.put("topic0-0-10" + extension, toCollectionOfLists("value0", "value5"));
        blobNameValuesMap.put("topic0-1-20" + extension, toCollectionOfLists("value1", "value6"));
        blobNameValuesMap.put("topic1-0-30" + extension, toCollectionOfLists("value2", "value7"));
        blobNameValuesMap.put("topic1-1-40" + extension, toCollectionOfLists("value3", "value8"));
        blobNameValuesMap.put("topic0-2-50" + extension, toCollectionOfLists("value4", "value9"));
        return blobNameValuesMap;
    }

    /**
     * Wraps every value into its own single-element list.
     *
     * <p>Example: input {@code "value0", "value5"} gives
     * {@code Collection[List["value0"], List["value5"]]}.
     */
    private Collection<List<String>> toCollectionOfLists(final String... values) {
        return toCollectionOfLists(Lists.newArrayList(values));
    }

    /** Wraps every element of {@code values} into its own single-element list, keeping the order. */
    private Collection<List<String>> toCollectionOfLists(final List<String> values) {
        return values.stream().map(Collections::singletonList).collect(Collectors.toList());
    }
}