package datasources;

import datasources.utils.DBClientWrapper;
import datasources.utils.DBTableReader;
import org.apache.log4j.Logger;

import edb.common.UnknownTableException;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.sources.v2.DataSourceOptions;
import org.apache.spark.sql.sources.v2.DataSourceV2;
import org.apache.spark.sql.sources.v2.ReadSupport;
import org.apache.spark.sql.sources.v2.reader.DataReader;
import org.apache.spark.sql.sources.v2.reader.DataReaderFactory;
import org.apache.spark.sql.sources.v2.reader.DataSourceReader;
import org.apache.spark.sql.types.StructType;

import java.io.IOException;
import java.util.List;

/**
 * An extremely simple DataSource that supports sequential reads (i.e.: on just one executor)
 * from the ExampleDB. It only supports reads from a single, pre-defined table with a
 * pre-defined schema. This DataSource is probably about a simple as one that reads from a
 * remote database can get.
 */
public class SimpleRowDataSource implements DataSourceV2, ReadSupport {

    static Logger log = Logger.getLogger(SimpleRowDataSource.class.getName());

    /**
     * Spark calls this to create the reader. Notice how it pulls the host and port
     * on which ExampleDB is listening from the supplied options.
     * @param options
     * @return
     */
    @Override
    public DataSourceReader createReader(DataSourceOptions options) {
        String host = options.get("host").orElse("localhost");
        int port = options.getInt("port", -1);
        return new Reader(host, port);
    }

    /**
     * This is how Spark discovers the source table's schema (fixed in this case) and how it
     * obtains the reader factories to be used by the executors to create readers. In this
     * case only one reader factory is created, supporting just one executor, so the
     * resulting Dataset will have only a single partition -- that's why this DataSource
     * only provides sequential reads.
     */
    static class Reader implements DataSourceReader {

        static Logger log = Logger.getLogger(Reader.class.getName());

        public Reader(String host, int port) {
            _host = host;
            _port = port;
        }

        private final StructType schema = new StructType().add("i", "bigint").add("j", "bigint");
        private String _host;
        private int _port;

        @Override
        public StructType readSchema() {
            return schema;
        }

        @Override
        public List<DataReaderFactory<Row>> createDataReaderFactories() {
            log.info("creating a single factory");
            return java.util.Arrays.asList(new SimpleDataReaderFactory(_host, _port));
        }
    }

    /**
     * This is used by a single executor to read from ExampleDB. Notice how in
     * this case it reads from only a single table and reads all of the data since
     * it knows that only one instance will exist at a time -- again because this DataSource
     * only supports sequential data access. Also note that when DBClientWrapper's
     * getTableReader() method is called it reads ALL the data in the table eagerly.
     */
    static class TaskDataReader implements DataReader<Row> {

        static Logger log = Logger.getLogger(TaskDataReader.class.getName());

        public TaskDataReader(String host, int port) throws UnknownTableException {
            log.info("Task reading from [" + host + ":" + port + "]" );
            _db = new DBClientWrapper(host, port);
            _db.connect();
            _reader = _db.getTableReader(tableName, new String[]{"i", "j"});
        }

        private DBClientWrapper _db;

        private DBTableReader _reader;

        private final static String tableName = "theTable";

        @Override
        public boolean next() {
            return _reader.next();
        }

        @Override
        public Row get() {
            return _reader.get();
        }

        @Override
        public void close() throws IOException {
            _db.disconnect();
        }
    }

    /**
     * Note that this has to be serializable. Each instance is sent to an executor,
     * which uses it to create a reader for its own use.
     */
    static class SimpleDataReaderFactory implements DataReaderFactory<Row> {

        static Logger log = Logger.getLogger(SimpleDataReaderFactory.class.getName());

        public SimpleDataReaderFactory(String host, int port) {
            _host = host;
            _port = port;
        }

        private String _host;
        private int _port;

        @Override
        public DataReader<Row> createDataReader() {
            log.info("Factory creating reader for [" + _host + ":" + _port + "]" );
            try {
                return new TaskDataReader(_host, _port);
            } catch (UnknownTableException ute) {
                throw new RuntimeException(ute);
            }
        }

    }


}