/* (c) 2014 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.io.rubix;

import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.io.BlockSerializationType;
import com.linkedin.cubert.utils.ClassCache;
import com.linkedin.cubert.utils.JsonUtils;
import com.linkedin.cubert.utils.print;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.mapred.JobConf;
import org.apache.pig.data.Tuple;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ObjectNode;

public class RubixFile<K, V>
{
    private final Path path;
    private final Configuration conf;
    private JsonNode metadataJson;
    private Class<K> keyClass;
    private Class<V> valueClass;

    private List<KeyData<K>> keyData = null;

    /**
     * One entry of a rubix file's key section: the key of a block together with the
     * block's byte offset, byte length, record count and block id.
     *
     * @param <K> type of the block key
     */
    public static class KeyData<K>
    {
        private final K key;
        private final long offset;
        private long length;
        private long blockId;
        private long numRecords;

        /**
         * @param key key associated with the block
         * @param offset byte position at which the block starts
         * @param length number of bytes occupied by the block
         * @param numRecs number of records stored in the block
         * @param blockId identifier of the block
         */
        public KeyData(K key, long offset, long length, long numRecs, long blockId)
        {
            this.key = key;
            this.offset = offset;
            this.blockId = blockId;
            this.numRecords = numRecs;
            this.length = length;
        }

        /** @return the key associated with this block */
        public K getKey()
        {
            return this.key;
        }

        /** @return byte position at which the block starts */
        public long getOffset()
        {
            return this.offset;
        }

        /** @return number of bytes occupied by the block */
        public long getLength()
        {
            return this.length;
        }

        /** Overwrites the byte length of the block. */
        void setLength(long length)
        {
            this.length = length;
        }

        /** @return number of records stored in the block */
        public long getNumRecords()
        {
            return this.numRecords;
        }

        /** @return identifier of the block */
        public long getBlockId()
        {
            return this.blockId;
        }

        /** @return the upper 32 bits of the block id, narrowed to an int */
        public int getReducerId()
        {
            final long upperWord = getBlockId() >> 32;
            return (int) upperWord;
        }

        @Override
        public String toString()
        {
            final String layout =
                    "[key=%s, offset=%d, length=%d, numRecords=%d, blockId=%d]";
            return String.format(layout, key, offset, length, numRecords, blockId);
        }
    }

    /**
     * Creates a handle for the rubix file at {@code path}. The constructor only stores
     * its arguments; no file access happens here.
     *
     * @param conf configuration stored for later file access
     * @param path location of the rubix file
     */
    public RubixFile(Configuration conf, Path path)
    {
        this.path = path;
        this.conf = conf;
    }

    /**
     * Returns the key class, loading the key data first if it has not been loaded yet.
     *
     * @return the key class set by {@link #getKeyData()}
     */
    public Class<K> getKeyClass() throws IOException,
            InstantiationException,
            IllegalAccessException,
            ClassNotFoundException
    {
        // keyClass is assigned as a side effect of getKeyData()
        if (keyData != null)
        {
            return keyClass;
        }

        getKeyData();
        return keyClass;
    }

    /**
     * Returns the value class, loading the key data first if it has not been loaded yet.
     *
     * @return the value class set by {@link #getKeyData()}
     */
    public Class<V> getValueClass() throws IOException,
            InstantiationException,
            IllegalAccessException,
            ClassNotFoundException
    {
        // valueClass is assigned as a side effect of getKeyData()
        if (keyData != null)
        {
            return valueClass;
        }

        getKeyData();
        return valueClass;
    }

    /**
     * Builds a {@link BlockSchema} from the "schema" field of the file metadata.
     *
     * @return a new schema object on every call
     */
    public BlockSchema getSchema() throws IOException,
            ClassNotFoundException
    {
        if (keyData == null)
        {
            // the metadata json is populated while loading the key data
            getKeyData();
        }

        final JsonNode schemaNode = metadataJson.get("schema");
        return new BlockSchema(schemaNode);
    }

    /**
     * Returns the "partitionKeys" field of the file metadata as a string array.
     */
    public String[] getPartitionKeys() throws IOException,
            ClassNotFoundException
    {
        if (keyData == null)
        {
            // the metadata json is populated while loading the key data
            getKeyData();
        }

        final JsonNode partitionKeysNode = metadataJson.get("partitionKeys");
        return JsonUtils.asArray(partitionKeysNode);
    }

    /**
     * Returns the "sortKeys" field of the file metadata as a string array.
     */
    public String[] getSortKeys() throws IOException,
            ClassNotFoundException
    {
        if (keyData == null)
        {
            // the metadata json is populated while loading the key data
            getKeyData();
        }

        final JsonNode sortKeysNode = metadataJson.get("sortKeys");
        return JsonUtils.asArray(sortKeysNode);
    }

    /**
     * Returns the serialization type named by the "serializationType" metadata field,
     * or {@link BlockSerializationType#DEFAULT} when the field is absent.
     */
    public BlockSerializationType getBlockSerializationType() throws IOException,
            ClassNotFoundException
    {
        if (keyData == null)
        {
            // the metadata json is populated while loading the key data
            getKeyData();
        }

        if (metadataJson.has("serializationType"))
        {
            final String typeName = JsonUtils.getText(metadataJson, "serializationType");
            return BlockSerializationType.valueOf(typeName);
        }

        return BlockSerializationType.DEFAULT;
    }

    /**
     * Returns the text of the "BlockgenId" metadata field, or {@code null} when the
     * field is absent.
     */
    public String getBlockgenId() throws IOException,
            ClassNotFoundException
    {
        if (keyData == null)
        {
            // the metadata json is populated while loading the key data
            getKeyData();
        }

        return metadataJson.has("BlockgenId")
                ? JsonUtils.getText(metadataJson, "BlockgenId")
                : null;
    }

    /**
     * Lists the entries under {@code path} that match the rubix glob pattern
     * ({@code RubixConstants.RUBIX_EXTENSION_FOR_GLOB}).
     *
     * @param path directory to search
     * @param fs file system used to evaluate the glob
     * @return whatever {@code fs.globStatus} returns for the pattern
     */
    public static FileStatus[] getRubixFiles(Path path, FileSystem fs)
        throws IOException
    {
        return fs.globStatus(new Path(path, RubixConstants.RUBIX_EXTENSION_FOR_GLOB));
    }

    /**
     * Resolves {@code path} to a single rubix file. A directory is resolved to the
     * first file returned by {@link #getRubixFiles(Path, FileSystem)}; any other path
     * is returned unchanged.
     *
     * @param conf configuration used to obtain the file system of {@code path}
     * @param path a rubix file or a directory containing rubix files
     * @return the path of one rubix file
     * @throws IOException if {@code path} is a directory without any rubix file
     */
    public static Path getARubixFile(Configuration conf, Path path) throws IOException
    {
        Path chosen = path;

        final FileSystem fs = path.getFileSystem(conf);
        final boolean isDirectory = fs.getFileStatus(path).isDir();
        if (isDirectory)
        {
            final FileStatus[] candidates = getRubixFiles(path, fs);
            if (candidates.length == 0)
            {
                throw new IOException("there are no files in " + path.toString());
            }

            chosen = candidates[0].getPath();
        }

        print.f("Obtaining schema of rubix file %s", chosen.toString());

        return chosen;
    }

    /**
     * Loads (once) and returns the key section of this rubix file.
     * <p>
     * File layout as read here: the last 8 bytes hold the start position of the
     * trailer; the trailer consists of the metadata json (written with
     * {@code writeUTF}), the size of the key section as an int, and the key section
     * itself, which ends 8 bytes before the end of the file. Each key section entry is
     * a serialized key followed by three longs: offset, block id and number of records.
     * <p>
     * As a side effect this method populates {@code metadataJson}, {@code keyClass}
     * and {@code valueClass}. All cached state is assigned only after the whole
     * trailer was read successfully, so a failed attempt is retried on the next call
     * instead of leaving a partially filled list behind.
     *
     * @return one entry per block, in the order stored in the file
     * @throws IOException if the file cannot be read or its trailer is malformed
     * @throws ClassNotFoundException if the key or value class cannot be loaded
     */
    @SuppressWarnings("unchecked")
    public List<KeyData<K>> getKeyData() throws IOException,
            ClassNotFoundException
    {
        if (keyData != null)
            return keyData;

        // Resolve the file system from the path (as getARubixFile does) so that a
        // fully qualified path on a non-default file system can be opened as well.
        final FileSystem fs = path.getFileSystem(conf);
        final long filesize = fs.getFileStatus(path).getLen();
        if (filesize < 8)
        {
            throw new IOException("File " + path + " is too small (" + filesize
                    + " bytes) to be a rubix file");
        }

        long metaDataStartPos;
        JsonNode metadata;
        byte[] keySection;

        FSDataInputStream in = fs.open(path);
        try
        {
            /* The last long in the file is the start position of the trailer section */
            in.seek(filesize - 8);
            metaDataStartPos = in.readLong();

            in.seek(metaDataStartPos);
            metadata = new ObjectMapper().readValue(in.readUTF(), JsonNode.class);

            int keySectionSize = in.readInt();
            if (keySectionSize < 0 || keySectionSize > filesize - 8)
            {
                throw new IOException("Invalid key section size " + keySectionSize
                        + " in rubix file " + path);
            }

            // load the key section; readFully guarantees the whole section is read,
            // whereas a plain read() may return fewer bytes than requested
            keySection = new byte[keySectionSize];
            in.seek(filesize - keySectionSize - 8);
            in.readFully(keySection, 0, keySectionSize);
        }
        finally
        {
            in.close();
        }

        int numberOfBlocks = metadata.get("numberOfBlocks").getIntValue();

        Class<K> loadedKeyClass =
                (Class<K>) ClassCache.forName(JsonUtils.getText(metadata, "keyClass"));
        Class<V> loadedValueClass =
                (Class<V>) ClassCache.forName(JsonUtils.getText(metadata, "valueClass"));

        // The key deserializer and the DataInput share the same underlying stream:
        // each entry is a serialized key followed by three longs.
        ByteArrayInputStream bis = new ByteArrayInputStream(keySection);
        DataInput dataInput = new DataInputStream(bis);

        SerializationFactory serializationFactory = new SerializationFactory(conf);
        Deserializer<K> deserializer = serializationFactory.getDeserializer(loadedKeyClass);
        deserializer.open(bis);

        List<KeyData<K>> entries = new ArrayList<KeyData<K>>();
        while (bis.available() > 0 && numberOfBlocks > 0)
        {
            K key = deserializer.deserialize(null);

            long offset = dataInput.readLong();
            long blockId = dataInput.readLong();
            long numRecords = dataInput.readLong();

            entries.add(new KeyData<K>(key, offset, 0, numRecords, blockId));
            numberOfBlocks--;
        }

        // Assign length to each entry: a block ends where the next one starts ...
        int numEntries = entries.size();
        for (int i = 1; i < numEntries; i++)
        {
            KeyData<K> prev = entries.get(i - 1);
            KeyData<K> current = entries.get(i);

            prev.setLength(current.getOffset() - prev.getOffset());
        }

        // ... and the last block ends where the trailer starts
        if (numEntries > 0)
        {
            KeyData<K> last = entries.get(numEntries - 1);
            last.setLength(metaDataStartPos - last.getOffset());
        }

        // publish the cached state only after everything was loaded successfully
        metadataJson = metadata;
        keyClass = loadedKeyClass;
        valueClass = loadedValueClass;
        keyData = entries;

        return keyData;
    }

    /**
     * Copies {@code numBlocks} blocks with consecutive ids, starting at
     * {@code blockId}, out of the given rubix files into a new local file that has the
     * same layout: block data, metadata json, key section size, key section and
     * finally the start position of the trailer.
     * <p>
     * Blocks that cannot be located are reported on stderr and skipped. The metadata
     * of the output is cloned from the file that supplied the last extracted block,
     * with "numberOfBlocks" replaced by the number of blocks actually found.
     *
     * @param rfiles rubix files to search
     * @param blockId id of the first block to extract
     * @param numBlocks number of consecutive block ids to look for
     * @param output path of the local output file (overwritten if it exists)
     * @throws IOException on read/write failure, or if none of the requested blocks
     *             exists in {@code rfiles}
     */
    private static void extract(List<RubixFile<Tuple, Object>> rfiles,
                                long blockId, int numBlocks,
                                String output) throws IOException,
            ClassNotFoundException,
            InstantiationException,
            IllegalAccessException
    {
        Configuration conf = new JobConf();
        File outFile = new File(output);
        if (outFile.exists())
        {
            outFile.delete();
        }
        outFile.createNewFile();

        ByteArrayOutputStream keySectionStream = new ByteArrayOutputStream();
        DataOutput keySectionOut = new DataOutputStream(keySectionStream);
        SerializationFactory serializationFactory = new SerializationFactory(conf);
        RubixFile<Tuple, Object> lastrFile = null;
        long totalLength = 0;
        int foundBlocks = 0;

        BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outFile));
        try
        {
            for (int i = 0; i < numBlocks; i++)
            {
                final long wantedBlockId = blockId + i;
                boolean found = false;

                for (RubixFile<Tuple, Object> rfile : rfiles)
                {
                    print.f("Checking %s", rfile.path.toString());

                    for (KeyData<Tuple> keyData : rfile.getKeyData())
                    {
                        if (keyData.getBlockId() != wantedBlockId)
                        {
                            continue;
                        }

                        long offset = keyData.getOffset();
                        long length = keyData.getLength();
                        print.f("Extracting block %d (off=%d len=%d) from %s",
                                keyData.getBlockId(),
                                offset,
                                length,
                                rfile.path.toString());

                        // copy the data
                        if (length > 0)
                        {
                            copyBlockBytes(conf, rfile.path, offset, length, bos);
                        }

                        // copy the key section entry; the offset is rewritten to the
                        // position of the block within the output file
                        Serializer<Tuple> keySerializer =
                                serializationFactory.getSerializer(rfile.getKeyClass());
                        keySerializer.open(keySectionStream);
                        keySerializer.serialize(keyData.getKey());
                        keySectionOut.writeLong(totalLength); // position
                        keySectionOut.writeLong(keyData.getBlockId());
                        keySectionOut.writeLong(keyData.getNumRecords());

                        foundBlocks++;
                        totalLength += length;
                        lastrFile = rfile;

                        found = true;
                        break;
                    }

                    if (found)
                    {
                        break;
                    }
                }

                if (!found)
                    System.err.println("Cannot locate block with id " + wantedBlockId);
            }

            // Without a single extracted block there is no metadata to clone; fail with
            // a clear message instead of a NullPointerException.
            if (lastrFile == null)
            {
                throw new IOException("None of the " + numBlocks
                        + " requested block(s) starting at id " + blockId
                        + " could be located; nothing was extracted to " + output);
            }

            byte[] trailerBytes = keySectionStream.toByteArray();

            JsonNode json = JsonUtils.cloneNode(lastrFile.metadataJson);
            ((ObjectNode) json).put("numberOfBlocks", foundBlocks);

            DataOutput out = new DataOutputStream(bos);
            out.writeUTF(json.toString());
            out.writeInt(trailerBytes.length);
            out.write(trailerBytes);
            out.writeLong(totalLength); // trailer start offset
        }
        finally
        {
            // flushes the buffered data and releases the file handle, also on failure
            bos.close();
        }
    }

    /**
     * Copies {@code length} bytes starting at {@code offset} of {@code source} into
     * {@code dest}, printing one dot per copied chunk and a newline at the end. The
     * input stream is always closed.
     */
    private static void copyBlockBytes(Configuration conf,
                                       Path source,
                                       long offset,
                                       long length,
                                       BufferedOutputStream dest) throws IOException
    {
        final int BUF_SIZE = 32 * 1024;

        FSDataInputStream in = source.getFileSystem(conf).open(source);
        try
        {
            in.seek(offset);

            byte[] data = new byte[BUF_SIZE];
            long toRead = length;
            while (toRead > 0)
            {
                int thisRead = (int) Math.min((long) BUF_SIZE, toRead);
                in.readFully(data, 0, thisRead);
                dest.write(data, 0, thisRead);
                toRead -= thisRead;
                System.out.print(".");
            }
            System.out.println();
        }
        finally
        {
            in.close();
        }
    }

    /**
     * Placeholder for dumping rubix files in Avro format. The whole implementation
     * below is commented out, so this method currently does nothing. Its only call
     * site in {@code main} is commented out as well; {@code main} rejects the AVRO
     * format with an {@link UnsupportedOperationException} instead.
     *
     * @param rfiles rubix files to dump (unused while the body is disabled)
     * @param output output location (unused while the body is disabled)
     */
    private static void dumpAvro(List<RubixFile<Tuple, Object>> rfiles, String output) throws IOException,
            ClassNotFoundException,
            InstantiationException,
            IllegalAccessException,
            InterruptedException
    {
        // Configuration conf = new JobConf();
        // File outDir = new File(output);
        // if (outDir.exists())
        // outDir.delete();
        // outDir.mkdirs();
        //
        // RubixFile<Tuple, Object> firstFile = rfiles.get(0);
        // BlockSchema schema = firstFile.getSchema();
        // Schema avroSchema = AvroUtils.convertFromBlockSchema("recordName", schema);
        // AvroBlockWriter avroBlockWriter = new AvroBlockWriter();
        // Record record = avroBlockWriter.createRecord(avroSchema);
        //
        // for (RubixFile<Tuple, Object> rfile : rfiles)
        // {
        // Path inPath = rfile.path;
        //
        // List<KeyData<Tuple>> keyDataList = rfile.getKeyData();
        // File outPath = new File(outDir, inPath.getName() + ".avro");
        // outPath.createNewFile();
        //
        // GenericDatumWriter<GenericRecord> datumWriter =
        // new GenericDatumWriter<GenericRecord>(avroSchema);
        // DataFileWriter<GenericRecord> dataFileWriter =
        // new DataFileWriter<GenericRecord>(datumWriter);
        // dataFileWriter.create(avroSchema, outPath);
        //
        // for (KeyData<Tuple> keyData : keyDataList)
        // {
        //
        // RubixInputSplit<Tuple, Object> split =
        // new RubixInputSplit<Tuple, Object>(conf,
        // inPath,
        // keyData.getKey(),
        // keyData.getOffset(),
        // keyData.getLength(),
        // keyData.getBlockId(),
        // keyData.getNumRecords(),
        // rfile.getKeyClass(),
        // rfile.getValueClass(),
        // rfile.getSchema(),
        // rfile.getBlockSerializationType());
        // RubixRecordReader<Tuple, Object> recordReader =
        // new RubixRecordReader<Tuple, Object>();
        // recordReader.initialize(split, conf);
        //
        // while (recordReader.nextKeyValue())
        // {
        // Tuple tuple = (Tuple) recordReader.getCurrentValue();
        // for (int i = 0; i < schema.getNumColumns(); i++)
        // {
        // avroBlockWriter.writeField(record,
        // i,
        // tuple.get(i),
        // avroSchema.getFields().get(i).schema());
        // }
        // dataFileWriter.append(record);
        // }
        // }
        // dataFileWriter.close();
        // System.out.println("Written " + outPath);
        // }
    }

    /**
     * Prints a textual summary of the given rubix files to stdout: schema, partition
     * and sort keys, serialization type and one line per block. When {@code numRows}
     * is positive, up to {@code numRows} rows of every block are printed as well,
     * followed by the block's record count and the running total for the file.
     *
     * @param rfiles rubix files to describe
     * @param output currently unused; all output goes to stdout
     * @param numRows maximum number of rows to print per block; {@code 0} (or less)
     *            prints the block summary only
     */
    private static void dumpText(List<RubixFile<Tuple, Object>> rfiles,
                                 String output,
                                 int numRows) throws IOException,
            InterruptedException,
            ClassNotFoundException,
            InstantiationException,
            IllegalAccessException
    {
        Configuration conf = new JobConf();
        int totalBlocks = 0;

        for (RubixFile<Tuple, Object> rfile : rfiles)
        {
            Path path = rfile.path;
            List<KeyData<Tuple>> keyDataList = rfile.getKeyData();

            print.f("--- %s", path.toString());
            print.f("Schema: %s", rfile.getSchema().toString());
            print.f("PartitionKeys: %s", Arrays.toString(rfile.getPartitionKeys()));
            print.f("SortKeys %s", Arrays.toString(rfile.getSortKeys()));
            print.f("Block Serialization Type: %s", rfile.getBlockSerializationType());
            print.f("Number of blocks: %d", keyDataList.size());

            totalBlocks += keyDataList.size();

            // record counts are longs; a long accumulator avoids silent truncation
            long cumrows = 0;

            for (KeyData<Tuple> keyData : keyDataList)
            {
                // the block id is split into its upper and lower 32 bits for display
                print.f("Block %s. BlockId: %d (Reducer: %d Index:%d)",
                        keyData,
                        keyData.getBlockId(),
                        (keyData.getBlockId() >> 32),
                        (keyData.getBlockId() & (((long) 1 << 32) - 1)));

                if (numRows > 0)
                {
                    RubixInputSplit<Tuple, Object> split =
                            new RubixInputSplit<Tuple, Object>(conf,
                                                               path,
                                                               keyData.getKey(),
                                                               keyData.getOffset(),
                                                               keyData.getLength(),
                                                               keyData.getBlockId(),
                                                               keyData.getNumRecords(),
                                                               rfile.getKeyClass(),
                                                               rfile.getValueClass(),
                                                               rfile.getSchema(),
                                                               rfile.getBlockSerializationType());

                    RubixRecordReader<Tuple, Object> recordReader =
                            new RubixRecordReader<Tuple, Object>();
                    recordReader.initialize(split, conf);
                    try
                    {
                        // print exactly numRows rows (the previous loop stopped one
                        // row early because it counted before comparing)
                        int rows = 0;
                        while (rows < numRows && recordReader.nextKeyValue())
                        {
                            System.out.println("\t" + recordReader.getCurrentValue());
                            rows++;
                        }
                    }
                    finally
                    {
                        // one reader is opened per block; release it before moving on
                        recordReader.close();
                    }

                    cumrows += keyData.getNumRecords();
                    System.out.println(String.format("\tRows=%d Cummulative=%d",
                                                     keyData.getNumRecords(),
                                                     cumrows));
                }
            }
        }

        print.f("Total Blocks: %d", totalBlocks);
    }

    /**
     * Command line entry point: {@code RubixFile <rubix file or dir> [options]}.
     * <ul>
     * <li>{@code -h}: print the usage message</li>
     * <li>{@code -m}: print the metadata json of the first file and exit</li>
     * <li>{@code -d [-f TEXT|AVRO]}: dump the contents (only TEXT to stdout is
     * supported)</li>
     * <li>{@code -e blockId[,numBlocks] -o file}: extract blocks into a local file</li>
     * <li>{@code -v}: print a summary plus the first few rows of each block</li>
     * <li>no option: print a summary of the blocks</li>
     * </ul>
     * Invalid usage is reported on stderr and the method returns without doing work.
     */
    public static void main(String[] args) throws IOException,
            ClassNotFoundException,
            InterruptedException,
            ParseException,
            InstantiationException,
            IllegalAccessException
    {
        final int VERBOSE_NUM_ROWS = 4;

        Options options = new Options();

        options.addOption("h", "help", false, "shows this message");
        options.addOption("v",
                          "verbose",
                          false,
                          "print summary and first few rows of each block");
        options.addOption("m", "metadata", false, "show the metadata");
        options.addOption("d",
                          "dump",
                          false,
                          "dump the contents of the rubix file. Use -f for specifying format, and -o for specifying "
                          + "output location");
        options.addOption("f",
                          "format",
                          true,
                          "the data format for dumping data (AVRO or TEXT). Default: TEXT");
        options.addOption("e",
                          "extract",
                          true,
                          "Extract one or more rubix blocks starting from the given blockId. Use -e blockId,numBlocks "
                          + "for specifying the blocks to be extracted. Use -o for specifying output location");
        options.addOption("o", true, "Store the output at the specified location");

        CommandLineParser parser = new BasicParser();

        // parse the command line arguments
        CommandLine line = parser.parse(options, args);

        // show the help message
        if (line.hasOption("h"))
        {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("RubixFile <rubix file or dir> [options]\nIf no options are provided, print a summary of the blocks.",
                                options);
            return;
        }

        // validate provided options
        if (line.hasOption("d") && line.hasOption("e"))
        {
            System.err.println("Cannot dump (-d) and extract (-e) at the same time!");
            return;
        }

        // obtain the list of rubix files
        String[] files = line.getArgs();
        if (files == null || files.length == 0)
        {
            System.err.println("Rubix file not specified");
            return;
        }

        Configuration conf = new JobConf();

        // resolve the file system from the path so that fully qualified paths on a
        // non-default file system are handled too
        Path path = new Path(files[0]);
        FileSystem fs = path.getFileSystem(conf);
        FileStatus[] allFiles;

        FileStatus status = fs.getFileStatus(path);
        if (status.isDir())
        {
            allFiles = RubixFile.getRubixFiles(path, fs);
            if (allFiles == null)
            {
                allFiles = new FileStatus[0];
            }
        }
        else
        {
            allFiles = new FileStatus[] { status };
        }

        // walk over all files and extract the trailer section
        List<RubixFile<Tuple, Object>> rfiles = new ArrayList<RubixFile<Tuple, Object>>();

        for (FileStatus s : allFiles)
        {
            Path p = s.getPath();

            RubixFile<Tuple, Object> rfile = new RubixFile<Tuple, Object>(conf, p);

            // if printing meta data information.. exit after first file (since all files
            // have the same meta data). Returning here (rather than leaving the loop)
            // avoids printing a misleading empty block summary afterwards.
            if (line.hasOption("m"))
            {
                rfile.getKeyData();

                System.out.println(new ObjectMapper().writer()
                                                     .writeValueAsString(rfile.metadataJson));
                return;
            }

            rfiles.add(rfile);
        }

        // dump the data
        if (line.hasOption("d"))
        {
            String format = line.getOptionValue("f");
            if (format == null)
                format = "TEXT";

            format = format.trim().toUpperCase();

            if (format.equals("AVRO"))
            {
                // dumpAvro(rfiles, line.getOptionValue("o"));
                throw new UnsupportedOperationException("Dumping to avro is not currently supporting. Please write a Cubert (map-only) script to store data in avro format");
            }
            else if (format.equals("TEXT"))
            {
                if (line.hasOption("o"))
                {
                    System.err.println("Dumping TEXT format data *into a file* is not currently supported");
                    return;
                }
                dumpText(rfiles, line.getOptionValue("o"), Integer.MAX_VALUE);
            }
            else
            {
                System.err.println("Invalid format [" + format
                        + "] for dumping. Please use AVRO or TEXT");
                return;
            }
        }
        // extract arguments: -e blockId,numBlocks(contiguous) -o outputLocation
        else if (line.hasOption("e"))
        {
            String extractArguments = line.getOptionValue("e");
            if (!line.hasOption("o"))
            {
                System.err.println("Need to specify the location to store the output");
                return;
            }
            String outputLocation = line.getOptionValue("o");

            // Parse "blockId" or "blockId,numBlocks"; malformed input is reported
            // instead of failing with an exception.
            String[] splitExtractArgs = extractArguments.split(",");
            long blockId;
            int numBlocks = 1;
            try
            {
                if (splitExtractArgs.length == 0)
                {
                    throw new NumberFormatException("missing blockId");
                }
                blockId = Long.parseLong(splitExtractArgs[0].trim());
                if (splitExtractArgs.length > 1)
                {
                    numBlocks = Integer.parseInt(splitExtractArgs[1].trim());
                }
            }
            catch (NumberFormatException e)
            {
                System.err.println("Invalid argument [" + extractArguments
                        + "] for extracting. Please use -e blockId or -e blockId,numBlocks");
                return;
            }

            if (numBlocks < 1)
            {
                System.err.println("Number of blocks to extract must be at least 1, but was "
                        + numBlocks);
                return;
            }

            extract(rfiles, blockId, numBlocks, outputLocation);
        }
        else
        // print summary
        {
            dumpText(rfiles, null, line.hasOption("v") ? VERBOSE_NUM_ROWS : 0);
        }
    }
}