/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hbase.reporter;

import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.datasketches.quantiles.DoublesUnion;
import org.apache.datasketches.quantiles.UpdateDoublesSketch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.math.BigDecimal;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.function.DoubleSupplier;
import java.util.stream.Collectors;
import java.util.stream.DoubleStream;

/**
 * Run a scan against a table reporting on row size, column size and count.
 *
 * So that it can run against older HBase such as cdh5, this tool uses loads of deprecated API
 * and keeps local copies of some Cell sizing methods.
 */
public class TableReporter {
  private static String GNUPLOT_DATA_SUFFIX = ".gnuplotdata";

  /**
   * Pair of quantile sketches -- one over row sizes, one over per-row column counts -- plus
   * print helpers that dump quantiles, a histogram and summary stats on stdout.
   * To accumulate Sketches instances, see {@link AccumlatingSketch}.
   */
  static class Sketches {
    /**
     * Stateful supplier that hands out 0.00, 0.01, 0.02, ... on successive calls.
     * Only meant to be drained once, to build {@link #NORMALIZED_RANKS}.
     */
    private static final DoubleSupplier IN_POINT_1_INC = new DoubleSupplier() {
      private BigDecimal accumulator = BigDecimal.ZERO;
      // String constructor so the step is exactly 0.01; BigDecimal(double) would capture the
      // binary approximation of the literal instead.
      private final BigDecimal pointOhOne = new BigDecimal("0.01");

      @Override
      public double getAsDouble() {
        double d = this.accumulator.doubleValue();
        this.accumulator = this.accumulator.add(pointOhOne);
        return d;
      }
    };

    /**
     * 100 increasing ranks, 0.00 through 0.99 in steps of 0.01.
     * Must be declared after {@link #IN_POINT_1_INC}; static initializers run in textual order.
     */
    static final double [] NORMALIZED_RANKS =
        DoubleStream.generate(IN_POINT_1_INC).limit(100).toArray();

    /**
     * Bins that sort of make sense for the data we're seeing here. After some trial and error.
     */
    static final double [] BINS =
        new double [] {1, 5, 10, 15, 20, 25, 100, 1024, 5120, 10240, 20480, 51200, 102400, 1048576};

    /**
     * Size of row.
     */
    final UpdateDoublesSketch rowSizeSketch;

    /**
     * Count of columns in row.
     */
    final UpdateDoublesSketch columnCountSketch;

    /**
     * Create a pair of empty sketches, each built with K=256.
     */
    Sketches() {
      this(DoublesSketch.builder().setK(256).build(), DoublesSketch.builder().setK(256).build());
    }

    /**
     * Wrap two existing sketches.
     * @param rowSizeSketch sketch of row sizes
     * @param columnCountSketch sketch of per-row column counts
     */
    Sketches(UpdateDoublesSketch rowSizeSketch, UpdateDoublesSketch columnCountSketch) {
      this.rowSizeSketch = rowSizeSketch;
      this.columnCountSketch = columnCountSketch;
    }

    /**
     * Print <code>preamble</code> on its own line, then both sketches.
     */
    void print(String preamble) {
      System.out.println(preamble);
      print();
    }

    /**
     * Print the row-size sketch followed by the column-count sketch.
     */
    void print() {
      print("rowSize", rowSizeSketch);
      print("columnCount", columnCountSketch);
    }

    /**
     * Print three lines for <code>sketch</code>: quantiles at {@link #NORMALIZED_RANKS}, a
     * histogram over {@link #BINS} (PMF scaled by N), and N/min/max.
     */
    private static void print(String label, final DoublesSketch sketch) {
      System.out.println(label + " quantiles " +
          Arrays.toString(sketch.getQuantiles(NORMALIZED_RANKS)));
      double [] pmfs = sketch.getPMF(BINS);
      // The PMF holds fractions of N per bin; scale by N to get counts. Guard for a sketch
      // that has no PMF to offer.
      System.out.println(label + " histo " +
          (pmfs == null || pmfs.length == 0?
              "null": Arrays.toString(Arrays.stream(pmfs).map(d -> d * sketch.getN()).toArray())));
      // Leading space keeps the label apart from 'stats', matching the two lines above.
      System.out.println(label + " stats N=" + sketch.getN() + ", min=" + sketch.getMinValue() +
          ", max=" + sketch.getMaxValue());
    }
  }

  /**
   * For aggregating {@link Sketches}.
   * Sketches cannot be added to one another directly; merging goes via a DoublesUnion.
   */
  static class AccumlatingSketch {
    // Per-region sketches are built with K=256 (see the Sketches no-arg constructor). Give the
    // unions the same ceiling so merged totals are not downsampled to the builder's default.
    final DoublesUnion rowSizeUnion = DoublesUnion.builder().setMaxK(256).build();
    // Despite the name, this union accumulates the column *count* sketches.
    final DoublesUnion columnSizeUnion = DoublesUnion.builder().setMaxK(256).build();

    /**
     * Merge <code>other</code>'s row-size and column-count sketches into the running unions.
     */
    void add(Sketches other) {
      this.rowSizeUnion.update(other.rowSizeSketch);
      this.columnSizeUnion.update(other.columnCountSketch);
    }

    /**
     * @return A Sketches made of current state of aggregation.
     */
    Sketches get() {
      return new Sketches(rowSizeUnion.getResult(), columnSizeUnion.getResult());
    }
  }

  /**
   * Fold one scanner Result into <code>sketches</code>: the summed estimated size of its cells
   * goes to the row-size sketch and the number of cells to the column-count sketch.
   */
  static void processRowResult(Result result, Sketches sketches) {
    final Cell [] cells = result.rawCells();
    long totalBytes = 0;
    for (int i = 0; i < cells.length; i++) {
      totalBytes += estimatedSizeOfCell(cells[i]);
    }
    sketches.rowSizeSketch.update(totalBytes);
    sketches.columnCountSketch.update(cells.length);
  }

  /**
   * Choose which regions of <code>tableName</code> to scan.
   * @return When <code>encodedRegionName</code> is given, the regions whose encoded name equals
   *         it (possibly none; <code>fraction</code> is ignored). Otherwise the first
   *         <code>fraction</code> of the table's regions, rounded down.
   * @throws HBaseIOException if the table reports no regions at all
   */
  private static List<RegionInfo> getRegions(Connection connection, TableName tableName,
      double fraction, String encodedRegionName) throws IOException {
    try (Admin admin = connection.getAdmin()) {
      final List<RegionInfo> all = admin.getRegions(tableName);
      if (all.isEmpty()) {
        throw new HBaseIOException("No regions found in " + tableName);
      }
      if (encodedRegionName == null) {
        // The cast truncates, so a small fraction of a small table can select zero regions.
        final int howMany = (int)(all.size() * fraction);
        return all.subList(0, howMany);
      }
      final ArrayList<RegionInfo> matches = new ArrayList<>();
      for (RegionInfo candidate : all) {
        if (candidate.getEncodedName().equals(encodedRegionName)) {
          matches.add(candidate);
        }
      }
      return matches;
    }
  }

  /**
   * Callable that scans one Region and folds every Result it gets back into its own
   * {@link Sketches}. {@link #call()} returns this instance so the submitter can read the
   * sketches, the region and the scan duration off the completed task.
   */
  static class SketchRegion implements Callable<SketchRegion> {
    private final RegionInfo ri;
    private final Connection connection;
    private final TableName tableName;
    /**
     * Maximum number of Results to process; a negative value means no limit.
     */
    private final int limit;
    private final Sketches sketches = new Sketches();
    /**
     * Scan wall-clock time in milliseconds; stays 0 if the scan failed.
     */
    private volatile long duration;

    SketchRegion(Connection connection, TableName tableName, RegionInfo ri, int limit) {
      this.ri = ri;
      this.connection = connection;
      this.tableName = tableName;
      this.limit = limit;
    }

    /**
     * Scan from the region's start key to its end key, sketching each Result, stopping early
     * once <code>limit</code> Results have been processed (when limit is non-negative).
     * An IOException is reported on stderr and otherwise swallowed: whatever was sketched
     * before the failure is still returned.
     * @return this
     */
    @Override
    public SketchRegion call() {
      try (Table table = this.connection.getTable(this.tableName)) {
        Scan scan = new Scan();
        scan.setStartRow(this.ri.getStartKey());
        scan.setStopRow(this.ri.getEndKey());
        // NOTE(review): with partial results allowed, a very wide row presumably arrives as
        // several Results, each sketched as if it were a row -- confirm that is intended.
        scan.setAllowPartialResults(true);
        long startTime = System.currentTimeMillis();
        // A limit of zero asks for nothing; do not bother opening a scanner.
        if (this.limit != 0) {
          long count = 0;
          try (ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
              processRowResult(result, sketches);
              count++;
              // Stop once 'limit' Results are in. A negative limit never trips this.
              if (this.limit > 0 && count >= this.limit) {
                break;
              }
            }
          }
        }
        this.duration = System.currentTimeMillis() - startTime;
      } catch (IOException e) {
        // Best effort: say which region failed, then carry on with what we have.
        System.err.println("Scan of region " + this.ri.getRegionNameAsString() +
            " failed; its sketch may be incomplete");
        e.printStackTrace();
      }
      return this;
    }

    Sketches getSketches() {
      return this.sketches;
    }

    RegionInfo getRegionInfo() {
      return this.ri;
    }

    long getDuration() {
      return this.duration;
    }
  }

  /**
   * Sketch one table: pick its regions, scan each on a pool of daemon threads, print every
   * region's sketches as its scan completes, then print the merged totals and write the gnuplot
   * data files.
   * @param configuration HBase configuration used to open the connection
   * @param tableNameAsStr name of the table to scan
   * @param limit per-region cap on Results to process, passed through to {@link SketchRegion}
   * @param fraction fraction of the table's regions to scan; not used for region selection when
   *          <code>encodedRegionName</code> is set
   * @param threads size of the scanning thread pool
   * @param isoNow timestamp string worked into the output file names
   * @param encodedRegionName if non-null, scan only the region with this encoded name
   * @throws HBaseIOException if no region was selected
   * @throws ExecutionException if a region task failed with something other than the
   *           IOException it handles itself
   */
  private static void sketch(Configuration configuration, String tableNameAsStr, int limit,
    double fraction, int threads, String isoNow, String encodedRegionName)
  throws IOException, InterruptedException, ExecutionException {
    TableName tableName = TableName.valueOf(tableNameAsStr);
    AccumlatingSketch totalSketches = new AccumlatingSketch();
    long startTime = System.currentTimeMillis();
    int count = 0;
    try (Connection connection = ConnectionFactory.createConnection(configuration)) {
      // Get list of Regions. If encodedRegionName is set, getRegions ignores 'fraction' and
      // returns only the matching region; otherwise it returns the first 'fraction' of Regions.
      List<RegionInfo> regions = getRegions(connection, tableName, fraction, encodedRegionName);
      count = regions.size();
      if (count <= 0) {
        // Name the actual cause; blaming 'fraction' is misleading when a region was asked for.
        throw new HBaseIOException(encodedRegionName != null?
            "No region with encoded name " + encodedRegionName + " found in " + tableNameAsStr:
            "Empty regions list; fraction " + fraction +
                " too severe or communication problems?");
      }
      System.out.println(Instant.now().toString() + " Scanning " + tableNameAsStr +
          " regions=" + count + ", " + regions);
      ExecutorService es =
          Executors.newFixedThreadPool(threads, new ThreadFactory() {
            @Override
            public Thread newThread(Runnable r) {
              // Daemon threads so a stuck scan cannot keep the JVM alive.
              Thread t = new Thread(r);
              t.setDaemon(true);
              return t;
            }
          });
      try {
        List<Future<SketchRegion>> futures = new ArrayList<>(count);
        for (RegionInfo ri: regions) {
          // Do submit rather than invokeAll; invokeAll blocks until all done. This way we get
          // control back after all are submitted and can report on regions as they finish.
          futures.add(es.submit(new SketchRegion(connection, tableName, ri, limit)));
        }
        // Collect finished futures on the side and remove them after the pass; removing while
        // iterating would throw java.util.ConcurrentModificationException.
        List<Future<SketchRegion>> removals = new ArrayList<>();
        while (!futures.isEmpty()) {
          for (Future<SketchRegion> future: futures) {
            if (!future.isDone()) {
              continue;
            }
            SketchRegion sr = future.get();
            sr.getSketches().print(Instant.now().toString() +
                " region=" + sr.getRegionInfo().getRegionNameAsString() + ", duration=" +
                (Duration.ofMillis(sr.getDuration()).toString()));
            totalSketches.add(sr.getSketches());
            removals.add(future);
          }
          futures.removeAll(removals);
          removals.clear();
          // Only pause when there is still something to wait for; no trailing sleep after the
          // last region has been harvested.
          if (!futures.isEmpty()) {
            Thread.sleep(1000);
          }
        }
      } finally {
        es.shutdown();
      }
    }
    Sketches sketches = totalSketches.get();
    String isoDuration = Duration.ofMillis(System.currentTimeMillis() - startTime).toString();
    sketches.print(Instant.now().toString() + " Totals for " + tableNameAsStr + " regions=" + count +
        ", limit=" + limit + ", fraction=" + fraction + ", took=" + isoDuration);
    // Dump out the gnuplot files. Saves time generating graphs.
    dumpGnuplotDataFiles(isoNow, sketches, tableNameAsStr, count, isoDuration);
  }

  /**
   * Estimate the heap space a cell occupies. A cell that implements {@link HeapSize} is asked
   * via {@link HeapSize#heapSize()}; any other cell is sized by
   * {@link #estimatedSerializedSizeOf(Cell)}, i.e. from the bytes of its components (row, CF,
   * qualifier, timestamp, type, value and tags).
   * Note that this can be the JVM heap space (on-heap) or the OS heap (off-heap).
   * @return estimate of the heap space
   */
  public static long estimatedSizeOfCell(final Cell cell) {
    // TODO: Add sizing of references that hold the row, family, etc., arrays.
    return cell instanceof HeapSize?
        ((HeapSize) cell).heapSize(): estimatedSerializedSizeOf(cell);
  }

  /**
   * Estimate a cell's size from the KeyValue serialization format used in the RPC layer. An
   * extra SIZEOF_INT is always added, standing for the length prefix written when cells are
   * serialized contiguously (e.g. in RPCs).
   * @return Estimate of the <code>cell</code> size in bytes plus an extra SIZEOF_INT indicating
   *         the actual cell length.
   */
  public static int estimatedSerializedSizeOf(final Cell cell) {
    if (!(cell instanceof ExtendedCell)) {
      final int elementBytes = getSumOfCellElementLengths(cell);
      // Borrow KeyValue's infrastructure sizes, presuming another implementation has the same
      // basic cost.
      final int keyValueOverhead = KeyValue.ROW_LENGTH_SIZE + KeyValue.FAMILY_LENGTH_SIZE;
      // Serialization is probably preceded by a length (it is in the KeyValueCodec at least).
      return elementBytes + keyValueOverhead + Bytes.SIZEOF_INT;
    }
    final ExtendedCell extended = (ExtendedCell) cell;
    return extended.getSerializedSize(true) + Bytes.SIZEOF_INT;
  }

  /**
   * @return Total length of every element of the Cell -- key elements, value and tags -- with
   *         no infrastructure counted in.
   */
  private static int getSumOfCellElementLengths(final Cell cell) {
    final int keyBytes = getSumOfCellKeyElementLengths(cell);
    final int valueBytes = cell.getValueLength();
    final int tagBytes = cell.getTagsLength();
    return keyBytes + valueBytes + tagBytes;
  }

  /**
   * @return Total length of the elements that make up the key: row, family and qualifier plus
   *         KeyValue.TIMESTAMP_TYPE_SIZE. Infrastructure, tags and value are not included.
   */
  private static int getSumOfCellKeyElementLengths(final Cell cell) {
    final int rowBytes = cell.getRowLength();
    final int familyBytes = cell.getFamilyLength();
    final int qualifierBytes = cell.getQualifierLength();
    return rowBytes + familyBytes + qualifierBytes + KeyValue.TIMESTAMP_TYPE_SIZE;
  }

  /**
   * @return Dot-separated output file name prefix: <code>reporter</code>, then the timestamp,
   *         the table name and the sketch name.
   */
  private static String getFileNamePrefix(String isoNow, String tableName, String sketchName) {
    return String.join(".", "reporter", isoNow, tableName, sketchName);
  }

  /**
   * @return The <code>#</code>-prefixed header line for a data file: table name, region count,
   *         duration, and the sketch's N, min and max.
   */
  private static String getFileFirstLine(String tableName, int regions, String isoDuration, UpdateDoublesSketch sketch) {
    final StringBuilder header = new StringBuilder("# ");
    header.append(tableName);
    header.append(" regions=").append(regions);
    header.append(", duration=").append(isoDuration);
    header.append(", N=").append(sketch.getN());
    header.append(", min=").append(sketch.getMinValue());
    header.append(", max=").append(sketch.getMaxValue());
    return header.toString();
  }

  /**
   * Create a temp file named <code>prefix</code> + ".percentiles." and write the sketch's
   * quantiles at {@link Sketches#NORMALIZED_RANKS} into it, one per line, under
   * <code>firstLine</code>.
   */
  private static void dumpPercentilesFile(String prefix, String firstLine, UpdateDoublesSketch sketch)
      throws IOException {
    // Create the file before asking the sketch for anything, as the header comes first.
    final File target = File.createTempFile(prefix + ".percentiles.", GNUPLOT_DATA_SUFFIX);
    final double [] quantiles = sketch.getQuantiles(Sketches.NORMALIZED_RANKS);
    dumpFile(target, firstLine, quantiles);
  }

  /**
   * Create a temp file named <code>prefix</code> + ".histograms." and write a histogram of the
   * sketch over {@link Sketches#BINS} into it, one count per line, under
   * <code>firstLine</code>. Counts are the sketch's PMF scaled by its N.
   * If the sketch yields no PMF, the file gets the header line only.
   */
  private static void dumpHistogramFile(String prefix, String firstLine, UpdateDoublesSketch sketch)
      throws IOException {
    double [] pmfs = sketch.getPMF(Sketches.BINS);
    final long n = sketch.getN();
    // Sketches.print guards against a null PMF; do the same here rather than hit a
    // NullPointerException after the scan has already completed.
    double [] ds = pmfs == null? new double [0]: Arrays.stream(pmfs).map(d -> d * n).toArray();
    dumpFile(File.createTempFile(prefix + ".histograms.", GNUPLOT_DATA_SUFFIX),
        firstLine, ds);
  }

  /**
   * Write <code>firstLine</code> and then each value of <code>ds</code>, one per line, to
   * <code>file</code>, and announce the file on stdout.
   * @param file destination; overwritten
   * @param firstLine header line written before the values
   * @param ds values to write; null is treated as no values so that a sketch with nothing to
   *          report still produces a header-only file
   * @throws IOException if the file cannot be written
   */
  private static void dumpFile(File file, String firstLine, double [] ds) throws IOException {
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
      writer.write(firstLine);
      writer.newLine();
      if (ds != null) {
        for (double d : ds) {
          writer.write(Double.toString(d));
          writer.newLine();
        }
      }
    }
    System.out.println(Instant.now().toString() + " wrote " + file.toString());
  }

  /**
   * Write both data files for one sketch: the percentiles file first, then the histogram file.
   * Both share <code>prefix</code> and <code>firstLine</code>.
   */
  private static void dumpFiles(String prefix, String firstLine, UpdateDoublesSketch sketch)
      throws IOException {
    dumpPercentilesFile(prefix, firstLine, sketch);
    dumpHistogramFile(prefix, firstLine, sketch);
  }

  /**
   * Write four files: a percentiles file and a histogram file for each of the column count and
   * row size sketches, in that order. All four carry the isoNow time in their names.
   */
  private static void dumpGnuplotDataFiles(String isoNow, Sketches sketches, String tableName, int regions,
      String isoDuration) throws IOException {
    final UpdateDoublesSketch columnCounts = sketches.columnCountSketch;
    final String columnCountPrefix = getFileNamePrefix(isoNow, tableName, "columnCount");
    final String columnCountHeader = getFileFirstLine(tableName, regions, isoDuration, columnCounts);
    dumpFiles(columnCountPrefix, columnCountHeader, columnCounts);

    final UpdateDoublesSketch rowSizes = sketches.rowSizeSketch;
    final String rowSizePrefix = getFileNamePrefix(isoNow, tableName, "rowSize");
    final String rowSizeHeader = getFileFirstLine(tableName, regions, isoDuration, rowSizes);
    dumpFiles(rowSizePrefix, rowSizeHeader, rowSizes);
  }

  /**
   * Print the usage message on stdout.
   */
  static void usage(Options options) {
    usage(options, null);
  }

  /**
   * Print the usage message on stdout, preceded by an <code>ERROR:</code> line when
   * <code>error</code> is non-null.
   */
  static void usage(Options options, String error) {
    if (error != null) {
      System.out.println("ERROR: " + error);
    }
    // The help text is hand-written rather than produced by HelpFormatter: HelpFormatter
    // can't output -Dproperty=value, and Options doesn't know how to process -D one=two
    // (a space between -D and the property-value), so print what Options can actually parse.
    final String [] helpLines = {
      "Usage: reporter <OPTIONS> TABLENAME",
      "OPTIONS:",
      " -h,--help        Output this help message",
      " -l,--limit       Scan row limit (per thread): default none",
      " -f,--fraction    Fraction of table Regions to read; between 0 and 1: default 1.0 (all)",
      " -r,--region      Scan this Region only; pass encoded name; 'fraction' is ignored.",
      " -t,--threads     Concurrent thread count (thread per Region); default 1",
      " -Dproperty=value Properties such as the zookeeper to connect to; e.g:",
      "                  -Dhbase.zookeeper.quorum=ZK0.remote.cluster.example.org",
    };
    for (String line : helpLines) {
      System.out.println(line);
    }
  }

  /**
   * Command-line entry point. Parses the options, applies any <code>-Dproperty=value</code>
   * pairs to a fresh HBase Configuration, then sketches each table named on the command line
   * in turn. All tables share one timestamp in their output file names.
   * Exits with status 0 after printing help (or when no table is named) and with status 1
   * when an option value is malformed or out of range.
   * @param args see {@link #usage(Options)}
   * @throws ParseException if the command line cannot be parsed
   */
  public static void main(String [] args)
  throws ParseException, IOException, ExecutionException, InterruptedException {
    Options options = new Options();
    Option help = Option.builder("h").longOpt("help").
        desc("output this help message").build();
    options.addOption(help);
    Option limitOption = Option.builder("l").longOpt("limit").hasArg().build();
    options.addOption(limitOption);
    Option fractionOption = Option.builder("f").longOpt("fraction").hasArg().build();
    options.addOption(fractionOption);
    Option regionOption = Option.builder("r").longOpt("region").hasArg().build();
    options.addOption(regionOption);
    Option threadsOption = Option.builder("t").longOpt("threads").hasArg().build();
    options.addOption(threadsOption);
    Option configOption = Option.builder("D").valueSeparator().argName("property=value").
        hasArgs().build();
    options.addOption(configOption);
    // Parse command-line.
    CommandLineParser parser = new DefaultParser();
    CommandLine commandLine = parser.parse(options, args);

    // Help asked for, or no table named: print usage and leave quietly.
    if (commandLine.hasOption(help.getOpt()) || commandLine.getArgList().isEmpty()) {
      usage(options);
      System.exit(0);
    }

    int limit = -1;
    String opt = limitOption.getOpt();
    if (commandLine.hasOption(opt)) {
      String value = commandLine.getOptionValue(opt);
      try {
        limit = Integer.parseInt(value);
      } catch (NumberFormatException e) {
        usage(options, "Bad limit: " + value + "; must be an integer");
        System.exit(1);
      }
    }
    double fraction = 1.0;
    opt = fractionOption.getOpt();
    if (commandLine.hasOption(opt)) {
      String value = commandLine.getOptionValue(opt);
      try {
        fraction = Double.parseDouble(value);
      } catch (NumberFormatException e) {
        usage(options, "Bad fraction: " + value + "; must be a number");
        System.exit(1);
      }
      // Written as a negated in-range test so that NaN is rejected too.
      if (!(fraction > 0 && fraction <= 1)) {
        usage(options, "Bad fraction: " + fraction + "; fraction must be > 0 and <= 1");
        System.exit(1);
      }
    }
    int threads = 1;
    opt = threadsOption.getOpt();
    if (commandLine.hasOption(opt)) {
      String value = commandLine.getOptionValue(opt);
      try {
        threads = Integer.parseInt(value);
      } catch (NumberFormatException e) {
        usage(options, "Bad thread count: " + value + "; must be an integer");
        System.exit(1);
      }
      if (threads > 1000 || threads <= 0) {
        usage(options, "Bad thread count: " + threads + "; must be > 0 and <= 1000");
        System.exit(1);
      }
    }

    String encodedRegionName = null;
    opt = regionOption.getOpt();
    if (commandLine.hasOption(opt)) {
      encodedRegionName = commandLine.getOptionValue(opt);
    }

    Configuration configuration = HBaseConfiguration.create();
    opt = configOption.getOpt();
    if (commandLine.hasOption(opt)) {
      // If many options, they all show up here in the keyValues
      // array, one after the other: key, value, key, value...
      String [] keyValues = commandLine.getOptionValues(opt);
      // A -Dproperty with no '=value' leaves a key without its value; refuse it rather than
      // run off the end of the array.
      if (keyValues.length % 2 != 0) {
        usage(options, "Bad -D option; expected -Dproperty=value: " + Arrays.toString(keyValues));
        System.exit(1);
      }
      for (int i = 0; i < keyValues.length; i += 2) {
        configuration.set(keyValues[i], keyValues[i + 1]);
      }
    }

    // Now process commands. There is at least one; the empty case exited above.
    String [] commands = commandLine.getArgs();
    String now = Instant.now().toString();
    for (String command : commands) {
      sketch(configuration, command, limit, fraction, threads, now, encodedRegionName);
    }
  }
}