/*
 * Copyright 2017 Google LLC
 *
 *  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.cloud.hadoop.io.bigquery;

import com.google.cloud.hadoop.util.HadoopToStringUtil;
import com.google.common.base.Preconditions;
import com.google.common.flogger.GoogleLogger;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

/**
 * The GsonRecordReader reads records from GCS through GHFS. It takes newline-delimited Json files
 * in GCS and reads them through LineRecordReader. It parses each line in the file split into
 * key/value pairs: the key is the key reported by the LineRecordReader for that line (the position
 * of the line within the file, not a line number) and the value is the JsonObject represented by
 * the line. These pairs are passed as input to the Mapper.
 */
public class GsonRecordReader extends RecordReader<LongWritable, JsonObject> {
  private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

  // A LineRecordReader which handles most calls. The GsonRecordReader just provides a wrapper which
  // translates the results of LineRecordReader into Json objects. Null until initialize() has
  // created it.
  private LineRecordReader lineReader;

  // Current key. The same instance is reused and updated in place for every record.
  private final LongWritable currentKey = new LongWritable(0L);

  // Current value. Null until the first record has been read successfully.
  private JsonObject currentValue;

  // Total key, value pairs read. A long so that very large splits cannot overflow the counter.
  private long count;

  // Used to parse the JsonObject from the LineRecordReader output.
  private JsonParser jsonParser;

  /**
   * Called once at initialization to initialize the RecordReader.
   *
   * @param genericSplit the split that defines the range of records to read; must be a {@link
   *     FileSplit}.
   * @param context the information about the task.
   * @throws IllegalArgumentException if {@code genericSplit} is not a {@link FileSplit}.
   * @throws IOException on IO Error.
   */
  @Override
  public void initialize(InputSplit genericSplit, TaskAttemptContext context)
      throws IOException, InterruptedException {
    // Only build the (potentially expensive) string representations when fine logging is enabled.
    if (logger.atFine().isEnabled()) {
      logger.atFine().log(
          "initialize('%s', '%s')",
          HadoopToStringUtil.toString(genericSplit), HadoopToStringUtil.toString(context));
    }
    Preconditions.checkArgument(genericSplit instanceof FileSplit,
        "InputSplit genericSplit should be an instance of FileSplit.");
    // Get FileSplit.
    FileSplit fileSplit = (FileSplit) genericSplit;
    // Create the JsonParser.
    jsonParser = new JsonParser();
    // Initialize the LineRecordReader.
    lineReader = new LineRecordReader();
    lineReader.initialize(fileSplit, context);
  }

  /**
   * Reads the next key, value pair. Gets next line and parses Json object.
   *
   * <p>When there is no further line, the current key and value are left untouched, so they keep
   * referring to the last record that was read successfully. Unchecked exceptions raised by the
   * Json parser for a line that is not a Json object are not caught here and propagate to the
   * caller.
   *
   * @return true if a key/value pair was read.
   * @throws IOException on IO Error.
   */
  @Override
  public boolean nextKeyValue() throws IOException {
    // If there is no next value, return false and keep the current key and value as they are.
    // Different Hadoop recordreaders have different behavior for calling current key and value
    // after nextKeyValue returns false.
    if (!lineReader.nextKeyValue()) {
      logger.atFine().log("All values read: record reader read %s key, value pairs.", count);
      return false;
    }
    // Get the next line. The key is copied into our own LongWritable rather than shared with the
    // underlying reader.
    currentKey.set(lineReader.getCurrentKey().get());
    Text lineValue = lineReader.getCurrentValue();
    currentValue = jsonParser.parse(lineValue.toString()).getAsJsonObject();
    // Increment count of key, value pairs.
    count++;
    return true;
  }

  /**
   * Gets the current key.
   *
   * @return the current key; never null. Holds 0 until the first record has been read, and the key
   *     of the last record read after that. The returned instance is reused between records.
   */
  @Override
  public LongWritable getCurrentKey() {
    return currentKey;
  }

  /**
   * Gets the current value.
   *
   * @return the most recently parsed value, or null if no record has been read yet.
   */
  @Override
  public JsonObject getCurrentValue() {
    return currentValue;
  }

  /**
   * Returns the current progress of the record reader through its data.
   *
   * @return the progress reported by the underlying LineRecordReader.
   * @throws IOException on IO Error.
   */
  @Override
  public float getProgress() throws IOException {
    return lineReader.getProgress();
  }

  /**
   * Closes the record reader. Does nothing if the reader was never successfully initialized.
   *
   * @throws IOException on IO Error.
   */
  @Override
  public void close() throws IOException {
    // lineReader is only assigned in initialize(); if that was never called or failed before the
    // reader was created there is nothing to close, and throwing a NullPointerException here
    // would only hide the original failure.
    if (lineReader != null) {
      lineReader.close();
    }
  }
}