/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.proto.utils;

import com.google.protobuf.Message;
import com.google.protobuf.MessageOrBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.parquet.proto.ProtoParquetInputFormat;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;


/**
 * Reads data from given parquet file using MapReduce job.
 */
public class ReadUsingMR {

  private static List<Message> outputMessages;

  Configuration conf;
  private String projection;

  public void setRequestedProjection(String projection) {
    this.projection = projection;
  }

  public ReadUsingMR() {
    this(new Configuration());
  }

  public ReadUsingMR(Configuration conf) {
    this.conf = conf;
  }

  public Configuration getConfiguration() {
    return conf;
  }

  public static class ReadingMapper extends Mapper<Void, MessageOrBuilder, LongWritable, Message> {
    protected void map(Void key, MessageOrBuilder value, Context context) {
      Message clone = ((Message.Builder) value).build();
      outputMessages.add(clone);
    }
  }

  public List<Message> read(Path parquetPath) throws Exception {

    synchronized (ReadUsingMR.class) {
      outputMessages = new ArrayList<Message>();

      final Job job = new Job(conf, "read");
      job.setInputFormatClass(ProtoParquetInputFormat.class);
      ProtoParquetInputFormat.setInputPaths(job, parquetPath);
      if (projection != null) {
        ProtoParquetInputFormat.setRequestedProjection(job, projection);
      }

      job.setMapperClass(ReadingMapper.class);
      job.setNumReduceTasks(0);

      job.setOutputFormatClass(NullOutputFormat.class);

      WriteUsingMR.waitForJob(job);

      List<Message> result = Collections.unmodifiableList(outputMessages);
      outputMessages = null;
      return result;
    }
  }

}