/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred.gridmix;

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// TODO: could be replaced with a specialized form of GridmixJob
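/**
 * Job that writes random input data for a Gridmix run: a configurable total
 * number of bytes (GRIDMIX_GEN_BYTES) below the given output directory, with
 * one map task per active task tracker so the write load is spread across
 * the cluster.
 */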
class GenerateData extends GridmixJob {

  /**
   * Total bytes to write.
   */
  public static final String GRIDMIX_GEN_BYTES = "gridmix.gen.bytes";

  /**
 * Maximum size, in bytes, of each file written.
   */
  public static final String GRIDMIX_GEN_CHUNK = "gridmix.gen.bytes.per.file";

  /**
 * Size, in bytes, of each write to the output file.
   */
  public static final String GRIDMIX_VAL_BYTES = "gendata.val.bytes";

  /**
   * Status reporting interval, in megabytes.
   */
  public static final String GRIDMIX_GEN_INTERVAL = "gendata.interval.mb";

  public GenerateData(Configuration conf, Path outdir, long genbytes)
      throws IOException {
    super(conf, 0L, "GRIDMIX_GENDATA");
    job.getConfiguration().setLong(GRIDMIX_GEN_BYTES, genbytes);
    FileOutputFormat.setOutputPath(job, outdir);
  }
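  // A minimal usage sketch (hypothetical driver code; within Gridmix the job
  // is submitted through its job-submission pipeline):
  //
  //   Configuration conf = new Configuration();
  //   Job job = new GenerateData(conf, new Path("/gridmix/input"),
  //       1L << 30).call();                     // generate 1 GB
  //   job.waitForCompletion(false);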

  @Override
  public Job call() throws IOException, InterruptedException,
                           ClassNotFoundException {
    job.setMapperClass(GenDataMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(GenDataFormat.class);
    job.setOutputFormatClass(RawBytesOutputFormat.class);
    job.setJarByClass(GenerateData.class);
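    // Placeholder input path: GenDataFormat ignores it and synthesizes its
    // splits from the cluster's tracker list.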
    FileInputFormat.addInputPath(job, new Path("ignored"));
    job.submit();
    return job;
  }

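  /**
   * Mapper that writes random data: each input value is a byte count, and
   * the mapper emits that many random bytes in fixed-size chunks.
   */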
  public static class GenDataMapper
      extends Mapper<NullWritable,LongWritable,NullWritable,BytesWritable> {

    private BytesWritable val;
    private final Random r = new Random();

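    // Allocate the value buffer once per task; map() refills it with random
    // bytes for each chunk. The buffer size defaults to 1 MB.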
    @Override
    protected void setup(Context context)
        throws IOException, InterruptedException {
      val = new BytesWritable(new byte[
          context.getConfiguration().getInt(GRIDMIX_VAL_BYTES, 1024 * 1024)]);
    }

    @Override
    public void map(NullWritable key, LongWritable value, Context context)
        throws IOException, InterruptedException {
      // The input value is a byte count; emit chunks of random data until it
      // is consumed. Size chunks from the buffer's capacity so a short final
      // chunk in one call cannot shrink the chunk size for later calls.
      final int chunk = val.getCapacity();
      for (long bytes = value.get(); bytes > 0; bytes -= val.getLength()) {
        r.nextBytes(val.getBytes());
        val.setSize((int)Math.min(chunk, bytes));
        context.write(key, val);
      }
    }

  }

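  /**
   * InputFormat that fabricates its splits from the cluster's active task
   * tracker list rather than from any real input: one split per tracker,
   * locality-hinted to that tracker's host.
   */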
  static class GenDataFormat extends InputFormat<NullWritable,LongWritable> {

    @Override
    public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
      final JobClient client =
        new JobClient(new JobConf(jobCtxt.getConfiguration()));
      ClusterStatus stat = client.getClusterStatus(true);
      final long toGen =
        jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
      if (toGen < 0) {
        throw new IOException("Invalid/missing generation bytes: " + toGen);
      }
      final int nTrackers = stat.getTaskTrackers();
      if (nTrackers <= 0) {
        throw new IOException("No task trackers available for data generation");
      }
      // Integer division: up to (nTrackers - 1) bytes of the request are
      // dropped rather than redistributed.
      final long bytesPerTracker = toGen / nTrackers;
      final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
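      // Active tracker names have the form "tracker_<host>:<port>"; capture
      // the host for use as the split's locality hint.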
      final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
      final Matcher m = trackerPattern.matcher("");
      for (String tracker : stat.getActiveTrackerNames()) {
        m.reset(tracker);
        if (!m.find()) {
          System.err.println("Skipping node: " + tracker);
          continue;
        }
        final String name = m.group(1);
        splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
      }
      return splits;
    }

    @Override
    public RecordReader<NullWritable,LongWritable> createRecordReader(
        InputSplit split, final TaskAttemptContext taskContext)
        throws IOException {
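      // The reader fabricates its records: each value is a byte count for
      // the mapper to write, capped at the status-reporting interval
      // (GRIDMIX_GEN_INTERVAL) so the task reports progress between chunks.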
      return new RecordReader<NullWritable,LongWritable>() {
        long written = 0L;  // bytes handed to the mapper so far
        long write = 0L;    // size of the current record, in bytes
        long RINTERVAL;     // reporting interval, in bytes
        long toWrite;       // total bytes this split should generate
        final NullWritable key = NullWritable.get();