/* 
 * Copyright 2018 Aerospike, Inc.
 *
 * Portions may be licensed to Aerospike, Inc. under one or more
 * contributor license agreements.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package com.aerospike.hadoop.examples.wordcountoutput;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
// These are all needed by MyOutputFormat.
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.aerospike.client.AerospikeClient;
import com.aerospike.client.Bin;
import com.aerospike.client.Key;
import com.aerospike.client.policy.WritePolicy;
import com.aerospike.hadoop.mapreduce.AerospikeOutputFormat;
import com.aerospike.hadoop.mapreduce.AerospikeRecordWriter;

public class WordCountOutput extends Configured implements Tool {

    private static final Log log = LogFactory.getLog(WordCountOutput.class);

    public static class Map
        extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> output,
                        Reporter reporter)
            throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    public static class Reduce
        extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {
                
        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> output,
                           Reporter reporter)
            throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static class MyOutputFormat
        extends AerospikeOutputFormat<Text, IntWritable> {

        public static class MyRecordWriter
            extends AerospikeRecordWriter<Text, IntWritable> {

            public MyRecordWriter(Configuration cfg, Progressable progressable) {
                super(cfg);
            }

            @Override
            public void writeAerospike(Text key,
                                       IntWritable value,
                                       AerospikeClient client,
                                       WritePolicy writePolicy,
                                       String namespace,
                                       String setName) throws IOException {
                Key kk = new Key(namespace, setName, key.toString());
                Bin bin1 = new Bin("word", key.toString());
                Bin bin2 = new Bin("count", value.get());
                client.put(writePolicy, kk, bin1, bin2);
            }
        }

        public RecordWriter<Text, IntWritable>
            getAerospikeRecordWriter(Configuration conf, Progressable prog) {
            return new MyRecordWriter(conf, prog);
        }
    }

    public int run(final String[] args) throws Exception {

        log.info("run starting");

        final Configuration conf = getConf();

        JobConf job = new JobConf(conf, WordCountOutput.class);
        job.setJobName("AerospikeWordCountOutput");

        for (int ii = 0; ii < args.length; ++ii) {
            FileInputFormat.addInputPath(job, new Path(args[ii]));
        }

        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setOutputFormat(MyOutputFormat.class);

        JobClient.runJob(job);

        log.info("finished");
        return 0;
    }

    public static void main(final String[] args) throws Exception {
        System.exit(ToolRunner.run(new WordCountOutput(), args));
    }
}

// Local Variables:
// mode: java
// c-basic-offset: 4
// tab-width: 4
// indent-tabs-mode: nil
// End:
// vim: softtabstop=4:shiftwidth=4:expandtab