package com.bigdata.etl.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.DataOutputStream;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.HashMap;
import java.util.Iterator;

public class LogOutputFormat<K, V> extends TextOutputFormat<K, V> {

    private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();

    static {
        NUMBER_FORMAT.setMinimumIntegerDigits(5);
        NUMBER_FORMAT.setGroupingUsed(false);
    }

    private RecordWriter writer = null;
    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
        if (writer == null) {
            writer = new MultiRecordWriter(job, getTaskOutputPath(job));
        }
        return writer;
    }

    private Path getTaskOutputPath(TaskAttemptContext conf) throws IOException {
        Path taskOutputPath;
        OutputCommitter committer = getOutputCommitter(conf);
        if (committer instanceof FileOutputCommitter) {
            taskOutputPath = ((FileOutputCommitter) committer).getWorkPath();
        } else {
            Path outputPaht = getOutputPath(conf);
            if (outputPaht == null) {
                throw new IOException("Undefined job output path.");
            }
            taskOutputPath = outputPaht;
        }
        return taskOutputPath;
    }

    public class MultiRecordWriter extends RecordWriter<K, V> {
        private HashMap<String, RecordWriter<K, V>> recordWriter;
        private TaskAttemptContext job;
        private Path outputPath;

        public MultiRecordWriter(TaskAttemptContext job, Path outputPath) {
            super();
            this.job = job;
            this.recordWriter = new HashMap<String, RecordWriter<K, V>>();
            this.outputPath = outputPath;
        }

        private String getFileBaseName(K key, String name) {
            return new StringBuilder(60).append(key.toString()).append("-").append(name).toString();
        }

        public void write(K key, V value) throws IOException, InterruptedException {
            TaskID taskID = job.getTaskAttemptID().getTaskID();
            int partition = taskID.getId();
            String baseName = getFileBaseName(key, NUMBER_FORMAT.format(partition));
            RecordWriter<K, V> rw = this.recordWriter.get(baseName);
            if (rw == null) {
                rw = getBaseRecordWriter(job, baseName);
                this.recordWriter.put(baseName, rw);
            }
            rw.write(null, value);
        }

        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            Iterator<RecordWriter<K, V>> values = this.recordWriter.values().iterator();
            while (values.hasNext()) {
                values.next().close(context);
            }
            this.recordWriter.clear();

        }

        private RecordWriter<K, V> getBaseRecordWriter(TaskAttemptContext job, String baseName) throws IOException {
            RecordWriter<K, V> recordWriter;
            boolean isCompressed = getCompressOutput(job);
            Configuration conf = job.getConfiguration();

            if (isCompressed) {
                Class<? extends CompressionCodec > codecClass = getOutputCompressorClass(job, GzipCodec.class);
                CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

                Path file = new Path(outputPath, baseName + codec.getDefaultExtension());
                FSDataOutputStream fileOut = file.getFileSystem(conf).create(file, false);
                recordWriter = new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)));
            } else {
                Path file = new Path(outputPath, baseName);
                FSDataOutputStream fileOut = file.getFileSystem(conf).create(file, false);
                recordWriter = new LineRecordWriter<K, V>(fileOut);
            }

            return recordWriter;
        }
    }
}