/*
 * Copyright (C) 2014 Indeed Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.indeed.imhotep.archive;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.io.ByteStreams;
import com.indeed.imhotep.archive.compression.SquallArchiveCompressor;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import com.indeed.util.compress.CompressionOutputStream;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.security.DigestOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * writes a squall archive: local files are packed into archiveN.bin files under a target
 * directory and recorded as tab-separated entries in metadata.txt
 *
 * @author jsgroth
 */
public class SquallArchiveWriter {
    private static final Joiner TAB = Joiner.on("\t");
    private static final Joiner SLASH = Joiner.on("/");

    @VisibleForTesting
    static final Pattern ARCHIVE_FILENAME_PATTERN = Pattern.compile("^archive(\\d+)\\.bin$");

    private final FileSystem fs;
    private final Path path;

    private final List<FileMetadata> pendingMetadataWrites;

    private final SquallArchiveCompressor defaultCompressor;

    private int archivePathCounter;

    /**
     * creates an archive writer
     * be aware that if create is set to true, any existing archive at the given path will be destroyed
     * if there is currently no archive at the given path, create MUST be set to true; otherwise
     *   construction or a later call to {@link #commit()} will fail with a FileNotFoundException
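     *
     * <p>a minimal usage sketch, assuming an HDFS-backed FileSystem and hypothetical paths
     * (none of these values are supplied by this class):
     * <pre>{@code
     * FileSystem fs = FileSystem.get(new Configuration());
     * SquallArchiveWriter writer = new SquallArchiveWriter(fs, new Path("/archives/example"), true);
     * writer.appendFile(new File("segment.bin"));
     * writer.commit(); // metadata.txt is only updated here
     * }</pre>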
     *
     * @param fs a file system implementation
     * @param path the directory to write this archive to
     * @param create true to create a new archive from scratch, false to append to an existing archive
     * @throws IOException if there is an IO problem
     */
    public SquallArchiveWriter(FileSystem fs, Path path, boolean create) throws IOException {
        this(fs, path, create, SquallArchiveCompressor.NONE);
    }

    public SquallArchiveWriter(FileSystem fs, Path path, boolean create, SquallArchiveCompressor defaultCompressor) throws IOException {
        this.fs = fs;
        this.path = path;

        pendingMetadataWrites = Lists.newArrayList();

        this.defaultCompressor = defaultCompressor;

        if (create) {
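            // starting from scratch: truncate metadata.txt and delete any archive files left
            // behind by a previous archive at this path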
            archivePathCounter = 0;
            fs.create(new Path(path, "metadata.txt"), true).close();
            deleteExistingArchiveFiles(fs, path);
        } else {
            archivePathCounter = computeCurrentArchivePathCounter(fs, path);
        }
    }

    private static void deleteExistingArchiveFiles(FileSystem fs, Path path) throws IOException {
        for (final FileStatus status : fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return ARCHIVE_FILENAME_PATTERN.matcher(path.getName()).matches();
            }
        })) {
            fs.delete(status.getPath(), true);
        }
    }

    private static int computeCurrentArchivePathCounter(FileSystem fs, Path path) throws IOException {
        int max = -1;
        for (final FileStatus status : fs.listStatus(path)) {
            final String pathName = status.getPath().getName();
            final Matcher matcher = ARCHIVE_FILENAME_PATTERN.matcher(pathName);
            if (matcher.matches()) {
                final String numberString = matcher.group(1);
                final int number = Integer.parseInt(numberString);
                max = Math.max(max, number);
            }
        }
        return max + 1;
    }

    private Path newArchivePath() {
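        // archive files are named archive0.bin, archive1.bin, ...; when appending to an existing
        // archive the counter continues from the highest number already present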
        return new Path(path, "archive" + (archivePathCounter++) + ".bin");
    }

    /**
     * convenience method that calls {@link #appendFile(File)} or {@link #appendDirectory(File)}
     *   depending on whether file refers to a directory
     * does not modify metadata.txt until {@link #commit()} is called
     *
     * @param file the file or directory to append
     * @throws IOException if there is an IO problem
     */
    public void append(File file) throws IOException {
        append(file, defaultCompressor);
    }

    public void append(File file, SquallArchiveCompressor compressor) throws IOException {
        if (file.isDirectory()) {
            appendDirectory(file, compressor);
        } else {
            appendFile(file, compressor);
        }
    }

    /**
     * recursively appends a directory, stripping the root directory name from the stored filenames
     * this is safer than {@link #appendDirectory(File)} because it writes everything into a single
     *   archive file with one call to {@link org.apache.hadoop.fs.FileSystem#create(Path, boolean)}
     * there is no need to call {@link #commit()} after calling this method; it commits automatically
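     *
     * <p>for example, given a writer and a hypothetical local shard directory:
     * <pre>{@code
     * writer.batchAppendDirectory(new File("/tmp/shard"));
     * // every regular file under /tmp/shard is now in a single archiveN.bin and
     * // metadata.txt has already been updated, so no commit() is required
     * }</pre>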
     *
     * @param directory the directory to append
     * @throws IOException if there is an IO problem
     */
    public void batchAppendDirectory(File directory) throws IOException {
        batchAppendDirectory(directory, defaultCompressor);
    }

    public void batchAppendDirectory(File directory, SquallArchiveCompressor compressor) throws IOException {
        if (!directory.isDirectory()) {
            throw new FileNotFoundException(directory.getAbsolutePath() + " is not a directory");
        }

        batchAppend(Arrays.asList(sorted(directory.listFiles())), compressor);
    }

    /**
     * appends a set of files (or directories, which are recursed into) while creating only a
     *   single archive file via one call to {@link org.apache.hadoop.fs.FileSystem#create(Path, boolean)}
     * there is no need to call {@link #commit()} after calling this method; it commits automatically
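     *
     * <p>for example, with hypothetical local files:
     * <pre>{@code
     * writer.batchAppend(Arrays.asList(new File("a.bin"), new File("b.bin")));
     * }</pre>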
     *
     * @param files the files or directories to append
     * @throws IOException if there is an IO problem
     */
    public void batchAppend(Iterable<File> files) throws IOException {
        batchAppend(files, defaultCompressor);
    }

    public void batchAppend(Iterable<File> files, SquallArchiveCompressor compressor) throws IOException {
        batchAppend(files, compressor, newArchivePath());
    }

    private void batchAppend(Iterable<File> files, SquallArchiveCompressor compressor, Path archivePath) throws IOException {
        final FSDataOutputStream os = fs.create(archivePath, false);
        try {
            for (final File file : files) {
                if (file.isDirectory()) {
                    batchAppendDirectory(os, file, Lists.newArrayList(file.getName()), compressor, archivePath.getName());
                } else {
                    internalAppendFile(os, file, Collections.<String>emptyList(), compressor, archivePath.getName());
                }
            }
            commit();
        } finally {
            os.close();
        }
    }

    private void batchAppendDirectory(FSDataOutputStream os, File directory, List<String> parentDirectories, SquallArchiveCompressor compressor, String archiveFilename) throws IOException {
        for (final File file : sorted(directory.listFiles())) {
            if (file.isDirectory()) {
                final List<String> newParentDirectories = Lists.newArrayList(parentDirectories);
                newParentDirectories.add(file.getName());
                batchAppendDirectory(os, file, newParentDirectories, compressor, archiveFilename);
            } else {
                internalAppendFile(os, file, parentDirectories, compressor, archiveFilename);
            }
        }
    }

    /**
     * recursively appends a directory to the archive; unlike {@link #batchAppendDirectory(File)},
     *   the directory's own name is kept as a prefix of the stored filenames and each file is
     *   written to its own archive file
     * does not modify metadata.txt until {@link #commit()} is called
     *
     * @param directory the directory to append
     * @throws IOException if there is an IO problem
     */
    public void appendDirectory(File directory) throws IOException {
        appendDirectory(directory, defaultCompressor);
    }

    public void appendDirectory(File directory, SquallArchiveCompressor compressor) throws IOException {
        appendDirectory(directory, Collections.<String>emptyList(), compressor);
    }

    private void appendDirectory(File directory, List<String> parentDirectories, SquallArchiveCompressor compressor) throws IOException {
        if (!directory.exists() || !directory.isDirectory()) {
            throw new FileNotFoundException(directory.getAbsolutePath() + " either does not exist or is not a directory");
        }

        final List<String> newParentDirectories = new ArrayList<String>(parentDirectories);
        newParentDirectories.add(directory.getName().replaceAll("\\s+", "_"));
        for (final File file : sorted(directory.listFiles())) {
            if (file.isDirectory()) {
                appendDirectory(file, newParentDirectories, compressor);
            } else {
                appendFile(file, newParentDirectories, compressor);
            }
        }
    }

    /**
     * append a file from the local file system into the archive
     * does not modify metadata.txt until {@link #commit()} is called
     *
     * @param file a file on the local file system
     * @throws IOException if the file does not exist or if there is an IO problem
     */
    public void appendFile(File file) throws IOException {
        appendFile(file, defaultCompressor);
    }

    public void appendFile(File file, SquallArchiveCompressor compressor) throws IOException {
        appendFile(file, Collections.<String>emptyList(), compressor);
    }

    private void appendFile(File file, List<String> parentDirectories, SquallArchiveCompressor compressor) throws IOException {
        if (!file.exists() || file.isDirectory()) {
            throw new FileNotFoundException(file.getAbsolutePath() + " either does not exist or is a directory");
        }

        final Path archivePath = newArchivePath();

        final FSDataOutputStream os = fs.create(archivePath, false);
        try {
            internalAppendFile(os, file, parentDirectories, compressor, archivePath.getName());
        } finally {
            os.close();
        }
    }

    private void internalAppendFile(FSDataOutputStream os, File file, List<String> parentDirectories, SquallArchiveCompressor compressor, String archiveFilename) throws IOException {
        final String baseFilename = file.getName().replaceAll("\\s+", "_");
        final String filename = makeFilename(parentDirectories, baseFilename);
        final long size = file.length();
        final long timestamp = file.lastModified();
        final long startOffset = os.getPos();

        final InputStream is = new BufferedInputStream(new FileInputStream(file));
        final String checksum;
        try {
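            // stream the raw file bytes through an MD5 digest and then through the compressor
            // into the shared archive stream; the recorded checksum is of the uncompressed data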
            final CompressionOutputStream cos = compressor.newOutputStream(os);
            final DigestOutputStream dos = new DigestOutputStream(cos, ArchiveUtils.getMD5Digest());
            ByteStreams.copy(is, dos);
            checksum = ArchiveUtils.toHex(dos.getMessageDigest().digest());
            cos.finish();
        } finally {
            is.close();
        }

        pendingMetadataWrites.add(new FileMetadata(filename, size, timestamp, checksum, startOffset, compressor, archiveFilename));
    }

    /**
     * flushes pending metadata writes to metadata.txt by rewriting it through a temporary file;
     *   does nothing if there are no pending writes
     *
     * @throws IOException if there is an IO problem
     */
    public void commit() throws IOException {
        if (pendingMetadataWrites.isEmpty()) {
            return;
        }

        final Path metadataPath = new Path(path, "metadata.txt");
        final Path tmpMetadataPath = new Path(path, "metadata." + UUID.randomUUID() + ".txt.tmp");
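        // copy the existing entries plus the pending entries into the uniquely named temp file,
        // then swap it in for metadata.txt below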
        final BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(metadataPath), Charsets.UTF_8));
        try {
            final PrintWriter w = new PrintWriter(new OutputStreamWriter(fs.create(tmpMetadataPath, false), Charsets.UTF_8));
            try {
                for (String line = r.readLine(); line != null; line = r.readLine()) {
                    w.println(line);
                }
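                // each pending entry becomes one tab-separated line:
                // filename, size, timestamp, md5, startOffset, compressorKey, archiveFilename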
                for (final FileMetadata file : pendingMetadataWrites) {
                    w.println(TAB.join(file.getFilename(), file.getSize(), file.getTimestamp(), file.getChecksum(), file.getStartOffset(), file.getCompressor().getKey(), file.getArchiveFilename()));
                }
            } finally {
                w.close();
            }
        } finally {
            r.close();
        }
        fs.delete(metadataPath, false);
        fs.rename(tmpMetadataPath, metadataPath);
        pendingMetadataWrites.clear();
    }

    private static String makeFilename(List<String> parentDirectories, String baseFilename) {
        final List<String> stringsToJoin = new ArrayList<String>(parentDirectories);
        stringsToJoin.add(baseFilename);
        return SLASH.join(stringsToJoin);
    }

    private static File[] sorted(File[] files) {
        Arrays.sort(files, new Comparator<File>() {
            @Override
            public int compare(File o1, File o2) {
                return o1.getName().compareTo(o2.getName());
            }
        });
        return files;
    }
}