package com.lucidworks.hadoop.ingest;

import com.lucidworks.hadoop.io.LWDocumentWritable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import static com.lucidworks.hadoop.utils.ConfigurationKeys.*;
import static junit.framework.Assert.assertEquals;
import static junit.framework.TestCase.assertNotNull;


/**
 * Runs {@link DirectoryIngestMapper} as a map job over a small directory tree of
 * text, PDF and Word fixtures and checks the number of documents processed, both
 * with and without {@link DirectoryIngestMapper#DIRECTORY_ADD_SUBDIRECTORIES}.
 *
 * <p>The file system handle ({@code fs}), the output path and the job
 * running/assertion helpers are inherited from {@link BaseMiniClusterTestCase}.
 */
public class DirectoryIngestMapperTest extends BaseMiniClusterTestCase {
    private static final Logger log = LoggerFactory.getLogger(DirectoryIngestMapperTest.class);

    /**
     * Classpath folder that holds the fixture files. Class loader resource names
     * are always '/'-separated, regardless of the platform's file separator.
     */
    private static final String RESOURCE_DIR = "dir/";

    /** Number of fixture files copied into each of the two subdirectories. */
    private static final int FILES_PER_SUB_DIR = 2;

    /** Total number of fixture files that live below the top-level input directory. */
    private static final int SUB_DIR_FILES = 2 * FILES_PER_SUB_DIR;

    private Configuration conf;
    private JobConf jobConf;
    /** Total number of fixture files staged by {@link #setupDir}, including subdirectories. */
    private int tempFiles;

    /**
     * Stages the fixture files and builds the {@link JobConf} shared by the tests.
     *
     * @throws Exception if the fixtures cannot be staged
     */
    @Before
    public void setUp() throws Exception {
        conf = getDefaultDirectoryIngestMapperConfiguration();
        Path dir = new Path(fs.getWorkingDirectory(), "build");
        Path sub = new Path(dir, "DIMT");
        Path tempDir = new Path(sub, "tmp-dir");
        // This is the location where the fixture will write inputs.seq.
        Path seqDir = new Path(sub, "seq-dir");
        // setupDir creates tempDir itself before copying anything into it.
        tempFiles = setupDir(fs, tempDir);
        conf.set(TEMP_DIR, seqDir.toString());
        jobConf = new JobConf(conf);
        jobConf.setMapperClass(DirectoryIngestMapper.class);
        jobConf.setInputFormat(SequenceFileInputFormat.class);
        FileOutputFormat.setOutputPath(jobConf, OUTPUT_DIRECTORY_PATH);
        org.apache.hadoop.mapred.FileInputFormat.setInputPaths(jobConf, new Path(tempDir, "*"));
        Path[] paths = org.apache.hadoop.mapred.FileInputFormat.getInputPaths(jobConf);
        assertEquals(1, paths.length);
    }

    /** With subdirectory traversal disabled, only the top-level files are expected. */
    @Test
    public void testDir() throws Exception {
        jobConf.set(DirectoryIngestMapper.DIRECTORY_ADD_SUBDIRECTORIES, "false");
        // The files placed in subDirA and subDirB should be ignored.
        doTest(tempFiles - SUB_DIR_FILES);
    }

    /** With subdirectory traversal enabled, every staged file is expected. */
    @Test
    public void testSubDir() throws Exception {
        jobConf.set(DirectoryIngestMapper.DIRECTORY_ADD_SUBDIRECTORIES, "true");
        doTest(tempFiles);
    }

    /**
     * Builds the base configuration plus the collection, ZooKeeper and id-field
     * settings used by these tests.
     *
     * @return the configuration to seed the {@link JobConf} with
     */
    private Configuration getDefaultDirectoryIngestMapperConfiguration() {
        Configuration conf = getBaseConfiguration();
        conf.set(COLLECTION, "collection");
        conf.set(ZK_CONNECT, "localhost:0000");
        conf.set("idField", "id");

        return conf;
    }

    /**
     * Initialises the mapper fixture, runs the job and verifies the document count.
     *
     * @param expectedNumDocs number of documents the job is expected to process
     * @throws Exception if the fixture or the job fails
     */
    private void doTest(int expectedNumDocs) throws Exception {
        new DirectoryIngestMapper().getFixture().init(jobConf);
        Job job = Job.getInstance(jobConf);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LWDocumentWritable.class);

        List<String> results = runJobSuccessfully(job, expectedNumDocs);

        assertNumDocsProcessed(job, expectedNumDocs);
        for (String docStr : results) {
            assertNotNull(docStr);
        }
    }

    /**
     * Copies the classpath fixtures into {@code base} and into two subdirectories
     * of it ({@code subDirA} and {@code subDirB}).
     *
     * @param fs   file system to copy into
     * @param base directory to create and populate
     * @return the total number of files copied, subdirectories included
     * @throws URISyntaxException if a fixture URL cannot be converted to a URI
     * @throws IOException        if a fixture is missing or a copy fails
     */
    private int setupDir(FileSystem fs, Path base) throws URISyntaxException, IOException {
        fs.mkdirs(base);
        List<Path> paths = new ArrayList<Path>();
        int count = 0;

        // Top-level files.
        count += copyFixtures(fs, base, "frank_txt_", ".txt", ".txt", 6, paths);
        count += copyFixtures(fs, base, "test", ".pdf", ".pdf", 3, paths);
        count += copyFixtures(fs, base, "test", ".doc", ".doc", 2, paths);

        // Subdirectory A.
        Path subDirA = new Path(base, "subDirA");
        count += copyFixtures(fs, subDirA, "test", ".doc", ".doc", FILES_PER_SUB_DIR, paths);

        // Subdirectory B.
        // NOTE(review): the .doc fixtures are stored under .pdf names here; this looks
        // like a copy/paste leftover but is kept as-is - confirm whether it is intended.
        Path subDirB = new Path(base, "subDirB");
        count += copyFixtures(fs, subDirB, "test", ".doc", ".pdf", FILES_PER_SUB_DIR, paths);

        for (Path path : paths) {
            // getFileStatus fails if the copy did not actually produce the file.
            FileStatus fileStatus = fs.getFileStatus(path);
            log.info("Status: {}", fileStatus.getPath());
        }
        return count;
    }

    /**
     * Copies the numbered fixtures {@code prefix + i + srcSuffix} (for
     * {@code i} in {@code [0, n)}) from the classpath into {@code dstDir},
     * naming each copy {@code prefix + i + dstSuffix}.
     *
     * @param fs        file system to copy into
     * @param dstDir    destination directory
     * @param prefix    file name prefix shared by source and destination
     * @param srcSuffix extension of the classpath fixture
     * @param dstSuffix extension given to the copy
     * @param n         number of fixtures to copy
     * @param copied    list that receives every destination path, in copy order
     * @return the number of files copied
     * @throws URISyntaxException if a fixture URL cannot be converted to a URI
     * @throws IOException        if a fixture is missing or a copy fails
     */
    private int copyFixtures(FileSystem fs, Path dstDir, String prefix, String srcSuffix,
                             String dstSuffix, int n, List<Path> copied)
            throws URISyntaxException, IOException {
        ClassLoader loader = DirectoryIngestMapperTest.class.getClassLoader();
        for (int i = 0; i < n; i++) {
            String resourceName = RESOURCE_DIR + prefix + i + srcSuffix;
            java.net.URL resource = loader.getResource(resourceName);
            if (resource == null) {
                throw new IOException("Test resource not found on classpath: " + resourceName);
            }
            Path dst = new Path(dstDir, prefix + i + dstSuffix);
            copied.add(dst);
            fs.copyFromLocalFile(new Path(resource.toURI()), dst);
        }
        return n;
    }
}