package com.lucidworks.hadoop.ingest;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.junit.Test;

import java.util.List;

import static com.lucidworks.hadoop.utils.ConfigurationKeys.COLLECTION;
import static com.lucidworks.hadoop.utils.ConfigurationKeys.ZK_CONNECT;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

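/**
 * Verifies that {@link SequenceFileIngestMapper} ingests all 776 records of the
 * Frankenstein Text/Text sequence file when run against the mini cluster.
 */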
public class SequenceFileIngestMapperTest extends BaseMiniClusterTestCase {

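    // Sample sequence file (Text key / Text value) bundled with the test resources.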
    private static final Path LOCAL_FRANKENSTEIN_SEQ_FILE = new Path(SequenceFileIngestMapperTest.class.getClassLoader()
            .getResource("sequence/frankenstein_text_text.seq").toString());

    // The Frankenstein sample sequence file contains 776 records.
    private static final int EXPECTED_DOC_COUNT = 776;

    @Test
    public void testSequenceFileIngest() throws Exception {
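        // Stage the sample sequence file on the mini cluster's HDFS.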
        prepareFrankensteinSeqFileInput();
        Configuration conf = getDefaultSequenceFileIngestMapperConfiguration();
        Job job = createJobBasedOnConfiguration(conf, SequenceFileIngestMapper.class);
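        // SequenceFileInputFormat here is the old (mapred) API, so it has to be set on the
        // JobConf; the new-API Job is backed by a JobConf, which makes this cast safe.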
        ((JobConf)job.getConfiguration()).setInputFormat(SequenceFileInputFormat.class);

        List<String> results = runJobSuccessfully(job, EXPECTED_DOC_COUNT);

        assertNumDocsProcessed(job, EXPECTED_DOC_COUNT);
        assertEquals(EXPECTED_DOC_COUNT, results.size());
        for (String docStr : results) {
            assertNotNull(docStr);
        }
    }

    private void prepareFrankensteinSeqFileInput() throws Exception {
        copyLocalInputToHdfs(LOCAL_FRANKENSTEIN_SEQ_FILE.toUri().toString(), "frankenstein_text_text.seq");
    }

    private Configuration getDefaultSequenceFileIngestMapperConfiguration() {
        Configuration conf = getBaseConfiguration();
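        // Register the mock LucidWorks serialization so the test runs without a live backend.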
        conf.set("io.serializations", "com.lucidworks.hadoop.io.impl.LWMockSerealization");
        conf.set(COLLECTION, "collection");
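        // Placeholder ZooKeeper address for the mocked setup.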
        conf.set(ZK_CONNECT, "localhost:0000");
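        // Field in each ingested document that carries its unique id.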
        conf.set("idField", "id");

        return conf;
    }
}