package datawave.query.testframework;

import datawave.ingest.csv.mr.input.CSVRecordReader;
import datawave.ingest.data.RawRecordContainer;
import datawave.ingest.data.TypeRegistry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

import java.io.File;
import java.io.IOException;
import java.net.URI;

/**
 * Loads a CSV test file and appends each parsed record to a sequence file for ingestion.
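 * <p>
 * A minimal usage sketch; the configuration contents, file locations, and the writer's value class are illustrative assumptions rather than requirements of
 * this class (the value class must match the concrete event class produced by the reader):
 *
 * <pre>{@code
 * Configuration conf = new Configuration();
 * // assumed: conf already carries the CSV data type definitions read by TypeRegistry
 * URI csvFile = new File("target/test-data/my-data.csv").toURI();
 * Path seqPath = new Path("target/test-data/my-data.seq");
 *
 * try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
 *                 SequenceFile.Writer.file(seqPath),
 *                 SequenceFile.Writer.keyClass(Text.class),
 *                 // assumed value class; use whichever event class the reader actually emits
 *                 SequenceFile.Writer.valueClass(RawRecordContainerImpl.class))) {
 *     new CSVTestFileLoader(csvFile, conf).loadTestData(writer);
 * }
 * }</pre>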
 */
public class CSVTestFileLoader implements TestFileLoader {
    
    private final URI uri;
    private final Configuration conf;
    
    /**
     * @param u
     *            URI of the CSV test file to load
     * @param cf
     *            hadoop configuration supplying the data type definitions and record reader settings
     */
    CSVTestFileLoader(URI u, Configuration cf) {
        this.uri = u;
        this.conf = cf;
    }
    
    @Override
    public void loadTestData(SequenceFile.Writer seqFile) throws IOException {
        // rebuild the type registry from the supplied configuration so the CSV data type is registered
        TypeRegistry.reset();
        TypeRegistry.getInstance(this.conf);
        // create a single split covering the entire CSV file
        Path path = new Path(this.uri);
        File file = new File(this.uri);
        FileSplit split = new FileSplit(path, 0, file.length(), null);
        TaskAttemptContext ctx = new TaskAttemptContextImpl(this.conf, new TaskAttemptID());
        
        try (CSVRecordReader reader = new CSVRecordReader()) {
            reader.initialize(split, ctx);
            // append each parsed record to the sequence file with an empty key
            while (reader.nextKeyValue()) {
                RawRecordContainer raw = reader.getEvent();
                seqFile.append(new Text(), raw);
            }
            }
        }
    }
    
}