/*
 * MIT License
 *
 * Copyright 2018 Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package org.broadinstitute.dropseqrna.barnyard.digitalexpression;

import java.io.BufferedInputStream;
import java.io.File;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;

import org.broadinstitute.dropseqrna.barnyard.digitalexpression.DgeIterator.DgeLine;

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import picard.util.TabbedInputParser;

/**
 * Given a DGE file, parse the header and return DGE data a line at a time.
 * Also holds convenience methods to look up cell barcode information (position, list of cell barcodes).
 * @author nemesh
 *
 */
public class DgeIterator implements CloseableIterator <DgeLine>{

	private final BufferedInputStream inputStream;
	private final DgeHeader dgeHeader;
	private final TabbedInputParser parser;
	// map of cell barcode to position.
	private final LinkedHashMap<String, Integer> identifierMap;
	private final String geneColumnLabel;

	/**
	 * Open the given file for reading and iterate over its contents.
	 * @param input the DGE file to read.
	 */
	public DgeIterator (final File input) {
		this(new BufferedInputStream(IOUtil.openFileForReading(input)), input.getAbsolutePath());
	}

	/**
	 * Decode the DGE header from the stream, then read the column header line that follows it.
	 * The first column of that line is kept as the gene column label; the remaining columns are
	 * the identifiers, recorded with their 0-based position in input order.
	 * If there is no line after the DGE header, the iterator has no identifiers, an empty gene
	 * column label, and no lines to return.
	 * @param inputStream the stream to read from.  It is closed by {@link #close()}.
	 * @param filename passed to the header codec along with the stream.
	 */
	public DgeIterator (final BufferedInputStream inputStream, final String filename) {
		this.inputStream=inputStream;
		final DgeHeaderCodec headerCodec = new DgeHeaderCodec();
		this.dgeHeader = headerCodec.decode(inputStream, filename);
		// set up a tabbed input parser for the line by line data.
		this.parser = new TabbedInputParser(false, inputStream);

		if (!parser.hasNext())  { // empty file.
			parser.close();
			identifierMap=new LinkedHashMap<>();
			geneColumnLabel="";
			return;
		}

		String [] header = parser.next();
		geneColumnLabel = header[0];

		// provide dependable iteration order for keys.
		LinkedHashMap<String, Integer> tempMap=new LinkedHashMap<>();
		// populate cell barcode map.
		for (int i=1; i<header.length; i++)
			// position map 0 based.
			tempMap.put(header[i], i-1);
		this.identifierMap= tempMap;

	}

	/**
	 * @return the DGE header decoded from the start of the input.
	 */
	public DgeHeader getDgeHeader () {
		return this.dgeHeader;
	}

	@Override
	public boolean hasNext() {
		return parser.hasNext();
	}

	/**
	 * Parse and return the next line of expression data.
	 * @return the next line, or null if there are no more lines.  Note that null is returned
	 * instead of throwing an exception when the iterator is exhausted.
	 */
	@Override
	public DgeLine next() {
		if (!hasNext()) return null;
		String [] line = this.parser.next();
		return new DgeLine(this.identifierMap, line);
	}

	/**
	 * Close the parser and the underlying input stream.
	 */
	@Override
	public void close() {
		CloserUtil.close(this.parser);
		CloserUtil.close(this.inputStream);
	}

	/**
	 * Currently a no-op: the body is empty, so the lines returned by this iterator are not
	 * restricted in any way.  Use {@link DgeLine#subset(Set)} to subset an individual line.
	 * @param identifiers ignored.
	 */
	public void subset (final Set<String> identifiers) {

	}

	/**
	 * Identifiers are ordered in the same way they are originally input.
	 * @return a new list holding the identifiers in input order.
	 */
	public List<String> getIdentifiers () {
		return new ArrayList<>(this.identifierMap.keySet());
	}

	/**
	 * Get the identifier at the top left hand corner of the matrix that labels the gene column.
	 * This is usually "GENE" for DGE files, but may be different for meta-cells or eQTL data.
	 * @return the first column of the column header line, or an empty string if the input had no such line.
	 */
	public String getGeneColumnLabel() {
		return geneColumnLabel;
	}

	/**
	 * One row of the expression matrix: a gene name plus one expression value per identifier.
	 */
	public class DgeLine {
		private String gene;
		private double [] expression;
		LinkedHashMap<String, Integer> identifierMap;

		/**
		 * Build a line from the raw fields of one row.
		 * @param identifierMap map from identifier to 0-based position in the expression array.
		 * @param line the row's fields: the gene name followed by one value per column, each parsed with Double.parseDouble.
		 */
		DgeLine (final LinkedHashMap<String, Integer> identifierMap, final String [] line) {
			this.identifierMap=identifierMap;
			expression= new double [line.length-1];
			this.gene=line[0];
			for (int i=1; i<line.length; i++)
				expression[i-1]=Double.parseDouble(line[i]);
		}

		/**
		 * Build a line from already parsed values.  The array is stored as given, not copied.
		 * @param identifierMap map from identifier to 0-based position in the expression array.
		 * @param gene the gene name for this row.
		 * @param expression the expression values for this row.
		 */
		DgeLine (final LinkedHashMap<String, Integer> identifierMap, final String gene, final double [] expression) {
			this.identifierMap=identifierMap;
			this.gene=gene;
			this.expression=expression;
		}

		/**
		 * @return the gene name for this row.
		 */
		public String getGene () {
			return this.gene;
		}

		/**
		 * @param identifier the identifier to look up.
		 * @return the expression value stored at that identifier's position.
		 * @throws IllegalStateException if the identifier is not in the identifier map.
		 */
		public double getExpression (final String identifier) {
			Integer pos = identifierMap.get(identifier);
			if (pos==null)
				throw new IllegalStateException ("Asked for an identifier ["+identifier+"] that doesn't exist.");
			return expression[pos];
		}

		/**
		 * Overwrite the expression value stored at an identifier's position.
		 * @param identifier the identifier to update.
		 * @param value the new expression value.
		 * @throws IllegalStateException if the identifier is not in the identifier map.
		 */
		public void setExpression (final String identifier, final double value) {
			Integer pos = identifierMap.get(identifier);
			if (pos==null)
				throw new IllegalStateException ("Asked for an identifier ["+identifier+"] that doesn't exist.");
			expression[pos]=value;
		}

		/**
		 * @return the internal expression array itself (not a copy), indexed by identifier position.
		 */
		public double [] getExpression () {
			return this.expression;
		}

		/**
		 * @return the key set of the identifier map.  This is a view of the map, not a copy.
		 */
		public Set<String> getIdentifiers () {
			return identifierMap.keySet();
		}

		LinkedHashMap<String, Integer> getIdentifierMap() {
			return this.identifierMap;
		}

		/**
		 * Is there any identifier with a non-zero expression value?
		 * Each value is tested individually rather than testing the sum of the row, so that
		 * positive and negative values that cancel out are still detected as non-zero.
		 * @return return true if at least one identifier is non-zero.
		 */
		public boolean isNonZero () {
			return Arrays.stream(this.expression).anyMatch(value -> value != 0d);
		}

		/**
		 * Build a new line restricted to the requested identifiers.
		 * The retained identifiers keep the relative order they have in this line and are
		 * renumbered from 0.  Requested identifiers that are not present in this line are ignored.
		 * @param identifiers the identifiers to retain.
		 * @return a new line for the same gene whose expression array has one entry per retained identifier.
		 */
		public DgeLine subset(final Set<String> identifiers) {
			LinkedHashMap<String, Integer> newMap = new LinkedHashMap<>();

			int index=0;
			// upper bound on the number of matches; trimmed below once the real count is known.
			double [] exp = new double [Math.min(identifiers.size(), this.identifierMap.size())];

			for (String id: this.identifierMap.keySet())
				if (identifiers.contains(id)) {
					newMap.put(id, index);
					exp[index]=this.getExpression(id);
					index++;
				}

			// drop unused trailing slots so the array length always matches the new identifier map.
			if (index<exp.length)
				exp=Arrays.copyOf(exp, index);

			return new DgeLine(newMap, this.getGene(), exp);
		}

	}

}