package org.broadinstitute.hellbender.fakedata;

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor;
import org.apache.commons.math3.linear.RealMatrix;
import org.broadinstitute.hellbender.tools.exome.*;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.function.Function;
import java.util.stream.IntStream;

/**
 * GC bias data consists of a {@link ReadCountCollection} and an array of gc contents
 * in order of the read counts' targets.
 *
 * @author David Benjamin <[email protected]>
 */
public class GCBiasSimulatedData {

    // a reasonable default GC bias curve
    private static Function<Double, Double> QUADRATIC_GC_BIAS_CURVE = gc -> 0.5 + 2*gc*(1-gc);

    // visible for the integration test
    public static Pair<ReadCountCollection, double[]> simulatedData(final int numTargets, final int numSamples) {
        final List<Target> phonyTargets = SimulatedTargets.phonyTargets(numTargets);
        final List<String> phonySamples = SimulatedSamples.phonySamples(numSamples);

        final Random random = new Random(13);
        final double[] gcContentByTarget = IntStream.range(0, numTargets)
                .mapToDouble(n -> 0.5 + 0.2*random.nextGaussian())
                .map(x -> Math.min(x,0.95)).map(x -> Math.max(x,0.05)).toArray();
        final double[] gcBiasByTarget = Arrays.stream(gcContentByTarget).map(QUADRATIC_GC_BIAS_CURVE::apply).toArray();

        // model mainly GC bias with a small random amount of non-GC bias
        // thus noise after GC correction should be nearly zero
        final RealMatrix counts = new Array2DRowRealMatrix(numTargets, numSamples);
        counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
            @Override
            public double visit(final int target, final int column, final double value) {
                return gcBiasByTarget[target]*(1.0 + 0.01*random.nextDouble());
            }
        });
        final ReadCountCollection rcc = new ReadCountCollection(phonyTargets, phonySamples, counts);
        return new ImmutablePair<>(rcc, gcContentByTarget);
    }

    /**
     *
     * @param readCountsFile    A simulated read counts file with GC bias effects
     * @param targetsFile       A simulated targets file with GC content annotation
     */
    public static void makeGCBiasInputFiles(final Pair<ReadCountCollection, double[]> data,
                                            final File readCountsFile, final File targetsFile) throws IOException {
        final ReadCountCollection inputCounts = data.getLeft();
        final double[] gcContentByTarget = data.getRight();
        ReadCountCollectionUtils.write(readCountsFile, inputCounts);

        final TargetWriter writer = new TargetWriter(targetsFile, Collections.singleton(TargetAnnotation.GC_CONTENT));
        for (int i = 0; i < gcContentByTarget.length; i++) {
            final Target unannotatedTarget = inputCounts.records().get(i).getTarget();
            final TargetAnnotationCollection annotations = new TargetAnnotationCollection();
            annotations.put(TargetAnnotation.GC_CONTENT, Double.toString(gcContentByTarget[i]));
            final Target target = new Target(unannotatedTarget.getName(), unannotatedTarget.getInterval(), annotations);
            writer.writeRecord(target);
        }
        writer.close();
    }
}