Java Code Examples for org.apache.commons.math3.linear.RealMatrix#setRow()

The following examples show how to use org.apache.commons.math3.linear.RealMatrix#setRow(). Each example is taken from an open-source project; the source file, project, and license are noted in the header above it.
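Before the project examples, a minimal, self-contained sketch of the method itself may help (the class name and values below are invented for illustration). setRow(int row, double[] array) overwrites one entire row and requires the array length to match the matrix's column dimension.

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;

public class SetRowDemo {
    public static void main(String[] args) {
        // 2 x 3 matrix, zero-filled on construction
        final RealMatrix m = new Array2DRowRealMatrix(2, 3);
        m.setRow(0, new double[] {1.0, 2.0, 3.0});
        m.setRow(1, new double[] {4.0, 5.0, 6.0});
        System.out.println(m.getEntry(1, 2)); // prints 6.0

        // Passing an array whose length differs from the column dimension
        // fails fast with a MatrixDimensionMismatchException:
        // m.setRow(0, new double[] {1.0, 2.0});
    }
}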
Example 1
Source File: SparkConverter.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Create an Apache Commons RealMatrix from a Spark RowMatrix.
 *
 * @param r Never {@code null}
 * @param cachedNumRows Checking the number of rows in {@code r} can be time-consuming.  Provide the value here, if it is already known.
 *                      Use {@code -1} if unknown.
 * @return Never {@code null}
 */
public static RealMatrix convertSparkRowMatrixToRealMatrix(final RowMatrix r, final int cachedNumRows) {

    Utils.nonNull(r, "Input row matrix cannot be null");

    int numRows;
    if (cachedNumRows == -1) {
        // This takes a while in Spark
        numRows = (int) r.numRows();
    } else {
        numRows = cachedNumRows;
    }

    final int numCols = (int) r.numCols();

    // This cast is required, even though it would not seem necessary, at first.  Exact reason why is unknown.
    //   Will fail compilation if the cast is removed.
    final Vector [] rowVectors = (Vector []) r.rows().collect();

    final RealMatrix result = new Array2DRowRealMatrix(numRows, numCols);
    for (int i = 0; i < numRows; i++) {
        result.setRow(i, rowVectors[i].toArray() );
    }
    return result;
}
 
Example 2
Source File: ReadCountCollection.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Rearrange the targets so that they are in a particular order.
 * @return a new collection.
 * @throws IllegalArgumentException if any of the following is true:
 * <ul>
 *     <li>{@code targetsInOrder} is {@code null},</li>
 *     <li>is empty,</li>
 *     <li>it contains {@code null},</li>
 *     <li>contains any target not present in this collection.</li>
 * </ul>
 */
public ReadCountCollection arrangeTargets(final List<Target> targetsInOrder) {
    Utils.nonNull(targetsInOrder);
    Utils.nonEmpty(targetsInOrder, "the input targets list cannot be empty");
    final RealMatrix counts = new Array2DRowRealMatrix(targetsInOrder.size(), columnNames.size());
    final Object2IntMap<Target> targetToIndex = new Object2IntOpenHashMap<>(targets.size());
    for (int i = 0; i < targets.size(); i++) {
        targetToIndex.put(targets.get(i), i);
    }
    for (int i = 0; i < targetsInOrder.size(); i++) {
        final Target target = targetsInOrder.get(i);
        Utils.validateArg(targetToIndex.containsKey(target), () -> String.format("target '%s' is not present in the collection", target.getName()));
        counts.setRow(i, this.counts.getRow(targetToIndex.getInt(target)));
    }
    return new ReadCountCollection(new ArrayList<>(targetsInOrder), columnNames, counts, false);
}
 
Example 3
Source File: CreateReadCountPanelOfNormals.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static RealMatrix constructReadCountMatrix(final Logger logger,
                                                   final List<File> inputReadCountFiles,
                                                   final SAMSequenceDictionary sequenceDictionary,
                                                   final List<SimpleInterval> intervals) {
    logger.info("Validating and aggregating input read-counts files...");
    final int numSamples = inputReadCountFiles.size();
    final int numIntervals = intervals.size();
    final RealMatrix readCountMatrix = new Array2DRowRealMatrix(numSamples, numIntervals);
    final ListIterator<File> inputReadCountFilesIterator = inputReadCountFiles.listIterator();
    while (inputReadCountFilesIterator.hasNext()) {
        final int sampleIndex = inputReadCountFilesIterator.nextIndex();
        final File inputReadCountFile = inputReadCountFilesIterator.next();
        logger.info(String.format("Aggregating read-counts file %s (%d / %d)", inputReadCountFile, sampleIndex + 1, numSamples));
        final SimpleCountCollection readCounts = SimpleCountCollection.read(inputReadCountFile);
        if (!CopyNumberArgumentValidationUtils.isSameDictionary(readCounts.getMetadata().getSequenceDictionary(), sequenceDictionary)) {
            logger.warn(String.format("Sequence dictionary for read-counts file %s does not match those in other read-counts files.", inputReadCountFile));
        }
        Utils.validateArg(readCounts.getIntervals().equals(intervals),
                String.format("Intervals for read-counts file %s do not match those in other read-counts files.", inputReadCountFile));
        readCountMatrix.setRow(sampleIndex, readCounts.getCounts());
    }
    return readCountMatrix;
}
 
Example 4
Source File: SparkConverter.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Create an Apache Commons RealMatrix from a Spark RowMatrix.
 *
 * @param r Never {@code null}
 * @param cachedNumRows Checking the number of rows in {@code r} can be time-consuming.  Provide the value here, if it is already known.
 *                      Use {@code -1} if unknown.
 * @return Never {@code null}
 */
public static RealMatrix convertSparkRowMatrixToRealMatrix(final RowMatrix r, final int cachedNumRows) {

    Utils.nonNull(r, "Input row matrix cannot be null");

    int numRows;
    if (cachedNumRows == -1) {
        // This takes a while in Spark
        numRows = (int) r.numRows();
    } else {
        numRows = cachedNumRows;
    }

    final int numCols = (int) r.numCols();

    // This cast is required, even though it would not seem necessary, at first.  Exact reason why is unknown.
    //   Will fail compilation if the cast is removed.
    final Vector [] rowVectors = (Vector []) r.rows().collect();

    final RealMatrix result = new Array2DRowRealMatrix(numRows, numCols);
    for (int i = 0; i < numRows; i++) {
        result.setRow(i, rowVectors[i].toArray() );
    }
    return result;
}
 
Example 5
Source File: PoNTestUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Reads a very basic tsv (numbers separated by tabs) into a RealMatrix.
 * <p>Very little error checking happens in this method</p>
 *
 * @param inputFile readable file.  Not {@code null}
 * @return never {@code null}
 */
public static RealMatrix readTsvIntoMatrix(final File inputFile) {
    IOUtils.canReadFile(inputFile);
    final List<double []> allData = new ArrayList<>();
    int ctr = 0;
    try {

        final CSVReader reader = new CSVReader(new FileReader(inputFile), '\t', CSVWriter.NO_QUOTE_CHARACTER);
        String[] nextLine;
        while ((nextLine = reader.readNext()) != null) {
            ctr++;
            allData.add(Arrays.stream(nextLine).filter(s -> StringUtils.trim(s).length() > 0).map(s -> Double.parseDouble(StringUtils.trim(s))).mapToDouble(d -> d).toArray());
        }
    } catch (final IOException ioe) {
        Assert.fail("Could not open test file: " + inputFile, ioe);
    }
    final RealMatrix result = new Array2DRowRealMatrix(allData.size(), allData.get(0).length);
    for (int i = 0; i < result.getRowDimension(); i++) {
        result.setRow(i, allData.get(i));
    }
    return result;
}
 
Example 6
Source File: SomaticLikelihoodsEngineUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testEvidence() {
    // one exact limit for the evidence is when the likelihoods of each read are so peaked (i.e. the most likely allele
    // of each read is much likelier than all other alleles) that the sum over latent read-to-allele assignments
    // (that is, over the indicator z in the notes) is dominated by the max-likelihood allele configuration
    // and thus the evidence reduces to exactly integrating out the Dirichlet allele fractions

    final double[] prior = new double[] {1, 2};
    final RealMatrix logLikelihoods = new Array2DRowRealMatrix(2, 4);
    logLikelihoods.setRow(0, MathUtils.applyToArray(new double[] {0.1, 4.0, 3.0, -10}, MathUtils::log10ToLog));
    logLikelihoods.setRow(1, MathUtils.applyToArray(new double[] {-12, -9, -5.0, 0.5}, MathUtils::log10ToLog));
    final double calculatedLogEvidence = SomaticLikelihoodsEngine.logEvidence(logLikelihoods, prior);
    final double[] maxLikelihoodCounts = new double[] {3, 1};
    final double expectedLogEvidence = SomaticLikelihoodsEngine.logDirichletNormalization(prior)
            - SomaticLikelihoodsEngine.logDirichletNormalization(MathArrays.ebeAdd(prior, maxLikelihoodCounts))
            + new IndexRange(0,logLikelihoods.getColumnDimension()).sum(read -> logLikelihoods.getColumnVector(read).getMaxValue());
    Assert.assertEquals(calculatedLogEvidence, expectedLogEvidence, 1e-5);

    // when there's just one read we can calculate the likelihood exactly

    final double[] prior2 = MathUtils.applyToArray(new double[] {1, 2}, MathUtils::log10ToLog);
    final RealMatrix log10Likelihoods2 = new Array2DRowRealMatrix(2, 1);
    log10Likelihoods2.setRow(0, MathUtils.applyToArray(new double[] {0.1}, MathUtils::log10ToLog));
    log10Likelihoods2.setRow(1, MathUtils.applyToArray(new double[] {0.5}, MathUtils::log10ToLog));
    final double calculatedLogEvidence2 = SomaticLikelihoodsEngine.logEvidence(log10Likelihoods2, prior2);
    final double[] delta0 = new double[] {1, 0};
    final double[] delta1 = new double[] {0, 1};
    final double expectedLogEvidence2 = NaturalLogUtils.logSumExp(log10Likelihoods2.getEntry(0,0) +
            SomaticLikelihoodsEngine.logDirichletNormalization(prior2)
            - SomaticLikelihoodsEngine.logDirichletNormalization(MathArrays.ebeAdd(prior2, delta0)),
            + log10Likelihoods2.getEntry(1,0) +
            SomaticLikelihoodsEngine.logDirichletNormalization(prior2)
                    - SomaticLikelihoodsEngine.logDirichletNormalization(MathArrays.ebeAdd(prior2, delta1)));
    Assert.assertEquals(calculatedLogEvidence2, expectedLogEvidence2, 0.05);
}
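As a sketch of the math behind the first assertion: in the peaked limit described in the comment above, each read r contributes only through its most likely allele, and n denotes the per-allele tally of those best alleles ({3, 1} here). The evidence then reduces, approximately, to

    log Z ≈ Σ_r max_a log ℓ(r, a) + log B(α + n) − log B(α),

where α is the Dirichlet prior and B is the multivariate beta function. Since logDirichletNormalization(α) is the log of the Dirichlet normalizing constant, i.e. −log B(α), this is exactly the expectedLogEvidence assembled above.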
 
Example 7
Source File: SparkConverter.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Convert a local (not distributed) Spark Matrix to an Apache Commons matrix.
 *
 * @param r Never {@code null}
 * @return Not {@code null}
 */
public static RealMatrix convertSparkMatrixToRealMatrix(final Matrix r){
    final RealMatrix result = new Array2DRowRealMatrix(r.numRows(), r.numCols());
    final double [] columnMajorMat = r.toArray();
    for (int i = 0; i < r.numRows(); i++) {
        result.setRow(i, Arrays.copyOfRange(columnMajorMat, i * r.numCols(), i * r.numCols() + r.numCols()) );
    }
    return result;
}
 
Example 8
Source File: FilterIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static RealMatrix constructReadCountMatrix(final Logger logger,
                                                   final List<File> inputReadCountFiles,
                                                   final SimpleIntervalCollection intersectedIntervals) {
    logger.info("Validating and aggregating input read-counts files...");
    final int numSamples = inputReadCountFiles.size();
    final int numIntervals = intersectedIntervals.size();
    //construct the interval subset to pull out from the read-count files
    final Set<SimpleInterval> intervalSubset = new HashSet<>(intersectedIntervals.getRecords());
    final RealMatrix readCountMatrix = new Array2DRowRealMatrix(numSamples, numIntervals);
    final ListIterator<File> inputReadCountFilesIterator = inputReadCountFiles.listIterator();
    while (inputReadCountFilesIterator.hasNext()) {
        final int sampleIndex = inputReadCountFilesIterator.nextIndex();
        final File inputReadCountFile = inputReadCountFilesIterator.next();
        logger.info(String.format("Aggregating read-counts file %s (%d / %d)", inputReadCountFile, sampleIndex + 1, numSamples));
        final SimpleCountCollection readCounts = SimpleCountCollection.read(inputReadCountFile);
        if (!CopyNumberArgumentValidationUtils.isSameDictionary(
                readCounts.getMetadata().getSequenceDictionary(),
                intersectedIntervals.getMetadata().getSequenceDictionary())) {
            logger.warn(String.format("Sequence dictionary for read-counts file %s is inconsistent with those for other inputs.", inputReadCountFile));
        }
        final double[] subsetReadCounts = readCounts.getRecords().stream()
                .filter(c -> intervalSubset.contains(c.getInterval()))
                .mapToDouble(SimpleCount::getCount)
                .toArray();
        Utils.validateArg(subsetReadCounts.length == intervalSubset.size(),
                String.format("Intervals for read-count file %s do not contain all specified intervals.",
                        inputReadCountFile));
        readCountMatrix.setRow(sampleIndex, subsetReadCounts);
    }
    return readCountMatrix;
}
 
Example 9
Source File: SparkConverter.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Convert a local (not distributed) Spark Matrix to an Apache Commons matrix.
 *
 * @param r Never {@code null}
 * @return Not {@code null}
 */
public static RealMatrix convertSparkMatrixToRealMatrix(final Matrix r){
    final RealMatrix result = new Array2DRowRealMatrix(r.numRows(), r.numCols());
    final double [] columnMajorMat = r.toArray();
    for (int i = 0; i < r.numRows(); i++) {
        result.setRow(i, Arrays.copyOfRange(columnMajorMat, i * r.numCols(), i * r.numCols() + r.numCols()) );
    }
    return result;
}
 
Example 10
Source File: Covariance.java    From macrobase with Apache License 2.0
public static RealMatrix getCovariance(List<Datum> data) {
    int rank = data.get(0).metrics().getDimension();

    RealMatrix ret = new Array2DRowRealMatrix(data.size(), rank);
    int index = 0;
    for (Datum d : data) {
        ret.setRow(index, d.metrics().toArray());
        index += 1;
    }

    return (new org.apache.commons.math3.stat.correlation.Covariance(ret)).getCovarianceMatrix();
}
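The same pattern works without macrobase's Datum type: any per-observation double[] can be loaded row by row with setRow() before handing the matrix to Commons Math's Covariance, which treats each row as an observation and each column as a variable. A minimal, self-contained sketch (class name and sample values are invented for illustration):

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.stat.correlation.Covariance;

public class CovarianceSketch {
    public static void main(String[] args) {
        // Three observations of two variables; rows are observations, columns are variables.
        final double[][] observations = {
                {1.0, 2.0},
                {2.0, 4.1},
                {3.0, 5.9}
        };
        final RealMatrix data = new Array2DRowRealMatrix(observations.length, 2);
        for (int i = 0; i < observations.length; i++) {
            data.setRow(i, observations[i]);
        }
        final RealMatrix cov = new Covariance(data).getCovarianceMatrix();
        System.out.println(cov);
    }
}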
 
Example 11
Source File: HDF5PCACoveragePoNCreationUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
@DataProvider(name="singleEigensample")
public Object[][] simpleEigensampleData() {
    final List<Object[]> result = new ArrayList<>();
    final int NUM_TARGETS = 10;
    final int NUM_SAMPLES = 5;
    final List<Target> targets = IntStream.range(0, NUM_TARGETS).boxed()
            .map(i -> new Target("target_" + i, new SimpleInterval("1", 100*i + 1, 100*i + 5)))
            .collect(Collectors.toList());
    final List<String> columnNames = IntStream.range(0, NUM_SAMPLES).boxed()
            .map(i -> "sample_" + i)
            .collect(Collectors.toList());

    double [][] countsArray = new double[NUM_TARGETS][NUM_SAMPLES];
    final RealMatrix counts = new Array2DRowRealMatrix(countsArray);

    // All row data is the same (0,1,2,3,4...)
    final double [] rowData = IntStream.range(0, NUM_SAMPLES).boxed()
            .mapToDouble(i -> i).toArray();
    for (int i = 0; i < NUM_TARGETS; i++) {
        counts.setRow(i, rowData);
    }

    new ReadCountCollection(targets, columnNames, counts);

    result.add(new Object[] {new ReadCountCollection(targets, columnNames, counts)});
    return result.toArray(new Object[result.size()][]);
}
 
Example 12
Source File: SomaticLikelihoodsEngineUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
@Test
public void testEvidence() {
    // one exact limit for the evidence is when the likelihoods of each read are so peaked (i.e. the most likely allele
    // of each read is much likelier than all other alleles) that the sum over latent read-to-allele assignments
    // (that is, over the indicator z in the notes) is dominated by the max-likelihood allele configuration
    // and thus the evidence reduces to exactly integrating out the Dirichlet allele fractions

    final double[] prior = new double[] {1, 2};
    final RealMatrix log10Likelihoods = new Array2DRowRealMatrix(2, 4);
    log10Likelihoods.setRow(0, new double[] {0.1, 4.0, 3.0, -10});
    log10Likelihoods.setRow(1, new double[] {-12, -9, -5.0, 0.5});
    final double calculatedLog10Evidence = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods, prior);
    final double[] maxLikelihoodCounts = new double[] {3, 1};
    final double expectedLog10Evidence = SomaticLikelihoodsEngine.log10DirichletNormalization(prior)
            - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior, maxLikelihoodCounts))
            + new IndexRange(0,log10Likelihoods.getColumnDimension()).sum(read -> log10Likelihoods.getColumnVector(read).getMaxValue());
    Assert.assertEquals(calculatedLog10Evidence, expectedLog10Evidence, 1e-5);

    // when there's just one read we can calculate the likelihood exactly

    final double[] prior2 = new double[] {1, 2};
    final RealMatrix log10Likelihoods2 = new Array2DRowRealMatrix(2, 1);
    log10Likelihoods2.setRow(0, new double[] {0.1});
    log10Likelihoods2.setRow(1, new double[] {0.5});
    final double calculatedLog10Evidence2 = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods2, prior2);
    final double[] delta0 = new double[] {1, 0};
    final double[] delta1 = new double[] {0, 1};
    final double expectedLog10Evidence2 = MathUtils.log10SumLog10(log10Likelihoods2.getEntry(0,0) +
            SomaticLikelihoodsEngine.log10DirichletNormalization(prior2)
            - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta0)),
            + log10Likelihoods2.getEntry(1,0) +
            SomaticLikelihoodsEngine.log10DirichletNormalization(prior2)
                    - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta1)));
    Assert.assertEquals(calculatedLog10Evidence2, expectedLog10Evidence2, 0.05);
}
 
Example 13
Source File: CaseToPoNTargetMapper.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Re-arrange the input rows from the PoN to the case data target order.
 * @param ponCounts count matrix with rows organized using the PoN target order.
 * @return never {@code null}, a new matrix with the row order changed according to the case read-count target order.
 */
public RealMatrix fromPoNToCaseCounts(final RealMatrix ponCounts) {
    final Map<String,Integer> ponTargetsIndexes = IntStream.range(0, ponTargetNames.size()).boxed()
            .collect(Collectors.toMap(ponTargetNames::get, Function.identity()));
    final RealMatrix result = new Array2DRowRealMatrix(ponCounts.getRowDimension(),ponCounts.getColumnDimension());
    for (int i = 0; i < outputTargets.size(); i++) {
        final Target target = outputTargets.get(i);
        result.setRow(i, ponCounts.getRow(ponTargetsIndexes.get(target.getName())));
    }
    return result;
}
 
Example 14
Source File: CaseToPoNTargetMapper.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Re-arrange case read counts based on the PoN target order.
 * @param caseCounts the input counts to rearrange.
 * @return never {@code null}, a new matrix with rows sorted according to the PoN target order.
 */
public RealMatrix fromCaseToPoNCounts(final RealMatrix caseCounts) {
    Utils.nonNull(caseCounts, "Case-sample counts cannot be null.");
    final RealMatrix result = new Array2DRowRealMatrix(ponTargetNames.size(), caseCounts.getColumnDimension());
    for (int i = 0; i < ponTargetNames.size(); i++) {
        final String targetName = ponTargetNames.get(i);
        final Integer inputIndex = caseTargetIndexes.get(targetName);
        if (inputIndex == null) {
            throw new UserException.BadInput(String.format("missing PoN target name %s in input read counts", targetName));
        }
        result.setRow(i, caseCounts.getRow(inputIndex));
    }
    return result;
}
 
Example 15
Source File: KDTree.java    From macrobase with Apache License 2.0
/**
 * Build a KD-Tree that makes the splits based on the midpoint of the widest dimension.
 * This is the approach described in [Gray, Moore 2003] based on [Deng, Moore 1995].
 * @param data
 * @param leafCapacity
 */
public KDTree(List<Datum> data, int leafCapacity) {
    this.leafCapacity = leafCapacity;
    this.k = data.get(0).metrics().getDimension();
    this.boundaries = new double[k][2];

    boundaries = AlgebraUtils.getBoundingBox(data);

    if (data.size() > this.leafCapacity) {

        double[] differences = new double[this.k];
        for (int i = 0; i < k; i++) {
            differences[i] = this.boundaries[i][1] - this.boundaries[i][0];
        }

        int widestDimension = 0;
        double maxWidth = -1;
        for (int i = 0; i < k; i++) {
            if (differences[i] > maxWidth) {
                maxWidth = differences[i];
                widestDimension = i;
            }
        }

        this.splitDimension = widestDimension;

        // XXX: This is the slow part!!!
        Collections.sort(data, new DatumComparator(splitDimension));

        int splitIndex = data.size() / 2;
        Datum belowSplit = data.get(splitIndex - 1);
        Datum aboveSplit = data.get(splitIndex);
        this.splitValue = 0.5 * (
                aboveSplit.metrics().getEntry(splitDimension) + belowSplit.metrics().getEntry(splitDimension)
        );

        this.loChild = new KDTree(data.subList(0, splitIndex), leafCapacity);
        this.hiChild = new KDTree(data.subList(splitIndex, data.size()), leafCapacity);
        this.nBelow = data.size();

        this.mean = (loChild.mean.mapMultiply(loChild.nBelow)
                     .add(hiChild.mean.mapMultiply(hiChild.nBelow))
                     .mapDivide(loChild.nBelow + hiChild.nBelow));
    } else {
        this.items = data;
        this.nBelow = data.size();

        RealMatrix ret = new Array2DRowRealMatrix(data.size(), this.k);
        RealVector sum = new ArrayRealVector(this.k);

        int index = 0;
        for (Datum d : data) {
            ret.setRow(index, d.metrics().toArray());
            sum = sum.add(d.metrics());
            index += 1;
        }

        this.mean = sum.mapDivide(this.nBelow);
    }
}
 
Example 16
Source File: SomaticLikelihoodsEngineUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
@Test
public void testAlleleFractionsPosterior() {

    //likelihoods completely favor allele 0 over allele 1 for every read, so
    // we should get no counts for allele 1
    final double[] prior1 = new double[] {1, 1};
    final RealMatrix mat1 = new Array2DRowRealMatrix(2, 4);
    mat1.setRow(0, new double[] {0, 0, 0, 0});
    mat1.setRow(1, new double[] {-10, -10, -10, -10});
    final double[] posterior1 = SomaticLikelihoodsEngine.alleleFractionsPosterior(mat1, prior1);
    final double[] expectedCounts1 = new double[] {4, 0};

    final double expectedPosterior1[] = MathArrays.ebeAdd(prior1, expectedCounts1);
    Assert.assertArrayEquals(posterior1, expectedPosterior1, 1.0e-6);

    //prior is extremely strong and outweighs ambiguous likelihoods
    final double[] prior2 = new double[] {1e8, 1};
    final RealMatrix mat2 = new Array2DRowRealMatrix(2, 4);
    mat2.setRow(0, new double[] {0, 0, 0, 0});
    mat2.setRow(1, new double[] {0, 0, 0, 0});
    final double[] posterior2 = SomaticLikelihoodsEngine.alleleFractionsPosterior(mat2, prior2);
    final double[] expectedCounts2 = new double[] {4, 0};

    final double expectedPosterior2[] = MathArrays.ebeAdd(prior2, expectedCounts2);
    Assert.assertArrayEquals(posterior2, expectedPosterior2, 1.0e-6);

    //prior is extremely weak and likelihoods speak for themselves
    final double[] prior3 = new double[] {1e-6, 1e-6};
    final RealMatrix mat3 = new Array2DRowRealMatrix(2, 4);
    mat3.setRow(0, new double[] {0, 0, 0, -10});
    mat3.setRow(1, new double[] {-10, -10, -10, 0});
    final double[] posterior3 = SomaticLikelihoodsEngine.alleleFractionsPosterior(mat3, prior3);
    final double[] expectedCounts3 = new double[] {3, 1};

    final double expectedPosterior3[] = MathArrays.ebeAdd(prior3, expectedCounts3);
    Assert.assertArrayEquals(posterior3, expectedPosterior3, 1.0e-6);

    // test convergence
    final double[] prior4 = new double[] {0.2, 1.7};
    final RealMatrix mat4 = new Array2DRowRealMatrix(2, 4);
    mat4.setRow(0, new double[] {0.1, 5.2, 0.5, 0.2});
    mat4.setRow(1, new double[] {2.6, 0.6, 0.5, 0.4});
    final double[] posterior4 = SomaticLikelihoodsEngine.alleleFractionsPosterior(mat4, prior4);
    final double[] counts4 = MathArrays.ebeSubtract(posterior4, prior4);
    Assert.assertArrayEquals(counts4, SomaticLikelihoodsEngine.getEffectiveCounts(mat4, posterior4), 1.0e-3);
}
 
Example 17
Source File: SomaticLikelihoodsEngineUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testAlleleFractionsPosterior() {

    //likelihoods completely favor allele 0 over allele 1 for every read, so
    // we should get no counts for allele 1
    final double[] prior1 = new double[] {1, 1};
    final RealMatrix mat1 = new Array2DRowRealMatrix(2, 4);
    mat1.setRow(0, new double[] {0, 0, 0, 0});
    mat1.setRow(1, new double[] {-30, -30, -30, -30});
    final double[] posterior1 = SomaticLikelihoodsEngine.alleleFractionsPosterior(mat1, prior1);
    final double[] expectedCounts1 = new double[] {4, 0};

    final double expectedPosterior1[] = MathArrays.ebeAdd(prior1, expectedCounts1);
    assertEqualsDoubleArray(posterior1, expectedPosterior1, 1.0e-6);

    //prior is extremely strong and outweighs ambiguous likelihoods
    final double[] prior2 = new double[] {1e8, 1};
    final RealMatrix mat2 = new Array2DRowRealMatrix(2, 4);
    mat2.setRow(0, new double[] {0, 0, 0, 0});
    mat2.setRow(1, new double[] {0, 0, 0, 0});
    final double[] posterior2 = SomaticLikelihoodsEngine.alleleFractionsPosterior(mat2, prior2);
    final double[] expectedCounts2 = new double[] {4, 0};

    final double expectedPosterior2[] = MathArrays.ebeAdd(prior2, expectedCounts2);
    assertEqualsDoubleArray(posterior2, expectedPosterior2, 1.0e-6);

    //prior is extremely weak and likelihoods speak for themselves
    final double[] prior3 = new double[] {1e-6, 1e-6};
    final RealMatrix mat3 = new Array2DRowRealMatrix(2, 4);
    mat3.setRow(0, new double[] {0, 0, 0, -30});
    mat3.setRow(1, new double[] {-30, -30, -30, 0});
    final double[] posterior3 = SomaticLikelihoodsEngine.alleleFractionsPosterior(mat3, prior3);
    final double[] expectedCounts3 = new double[] {3, 1};

    final double expectedPosterior3[] = MathArrays.ebeAdd(prior3, expectedCounts3);
    assertEqualsDoubleArray(posterior3, expectedPosterior3, 1.0e-6);

    // test convergence
    final double[] prior4 = new double[] {0.2, 1.7};
    final RealMatrix mat4 = new Array2DRowRealMatrix(2, 4);
    mat4.setRow(0, new double[] {0.1, 5.2, 0.5, 0.2});
    mat4.setRow(1, new double[] {2.6, 0.6, 0.5, 0.4});
    final double[] posterior4 = SomaticLikelihoodsEngine.alleleFractionsPosterior(mat4, prior4);
    final double[] counts4 = MathArrays.ebeSubtract(posterior4, prior4);
    assertEqualsDoubleArray(counts4, SomaticLikelihoodsEngine.getEffectiveCounts(mat4, posterior4), 1.0e-3);
}