Java Code Examples for org.apache.commons.math3.random.RandomGeneratorFactory#createRandomGenerator()

The following examples show how to use org.apache.commons.math3.random.RandomGeneratorFactory#createRandomGenerator() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CopyRatioModellerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testMCMC() {
    final double variance = 0.01;
    final double outlierProbability = 0.05;
    final int numSegments = 100;
    final double averageIntervalsPerSegment = 100.;
    final int numSamples = 150;
    final int numBurnIn = 50;
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));

    final SampleLocatableMetadata metadata = new SimpleSampleLocatableMetadata(
            "test-sample",
            new SAMSequenceDictionary(IntStream.range(0, numSegments)
                    .mapToObj(i -> new SAMSequenceRecord("chr" + i + 1, 10000))
                    .collect(Collectors.toList())));
    final CopyRatioSimulatedData simulatedData = new CopyRatioSimulatedData(
            metadata, variance, outlierProbability, numSegments, averageIntervalsPerSegment, rng);

    final CopyRatioModeller modeller = new CopyRatioModeller(simulatedData.getData().getCopyRatios(), simulatedData.getData().getSegments());
    modeller.fitMCMC(numSamples, numBurnIn);

    assertCopyRatioPosteriorCenters(modeller, simulatedData);
}
 
Example 2
Source File: AdaptiveMetropolisSamplerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testGaussian() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    for (final double theoreticalMean : Arrays.asList(0)) {
        for (final double precision : Arrays.asList(1.0)) {
            final double variance = 1/precision;

            //Note: this is the theoretical standard deviation of the sample mean given uncorrelated
            //samples.  The sample mean will have a greater variance here because samples are correlated.
            final double standardDeviationOfMean = Math.sqrt(variance / NUM_SAMPLES);

            final Function<Double, Double> logPDF = x -> -(precision/2)*(x-theoreticalMean)*(x-theoreticalMean);
            final AdaptiveMetropolisSampler sampler = new AdaptiveMetropolisSampler(INITIAL_GAUSSIAN_GUESS, INITIAL_STEP_SIZE,
                    Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY);
            final List<Double> samples = sampler.sample(rng, logPDF, NUM_SAMPLES, NUM_BURN_IN_STEPS);

            final double sampleMean = samples.stream().mapToDouble(x -> x).average().getAsDouble();
            final double sampleMeanSquare = samples.stream().mapToDouble(x -> x*x).average().getAsDouble();
            final double sampleVariance = (sampleMeanSquare - sampleMean * sampleMean)*NUM_SAMPLES/(NUM_SAMPLES-1);

            Assert.assertEquals(sampleMean, theoreticalMean, 6 * standardDeviationOfMean);
            Assert.assertEquals(sampleVariance, variance, variance/10);
        }
    }
}
 
Example 3
Source File: AdaptiveMetropolisSamplerUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testBeta() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    for (final double a : Arrays.asList(10, 20, 30)) {
        for (final double b : Arrays.asList(10, 20, 30)) {

            final double theoreticalMean = a / (a + b);
            final double theoreticalVariance = a*b/((a+b)*(a+b)*(a+b+1));

            //Note: this is the theoretical standard deviation of the sample mean given uncorrelated
            //samples.  The sample mean will have a greater variance here because samples are correlated.
            final double standardDeviationOfMean = Math.sqrt(theoreticalVariance / NUM_SAMPLES);

            final Function<Double, Double> logPDF = x -> (a - 1) * Math.log(x) + (b - 1) * Math.log(1 - x);
            final AdaptiveMetropolisSampler sampler = new AdaptiveMetropolisSampler(INITIAL_BETA_GUESS, INITIAL_STEP_SIZE, 0, 1);
            final List<Double> samples = sampler.sample(rng, logPDF, NUM_SAMPLES, NUM_BURN_IN_STEPS);

            final double sampleMean = samples.stream().mapToDouble(x -> x).average().getAsDouble();
            final double sampleMeanSquare = samples.stream().mapToDouble(x -> x*x).average().getAsDouble();
            final double sampleVariance = (sampleMeanSquare - sampleMean * sampleMean)*NUM_SAMPLES/(NUM_SAMPLES-1);

            Assert.assertEquals(sampleMean, theoreticalMean, 10 * standardDeviationOfMean);
            Assert.assertEquals(sampleVariance, theoreticalVariance, 10e-4);
        }
    }
}
 
Example 4
Source File: AdaptiveMetropolisSamplerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testBeta() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    for (final double a : Arrays.asList(10, 20, 30)) {
        for (final double b : Arrays.asList(10, 20, 30)) {

            final double theoreticalMean = a / (a + b);
            final double theoreticalVariance = a*b/((a+b)*(a+b)*(a+b+1));

            //Note: this is the theoretical standard deviation of the sample mean given uncorrelated
            //samples.  The sample mean will have a greater variance here because samples are correlated.
            final double standardDeviationOfMean = Math.sqrt(theoreticalVariance / NUM_SAMPLES);

            final Function<Double, Double> logPDF = x -> (a - 1) * Math.log(x) + (b - 1) * Math.log(1 - x);
            final AdaptiveMetropolisSampler sampler = new AdaptiveMetropolisSampler(INITIAL_BETA_GUESS, INITIAL_STEP_SIZE, 0, 1);
            final List<Double> samples = sampler.sample(rng, logPDF, NUM_SAMPLES, NUM_BURN_IN_STEPS);

            final double sampleMean = samples.stream().mapToDouble(x -> x).average().getAsDouble();
            final double sampleMeanSquare = samples.stream().mapToDouble(x -> x*x).average().getAsDouble();
            final double sampleVariance = (sampleMeanSquare - sampleMean * sampleMean)*NUM_SAMPLES/(NUM_SAMPLES-1);

            Assert.assertEquals(sampleMean, theoreticalMean, 10 * standardDeviationOfMean);
            Assert.assertEquals(sampleVariance, theoreticalVariance, 10e-4);
        }
    }
}
 
Example 5
Source File: AlleleFractionSegmenterUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testChromosomesOnDifferentSegments() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));

    final double[] trueMinorAlleleFractions = new double[] {0.12, 0.32, 0.5};
    final double trueMemoryLength = 1e5;
    final AlleleFractionGlobalParameters trueParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);

    // randomly set positions
    final int chainLength = 100;
    final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength/4);
    positions.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr2", chainLength, rng, trueMemoryLength/4));
    positions.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr3", chainLength, rng, trueMemoryLength/4));

    final int trueState = 2;    //fix everything to the same state 2
    final List<Double> minorAlleleFractionSequence = Collections.nCopies(positions.size(), trueMinorAlleleFractions[trueState]);

    final AllelicCountCollection counts = generateCounts(minorAlleleFractionSequence, positions, rng, trueParams);

    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(10, counts, AllelicPanelOfNormals.EMPTY_PON);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();

    //check that each chromosome has at least one segment
    final int numDifferentContigsInSegments = (int) segments.stream().map(ModeledSegment::getContig).distinct().count();
    Assert.assertEquals(numDifferentContigsInSegments, 3);
}
 
Example 6
Source File: BM.java    From pyramid with Apache License 2.0 6 votes vote down vote up
public BM(int numClusters, int dimension, long randomSeed) {
    this.numClusters = numClusters;
    this.dimension = dimension;
    this.distributions = new BernoulliDistribution[numClusters][dimension];
    this.mixtureCoefficients = new double[numClusters];
    Arrays.fill(mixtureCoefficients,1.0/numClusters);
    this.logMixtureCoefficients = new double[numClusters];
    Arrays.fill(logMixtureCoefficients,Math.log(1.0/numClusters));
    Random random = new Random(randomSeed);
    RandomGenerator randomGenerator = RandomGeneratorFactory.createRandomGenerator(random);
    UniformRealDistribution uniform = new UniformRealDistribution(randomGenerator, 0.25,0.75);
    for (int k=0;k<numClusters;k++){
        for (int d=0;d<dimension;d++){
            double p = uniform.sample();
            distributions[k][d] = new BernoulliDistribution(p);
        }
    }
    this.logClusterConditioinalForEmpty = new double[numClusters];
    updateLogClusterConditioinalForEmpty();

    this.names = new ArrayList<>(dimension);
    for (int d=0;d<dimension;d++){
        names.add(""+d);
    }
}
 
Example 7
Source File: AlleleFractionSegmenterUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testSegmentation() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));

    final List<Double> trueWeights = Arrays.asList(0.2, 0.5, 0.3);
    final List<Double> trueMinorAlleleFractions = Arrays.asList(0.12, 0.32, 0.5);
    final double trueMemoryLength = 1e5;

    final AlleleFractionGlobalParameters trueParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);

    final AlleleFractionHMM trueModel = new AlleleFractionHMM(trueMinorAlleleFractions, trueWeights,
            trueMemoryLength, AllelicPanelOfNormals.EMPTY_PON, trueParams);

    // randomly set positions
    final int chainLength = 10000;
    final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength/4);
    final List<Integer> trueStates = trueModel.generateHiddenStateChain(positions);
    final List<Double> truthMinorFractions = trueStates.stream().map(trueModel::getMinorAlleleFraction).collect(Collectors.toList());
    final AllelicCountCollection counts = generateCounts(truthMinorFractions, positions, rng, trueParams);

    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(10, counts, AllelicPanelOfNormals.EMPTY_PON);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    final double[] segmentMinorFractions = segments.stream()
            .flatMap(s -> Collections.nCopies((int) s.getTargetCount(), s.getSegmentMean()).stream())
            .mapToDouble(x->x).toArray();

    final double averageMinorFractionError = IntStream.range(0, truthMinorFractions.size())
            .mapToDouble(n -> Math.abs(segmentMinorFractions[n] - truthMinorFractions.get(n)))
            .average().getAsDouble();

    Assert.assertEquals(averageMinorFractionError, 0, 0.01);
}
 
Example 8
Source File: MathUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testRandomSelect() {
    final RandomGenerator rg = RandomGeneratorFactory.createRandomGenerator(new Random(13));
    final int NUM_SAMPLES = 1000;
    final List<Integer> choices = Arrays.asList(-1,0,1);
    final List<Integer> result = IntStream.range(0, NUM_SAMPLES)
            .map(n -> MathUtils.randomSelect(choices, j -> j*j/2.0, rg))
            .boxed()
            .collect(Collectors.toList());
    Assert.assertEquals(result.stream().filter(n -> n==0).count(), 0);
    Assert.assertEquals(result.stream().filter(n -> n == 1).count(), NUM_SAMPLES / 2, 50);
}
 
Example 9
Source File: CoverageDropoutDetectorTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private Object[][] getUnivariateGaussianTargetsWithDropout(final double sigma, final double dropoutRate) {
    Random rng = new Random(337);
    final RandomGenerator randomGenerator = RandomGeneratorFactory.createRandomGenerator(rng);
    NormalDistribution n = new NormalDistribution(randomGenerator, 1, sigma);
    final int numDataPoints = 10000;
    final int numEventPoints = 2000;

    // Randomly select dropoutRate of targets and reduce by 25%-75% (uniformly distributed)
    UniformRealDistribution uniformRealDistribution = new UniformRealDistribution(randomGenerator, 0, 1.0);
    final List<ReadCountRecord.SingleSampleRecord> targetList = new ArrayList<>();
    for (int i = 0; i < numDataPoints; i++){
        double coverage = n.sample() + (i < (numDataPoints - numEventPoints) ? 0.0 : 0.5);
        if (uniformRealDistribution.sample() < dropoutRate) {
            double multiplier = .25 + uniformRealDistribution.sample()/2;
            coverage = coverage * multiplier;
        }
        targetList.add(new ReadCountRecord.SingleSampleRecord(new Target("arbitrary_name", new SimpleInterval("chr1", 100 + 2*i, 101 + 2 * i)), coverage));
    }

    HashedListTargetCollection<ReadCountRecord.SingleSampleRecord> targets = new HashedListTargetCollection<>(targetList);

    List<ModeledSegment> segments = new ArrayList<>();
    segments.add(new ModeledSegment(new SimpleInterval("chr1", 100, 16050), 8000, 1));
    segments.add(new ModeledSegment(new SimpleInterval("chr1", 16100, 20200), 2000, 1.5));

    return new Object [] []{ {targets, segments}};
}
 
Example 10
Source File: AlleleFractionModellerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testMCMC() {
    final double meanBias = 1.2;
    final double biasVariance = 0.04;
    final double outlierProbability = 0.02;
    final AlleleFractionGlobalParameters globalParameters = new AlleleFractionGlobalParameters(meanBias, biasVariance, outlierProbability);
    final double minorAlleleFractionPriorAlpha = 1.;
    final AlleleFractionPrior prior = new AlleleFractionPrior(minorAlleleFractionPriorAlpha);
    final int numSegments = 50;
    final double averageHetsPerSegment = 50.;
    final double averageDepth = 50.;
    final int numSamples = 150;
    final int numBurnIn = 50;
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));

    final SampleLocatableMetadata metadata = new SimpleSampleLocatableMetadata(
            "test-sample",
            new SAMSequenceDictionary(IntStream.range(0, numSegments)
                    .mapToObj(i -> new SAMSequenceRecord("chr" + i + 1, 10000))
                    .collect(Collectors.toList())));
    final AlleleFractionSimulatedData simulatedData = new AlleleFractionSimulatedData(
            metadata, globalParameters, numSegments, averageHetsPerSegment, averageDepth, rng);

    final AlleleFractionModeller modeller = new AlleleFractionModeller(simulatedData.getData().getAllelicCounts(), simulatedData.getData().getSegments(), prior);
    modeller.fitMCMC(numSamples, numBurnIn);

    assertAlleleFractionPosteriorCenters(modeller, simulatedData);
}
 
Example 11
Source File: CopyRatioSegmenterUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testSegmentation() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));

    final List<Double> trueWeights = Arrays.asList(0.2, 0.5, 0.3);
    final List<Double> trueLog2CopyRatios = Arrays.asList(-2.0, 0.0, 1.4);
    final double trueMemoryLength = 1e5;
    final double trueStandardDeviation = 0.2;

    final CopyRatioHMM trueModel = new CopyRatioHMM(trueLog2CopyRatios, trueWeights,
            trueMemoryLength, trueStandardDeviation);

    final int chainLength = 10000;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, rng, trueMemoryLength/4);
    final List<Integer> trueStates = trueModel.generateHiddenStateChain(positions);
    final List<Double> trueLog2CopyRatioSequence = trueStates.stream().map(n -> trueLog2CopyRatios.get(n)).collect(Collectors.toList());

    final List<Double> data = trueLog2CopyRatioSequence.stream()
            .map(cr -> generateData(trueStandardDeviation, cr, rng)).collect(Collectors.toList());

    final List<Target> targets = positions.stream().map(Target::new).collect(Collectors.toList());
    final ReadCountCollection rcc = new ReadCountCollection(targets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(data.stream().mapToDouble(x->x).toArray()));
    final CopyRatioSegmenter segmenter = new CopyRatioSegmenter(10, rcc);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();

    final double[] segmentCopyRatios = segments.stream()
            .flatMap(s -> Collections.nCopies((int) s.getTargetCount(), s.getSegmentMeanInLog2CRSpace()).stream())
            .mapToDouble(x -> x).toArray();

    final double averageCopyRatioError = IntStream.range(0, trueLog2CopyRatioSequence.size())
            .mapToDouble(n -> Math.abs(segmentCopyRatios[n] - trueLog2CopyRatioSequence.get(n)))
            .average().getAsDouble();

    Assert.assertEquals(averageCopyRatioError, 0, 0.025);
}
 
Example 12
Source File: GATKProtectedMathUtilsTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testRandomSelect() {
    final RandomGenerator rg = RandomGeneratorFactory.createRandomGenerator(new Random(13));
    final int NUM_SAMPLES = 1000;
    final List<Integer> choices = Arrays.asList(-1,0,1);
    final List<Integer> result = IntStream.range(0, NUM_SAMPLES)
            .map(n -> GATKProtectedMathUtils.randomSelect(choices, j -> j*j/2.0, rg))
            .boxed()
            .collect(Collectors.toList());
    Assert.assertEquals(result.stream().filter(n -> n==0).count(), 0);
    Assert.assertEquals(result.stream().filter(n -> n == 1).count(), NUM_SAMPLES / 2, 50);
}
 
Example 13
Source File: GATKProtectedMathUtilsTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testRandomSelectFlatProbability() {
    final RandomGenerator rg = RandomGeneratorFactory.createRandomGenerator(new Random(13));
    final int NUM_SAMPLES = 1000;
    final List<Integer> choices = Arrays.asList(0,1,2);
    final List<Integer> result = IntStream.range(0, NUM_SAMPLES)
            .map(n -> GATKProtectedMathUtils.randomSelect(choices, j -> 1.0 / choices.size(), rg))
            .boxed()
            .collect(Collectors.toList());
    Assert.assertEquals(result.stream().filter(n -> n == 0).count(), NUM_SAMPLES / choices.size(), 50);
}
 
Example 14
Source File: AlleleFractionInitializerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testInitialization() {
    final double meanBias = 1.2;
    final double biasVariance = 0.04;
    final double outlierProbability = 0.02;
    final AlleleFractionGlobalParameters globalParameters = new AlleleFractionGlobalParameters(meanBias, biasVariance, outlierProbability);
    final int numSegments = 100;
    final double averageHetsPerSegment = 50.;
    final double averageDepth = 50.;
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));

    final SampleLocatableMetadata metadata = new SimpleSampleLocatableMetadata(
            "test-sample",
            new SAMSequenceDictionary(IntStream.range(0, numSegments)
                    .mapToObj(i -> new SAMSequenceRecord("chr" + i + 1, 1000))
                    .collect(Collectors.toList())));
    final AlleleFractionSimulatedData simulatedData = new AlleleFractionSimulatedData(
            metadata, globalParameters, numSegments, averageHetsPerSegment, averageDepth, rng);

    final AlleleFractionSegmentedData data = simulatedData.getData();
    final AlleleFractionState initializedState = new AlleleFractionInitializer(data).getInitializedState();

    Assert.assertEquals(initializedState.meanBias(), meanBias, ABSOLUTE_TOLERANCE);
    Assert.assertEquals(initializedState.biasVariance(), biasVariance, ABSOLUTE_TOLERANCE);
    Assert.assertEquals(initializedState.outlierProbability(), outlierProbability, ABSOLUTE_TOLERANCE);

    final double averageMinorFractionError = IntStream.range(0, numSegments)
            .mapToDouble(s -> Math.abs(initializedState.segmentMinorFraction(s) - simulatedData.getTrueState().segmentMinorFraction(s)))
            .average().getAsDouble();
    Assert.assertEquals(averageMinorFractionError, 0, ABSOLUTE_TOLERANCE);
}
 
Example 15
Source File: CoverageDropoutDetector.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
 * <p>Filters targets to populations where more than the minProportion lie in a single segment.</p>
 * <p>Returns null if no pass filtering.  Please note that in these cases,
 * in the rest of this class, we use this to assume that a GMM is not a good model.</p>
 *
 * @param segments  -- segments with segment mean in log2 copy ratio space
 * @param targets -- targets with a log2 copy ratio estimate
 * @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
 *                      in the evaluation
 * @param numComponents -- number of components to use in the GMM.  Usually, this is 2.
 * @return  never {@code null}.  Fitting result with indications whether it converged or was even attempted.
 */
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments,
                                                                                          final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents){

    // For each target in a segment that contains enough targets, normalize the difference against the segment mean
    //  and collapse the filtered targets into the copy ratio estimates.
    final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);

    if (filteredTargetsSegDiff.size() < numComponents) {
        return new MixtureMultivariateNormalFitResult(null, false, false);
    }

    // Assume that Apache Commons wants data points in the first dimension.
    // Note that second dimension of length 2 (instead of 1) is to wrok around funny Apache commons API.
    final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];

    // Convert the filtered targets into 2d array (even if second dimension is length 1).  The second dimension is
    //  uncorrelated Gaussian.  This is only to get around funny API in Apache Commons, which will throw an
    //  exception if the length of the second dimension is < 2
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
    for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
        filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
        filteredTargetsSegDiff2d[i][1] = nd.sample();
    }

    final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
    final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);

    try {
        multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
    } catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
        // We are done, we cannot make a fitting.  We should return a result that we attempted a fitting, but it
        //  did not converge.  Include the model as it was when the exception was thrown.
        return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
    }
    return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}
 
Example 16
Source File: XHMMModel.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a new model instance.
 *
 * @param eventStartProbability the probability per base pair of a transition from neutral to a CNV
 * @param meanEventSize the expectation of the distance between consecutive targets in an event
 * @param deletionMeanShift the deletion depth of coverage negative shift.
 * @param duplicationMeaShift the duplication depth of coverage positive shift.
 * @throws IllegalArgumentException if any of the model parameters has an invalid value.
 */
public XHMMModel(final double eventStartProbability,
                 final double meanEventSize,
                 final double deletionMeanShift,
                 final double duplicationMeaShift) {
    super(new XHMMEmissionProbabilityCalculator(
            deletionMeanShift,
            duplicationMeaShift,
            EMISSION_SD, RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED))),
            eventStartProbability,
            meanEventSize);
}
 
Example 17
Source File: MultidimensionalModellerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void testMCMC() {
    final int numSegments = 25;
    final int numSamples = 150;
    final int numBurnIn = 50;
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));

    //copy-ratio model parameters
    final double varianceCR = 0.01;
    final double outlierProbabilityCR = 0.05;
    final double averageIntervalsPerSegment = 100.;

    //allele-fraction model parameters
    final double meanBiasAF = 1.2;
    final double biasVarianceAF = 0.04;
    final double outlierProbabilityAF = 0.02;
    final AlleleFractionGlobalParameters globalParametersAF = new AlleleFractionGlobalParameters(meanBiasAF, biasVarianceAF, outlierProbabilityAF);
    final double minorAlleleFractionPriorAlpha = 1.;
    final AlleleFractionPrior priorAF = new AlleleFractionPrior(minorAlleleFractionPriorAlpha);
    final double averageHetsPerSegment = 50.;
    final double averageDepthAF = 50.;

    //similar-segment merging parameters
    final int maxNumSmoothingIterations = 10;
    final int numSmoothingIterationsPerFit = 0;
    final double smoothingCredibleIntervalThresholdCopyRatio = 2.;
    final double smoothingCredibleIntervalThresholdAlleleFraction = 2.;

    //recall that both CR and AF data points are at loci 1, 2, 3, etc. and that each segment is on a different contig
    final SampleLocatableMetadata metadata = new SimpleSampleLocatableMetadata(
            "test-sample",
            new SAMSequenceDictionary(IntStream.range(0, numSegments)
                    .mapToObj(i -> new SAMSequenceRecord("chr" + i + 1, 10000))
                    .collect(Collectors.toList())));
    final CopyRatioSimulatedData simulatedDataCR = new CopyRatioSimulatedData(
            metadata, varianceCR, outlierProbabilityCR, numSegments, averageIntervalsPerSegment, rng);
    final AlleleFractionSimulatedData simulatedDataAF = new AlleleFractionSimulatedData(
            metadata, globalParametersAF, numSegments, averageHetsPerSegment, averageDepthAF, rng);

    //we introduce extra segments, which we will later merge to test similar-segment merging
    final MultidimensionalSegmentCollection oversegmentedSegments = new MultidimensionalSegmentCollection(
            metadata,
            constructOversegmentedSegments(simulatedDataCR, simulatedDataAF));

    final MultidimensionalModeller modeller = new MultidimensionalModeller(
            oversegmentedSegments,
            simulatedDataCR.getCopyRatios(),
            simulatedDataAF.getAllelicCounts(), priorAF,
            numSamples, numBurnIn, numSamples, numBurnIn);
    modeller.smoothSegments(maxNumSmoothingIterations, numSmoothingIterationsPerFit, smoothingCredibleIntervalThresholdCopyRatio, smoothingCredibleIntervalThresholdAlleleFraction);

    CopyRatioModellerUnitTest.assertCopyRatioPosteriorCenters(modeller.getCopyRatioModeller(), simulatedDataCR);
    AlleleFractionModellerUnitTest.assertAlleleFractionPosteriorCenters(modeller.getAlleleFractionModeller(), simulatedDataAF);
}
 
Example 18
Source File: KernelSegmenter.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Returns a list of the indices of the changepoints, either sorted by decreasing change to the global segmentation cost
 * or by increasing index order.
 * @param maxNumChangepoints                    maximum number of changepoints to return (first and last points do not count towards this number)
 * @param kernel                                kernel function used to calculate segment costs
 * @param kernelApproximationDimension          dimension of low-rank approximation to the kernel
 * @param windowSizes                           list of sizes to use for the flanking segments used to calculate local changepoint costs
 * @param numChangepointsPenaltyLinearFactor    factor A for penalty of the form A * C, where C is the number of changepoints
 * @param numChangepointsPenaltyLogLinearFactor factor B for penalty of the form B * C * log (N / C),
 *                                              where C is the number of changepoints and N is the number of data points
 * @param changepointSortOrder                  sort by decreasing change to the global segmentation cost or by increasing index order
 */
public List<Integer> findChangepoints(final int maxNumChangepoints,
                                      final BiFunction<DATA, DATA, Double> kernel,
                                      final int kernelApproximationDimension,
                                      final List<Integer> windowSizes,
                                      final double numChangepointsPenaltyLinearFactor,
                                      final double numChangepointsPenaltyLogLinearFactor,
                                      final ChangepointSortOrder changepointSortOrder) {
    ParamUtils.isPositiveOrZero(maxNumChangepoints, "Maximum number of changepoints must be non-negative.");
    ParamUtils.isPositive(kernelApproximationDimension, "Dimension of kernel approximation must be positive.");
    Utils.validateArg(!windowSizes.isEmpty(), "At least one window size must be provided.");
    Utils.validateArg(windowSizes.stream().allMatch(ws -> ws > 0), "Window sizes must all be positive.");
    Utils.validateArg(windowSizes.stream().distinct().count() == windowSizes.size(), "Window sizes must all be unique.");
    ParamUtils.isPositiveOrZero(numChangepointsPenaltyLinearFactor,
            "Linear factor for the penalty on the number of changepoints per chromosome must be non-negative.");
    ParamUtils.isPositiveOrZero(numChangepointsPenaltyLogLinearFactor,
            "Log-linear factor for the penalty on the number of changepoints per chromosome must be non-negative.");

    if (maxNumChangepoints == 0) {
        logger.warn("No changepoints were requested, returning an empty list...");
        return Collections.emptyList();
    }
    if (data.isEmpty()) {
        logger.warn("No data points were provided, returning an empty list...");
        return Collections.emptyList();
    }

    logger.debug(String.format("Finding up to %d changepoints in %d data points...", maxNumChangepoints, data.size()));
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));

    logger.debug("Calculating low-rank approximation to kernel matrix...");
    final RealMatrix reducedObservationMatrix = calculateReducedObservationMatrix(rng, data, kernel, kernelApproximationDimension);
    final double[] kernelApproximationDiagonal = calculateKernelApproximationDiagonal(reducedObservationMatrix);

    logger.debug(String.format("Finding changepoint candidates for all window sizes %s...", windowSizes.toString()));
    final List<Integer> changepointCandidates = findChangepointCandidates(
            data, reducedObservationMatrix, kernelApproximationDiagonal, maxNumChangepoints, windowSizes);

    logger.debug("Performing backward model selection on changepoint candidates...");
    return selectChangepoints(
            changepointCandidates, maxNumChangepoints, numChangepointsPenaltyLinearFactor, numChangepointsPenaltyLogLinearFactor,
            reducedObservationMatrix, kernelApproximationDiagonal).stream()
            .sorted((a, b) -> changepointSortOrder.equals(ChangepointSortOrder.INDEX) ? Integer.compare(a, b) : 0)    //if BACKWARD_SELECTION, simply retain original order from backward model selection
            .collect(Collectors.toList());
}
 
Example 19
Source File: CoverageModelParameters.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Generates random coverage model parameters.
 *
 * @param targetList list of targets
 * @param numLatents number of latent variables
 * @param seed random seed
 * @param randomMeanLogBiasStandardDeviation std of mean log bias (mean is set to 0)
 * @param randomBiasCovariatesStandardDeviation std of bias covariates (mean is set to 0)
 * @param randomMaxUnexplainedVariance max value of unexplained variance (samples are taken from a uniform
 *                                     distribution [0, {@code randomMaxUnexplainedVariance}])
 * @param initialBiasCovariatesARDCoefficients initial row vector of ARD coefficients
 * @return an instance of {@link CoverageModelParameters}
 */
public static CoverageModelParameters generateRandomModel(final List<Target> targetList,
                                                          final int numLatents,
                                                          final long seed,
                                                          final double randomMeanLogBiasStandardDeviation,
                                                          final double randomBiasCovariatesStandardDeviation,
                                                          final double randomMaxUnexplainedVariance,
                                                          final INDArray initialBiasCovariatesARDCoefficients) {
    Utils.validateArg(numLatents >= 0, "Dimension of the bias space must be non-negative");
    Utils.validateArg(randomBiasCovariatesStandardDeviation >= 0, "Standard deviation of random bias covariates" +
            " must be non-negative");
    Utils.validateArg(randomMeanLogBiasStandardDeviation >= 0, "Standard deviation of random mean log bias" +
            " must be non-negative");
    Utils.validateArg(randomMaxUnexplainedVariance >= 0, "Max random unexplained variance must be non-negative");
    Utils.validateArg(initialBiasCovariatesARDCoefficients == null || numLatents > 0 &&
            initialBiasCovariatesARDCoefficients.length() == numLatents, "If ARD is enabled, the dimension" +
            " of the bias latent space must be positive and match the length of ARD coeffecient vector");
    final boolean biasCovariatesEnabled = numLatents > 0;

    final int numTargets = targetList.size();
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(seed));

    /* Gaussian random for mean log bias */
    final INDArray initialMeanLogBias = Nd4j.create(getNormalRandomNumbers(
            numTargets, 0, randomMeanLogBiasStandardDeviation, rng), new int[] {1, numTargets});

    /* Uniform random for unexplained variance */
    final INDArray initialUnexplainedVariance = Nd4j.create(getUniformRandomNumbers(
            numTargets, 0, randomMaxUnexplainedVariance, rng), new int[] {1, numTargets});

    final INDArray initialMeanBiasCovariates;

    if (biasCovariatesEnabled) {
        /* Gaussian random for bias covariates */
        initialMeanBiasCovariates = Nd4j.create(getNormalRandomNumbers(numTargets * numLatents, 0,
                randomBiasCovariatesStandardDeviation, rng), new int[]{numTargets, numLatents});
    } else {
        initialMeanBiasCovariates = null;
    }

    return new CoverageModelParameters(targetList, initialMeanLogBias, initialUnexplainedVariance,
            initialMeanBiasCovariates, initialBiasCovariatesARDCoefficients);
}
 
Example 20
Source File: SimpleCopyRatioCallerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void testMakeCalls() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    final double segmentNoise = 0.05;
    final double intervalLog2Noise = 0.2;
    final List<Double> segmentCopyRatios = Arrays.asList(2., 3., 1., 1., 0.25, 1., 5., 1., 0., 0.5);
    final List<Integer> numIntervalsPerSegment = Arrays.asList(10, 5, 5, 100, 10, 10, 20, 10, 10, 5);
    final SampleLocatableMetadata metadata = new SimpleSampleLocatableMetadata(
            "test-sample",
            new SAMSequenceDictionary(IntStream.range(0, segmentCopyRatios.size())
                    .mapToObj(i -> new SAMSequenceRecord("chr" + i + 1, 1000))
                    .collect(Collectors.toList())));
    final List<CalledCopyRatioSegment.Call> expectedCalls = Arrays.asList(
            AMPLIFICATION, AMPLIFICATION, NEUTRAL, NEUTRAL, DELETION, NEUTRAL, AMPLIFICATION, NEUTRAL, DELETION, DELETION);

    final List<CopyRatioSegment> segments = new ArrayList<>();
    for (int segmentIndex = 0; segmentIndex < numIntervalsPerSegment.size(); segmentIndex++) {
        final String contig = "chr" + segmentIndex + 1;
        final List<CopyRatio> intervalLog2CopyRatiosInSegment = new ArrayList<>(numIntervalsPerSegment.size());
        for (int intervalIndex = 0; intervalIndex < numIntervalsPerSegment.get(segmentIndex); intervalIndex++) {
            final double log2CopyRatioValue = ParamUtils.log2(Math.max(EPSILON,
                    segmentCopyRatios.get(segmentIndex) + rng.nextGaussian() * segmentNoise)) + intervalLog2Noise * rng.nextGaussian();
            intervalLog2CopyRatiosInSegment.add(new CopyRatio(
                    new SimpleInterval(contig, intervalIndex + 1, intervalIndex + 1), log2CopyRatioValue));
        }
        segments.add(new CopyRatioSegment(
                new SimpleInterval(contig, 1, numIntervalsPerSegment.get(segmentIndex)),
                intervalLog2CopyRatiosInSegment));
    }
    final CopyRatioSegmentCollection copyRatioSegments = new CopyRatioSegmentCollection(metadata, segments);

    final CalledCopyRatioSegmentCollection calledCopyRatioSegments =
            new SimpleCopyRatioCaller(copyRatioSegments,
                    NEUTRAL_SEGMENT_COPY_RATIO_LOWER_BOUND, NEUTRAL_SEGMENT_COPY_RATIO_UPPER_BOUND,
                    OUTLIER_NEUTRAL_SEGMENT_COPY_RATIO_Z_SCORE_THRESHOLD, CALLING_COPY_RATIO_Z_SCORE_THRESHOLD)
                    .makeCalls();

    Assert.assertEquals(copyRatioSegments.getMetadata(), calledCopyRatioSegments.getMetadata());
    Assert.assertEquals(
            copyRatioSegments.getIntervals(), calledCopyRatioSegments.getIntervals());
    Assert.assertEquals(
            copyRatioSegments.getRecords().stream().map(CopyRatioSegment::getMeanLog2CopyRatio).collect(Collectors.toList()),
            calledCopyRatioSegments.getRecords().stream().map(CopyRatioSegment::getMeanLog2CopyRatio).collect(Collectors.toList()));
    Assert.assertEquals(
            calledCopyRatioSegments.getRecords().stream().map(CalledCopyRatioSegment::getCall).collect(Collectors.toList()),
            expectedCalls);
}