org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor Java Examples

The following examples show how to use org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PreprocClassifier.java    From buffer_bci with GNU General Public License v3.0 6 votes vote down vote up
public ClassifierResult apply(Matrix data){
  if ( VERB>1 ) System.out.println(TAG+ " preproc");
  // Do the standard pre-processing
  data = preproc(data);
  
  // Linearly classifying the data
  if ( VERB>1 ) System.out.println(TAG+  "Classifying with linear classifier");
  Matrix fraw = applyLinearClassifier(data, 0);
  if ( VERB>1 ) System.out.println(TAG+  "Results from the classifier (fraw): " + fraw.toString());
  Matrix f = new Matrix(fraw.copy());
  Matrix p = new Matrix(f.copy());
  // map to probabilities using the logistic operator
  p.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
			 public double visit(int row, int column, double value) {
              return 1. / (1. + Math.exp(-value));
			 }
		});
  if ( VERB>=0 ) System.out.println(TAG+  "Results from the classifier (p): " + p.toString());
  return new ClassifierResult(f, fraw, p, data);		  
}
 
Example #2
Source File: ERSPClassifier.java    From buffer_bci with GNU General Public License v3.0 6 votes vote down vote up
@Override
  public ClassifierResult apply(Matrix data) {	
  if ( VERB>0 ) System.out.println("ERSP apply");
  // Do the standard pre-processing
  data = preproc(data);
  
  // Linearly classifying the data
  if( VERB>1 ) System.out.println(TAG+ "Classifying with linear classifier");
  Matrix fraw = applyLinearClassifier(data, 0);
  if( VERB>1 ) System.out.println(TAG+ "Results from the classifier (fraw): " + fraw.toString());
  Matrix f = new Matrix(fraw.copy());
  Matrix p = new Matrix(f.copy());
  p.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
			 public double visit(int row, int column, double value) {
              return 1. / (1. + Math.exp(-value));
			 }
		});
  if ( VERB>1 ) System.out.println(TAG+ "Results from the classifier (p): " + p.toString());
  return new ClassifierResult(f, fraw, p, data);
}
 
Example #3
Source File: GCBiasCorrectorUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static Pair<RealMatrix, double[]> simulateData(final int numSamples,
                                                       final int numIntervals) {
    final Random random = new Random(RANDOM_SEED);
    final double[] intervalGCContent = IntStream.range(0, numIntervals)
            .mapToDouble(n -> 0.5 + 0.2 * random.nextGaussian())
            .map(x -> Math.min(x, 0.95)).map(x -> Math.max(x, 0.05))
            .toArray();
    final double[] intervalGCBias = Arrays.stream(intervalGCContent)
            .map(QUADRATIC_GC_BIAS_CURVE::apply)
            .toArray();

    //model GC bias along with a small random amount of uniform non-GC-bias noise;
    //remaining noise after GC-bias correction should be only arise from the latter
    final RealMatrix readCounts = new Array2DRowRealMatrix(numSamples, numIntervals);
    readCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int sample, final int interval, final double value) {
            return (int) (MEAN_READ_DEPTH * intervalGCBias[interval] * (1. + NON_GC_BIAS_NOISE_LEVEL * random.nextDouble()));
        }
    });
    return new ImmutablePair<>(readCounts, intervalGCContent);
}
 
Example #4
Source File: PCATangentNormalizationUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Calculate the beta-hats that best fit case read counts given the panel of normals.
 *
 * @param normalsPseudoinverse the log-normalized or reduced-panel pseudoinverse from a panel of normals
 * @param input a {@code TxS} matrix where {@code T} is the number of targets and {@code S} the number of count groups (e.g. case samples).
 * @return never {@code null} an {@code NxS} matrix, where N is the number of samples in
 *  the panel and S the original name of count groups.
 */
@VisibleForTesting
public static RealMatrix calculateBetaHats(final RealMatrix normalsPseudoinverse,
                                           final RealMatrix input,
                                           final double epsilon) {
    Utils.nonNull(normalsPseudoinverse, "Normals inverse matrix cannot be null.");
    Utils.nonNull(input, "Input counts cannot be null.");
    Utils.validateArg(epsilon > 0, String.format("Invalid epsilon value, must be > 0: %f", epsilon));
    final double targetThreshold = (Math.log(epsilon) / Math.log(2)) + 1;

    // copy case samples in order to mask targets in-place and mask (set to zero) targets with coverage below threshold
    final RealMatrix maskedInput = input.copy();
    maskedInput.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return value > targetThreshold ? value : 0;
        }
    });

    return normalsPseudoinverse.multiply(maskedInput);
}
 
Example #5
Source File: HDF5PCACoveragePoNCreationUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Final pre-panel normalization that consists of dividing all counts by the median of
 * its column and log it with base 2.
 * <p>
 *     The normalization occurs in-place.
 * </p>
 *
 * @param readCounts the input counts to normalize.
 */
@VisibleForTesting
static void normalizeAndLogReadCounts(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final Median medianCalculator = new Median();

    final double[] medians = IntStream.range(0, counts.getColumnDimension()).mapToDouble(col -> medianCalculator.evaluate(counts.getColumn(col))).toArray();
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return Math.log(Math.max(EPSILON, value / medians[column])) * INV_LN_2;
        }
    });

    logger.info("Counts normalized by the column median and log2'd.");
}
 
Example #6
Source File: HDF5PCACoveragePoNCreationUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Calculates the median of column medians and subtract it from all counts.
 * @param readCounts the input counts to center.
 * @return the median of medians that has been subtracted from all counts.
 */
@VisibleForTesting
static double subtractMedianOfMedians(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final Median medianCalculator = new Median();
    final double[] columnMedians = MatrixSummaryUtils.getColumnMedians(counts);

    final double medianOfMedians = medianCalculator.evaluate(columnMedians);
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return value - medianOfMedians;
        }
    });
    logger.info(String.format("Counts centered around the median of medians %.2f", medianOfMedians));
    return medianOfMedians;
}
 
Example #7
Source File: GCBiasSimulatedData.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
public static Pair<ReadCountCollection, double[]> simulatedData(final int numTargets, final int numSamples) {
    final List<Target> phonyTargets = SimulatedTargets.phonyTargets(numTargets);
    final List<String> phonySamples = SimulatedSamples.phonySamples(numSamples);

    final Random random = new Random(13);
    final double[] gcContentByTarget = IntStream.range(0, numTargets)
            .mapToDouble(n -> 0.5 + 0.2*random.nextGaussian())
            .map(x -> Math.min(x,0.95)).map(x -> Math.max(x,0.05)).toArray();
    final double[] gcBiasByTarget = Arrays.stream(gcContentByTarget).map(QUADRATIC_GC_BIAS_CURVE::apply).toArray();

    // model mainly GC bias with a small random amount of non-GC bias
    // thus noise after GC correction should be nearly zero
    final RealMatrix counts = new Array2DRowRealMatrix(numTargets, numSamples);
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int target, final int column, final double value) {
            return gcBiasByTarget[target]*(1.0 + 0.01*random.nextDouble());
        }
    });
    final ReadCountCollection rcc = new ReadCountCollection(phonyTargets, phonySamples, counts);
    return new ImmutablePair<>(rcc, gcContentByTarget);
}
 
Example #8
Source File: OLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testPerfectFit() {
    double[] betaHat = regression.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat,
                           new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
                           1e-14);
    double[] residuals = regression.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[]{0d,0d,0d,0d,0d,0d},
                           1e-14);
    RealMatrix errors =
        new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
    final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
    RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
    referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            if (row == 0) {
                return s[column];
            }
            double x = s[row] * s[column];
            return (row == column) ? 2 * x : x;
        }
    });
   Assert.assertEquals(0.0,
                 errors.subtract(referenceVariance).getNorm(),
                 5.0e-16 * referenceVariance.getNorm());
   Assert.assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
}
 
Example #9
Source File: HDF5UtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static RealMatrix createMatrixOfGaussianValues(final int numRows,
                                                       final int numColumns,
                                                       final double mean,
                                                       final double sigma) {
    final RealMatrix largeMatrix = new Array2DRowRealMatrix(numRows, numColumns);
    final RandomDataGenerator randomDataGenerator = new RandomDataGenerator();
    randomDataGenerator.reSeed(337337337);
    largeMatrix.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            return randomDataGenerator.nextGaussian(mean, sigma);
        }
    });
    return largeMatrix;
}
 
Example #10
Source File: HDF5LibraryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private RealMatrix createMatrixOfGaussianValues(int numRows, int numCols, final double mean, final double sigma) {
    final RealMatrix bigCounts = new Array2DRowRealMatrix(numRows, numCols);
    final RandomDataGenerator randomDataGenerator = new RandomDataGenerator();
    randomDataGenerator.reSeed(337337337);
    bigCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            return randomDataGenerator.nextGaussian(mean, sigma);
        }
    });
    return bigCounts;
}
 
Example #11
Source File: SomaticGenotypingEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static <EVIDENCE> RealMatrix getAsRealMatrix(final LikelihoodMatrix<EVIDENCE, Allele> matrix) {
    final RealMatrix result = new Array2DRowRealMatrix(matrix.numberOfAlleles(), matrix.evidenceCount());
    result.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            return matrix.get(row, column);
        }
    });
    return result;
}
 
Example #12
Source File: SVDDenoisingUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static void transformToFractionalCoverage(final RealMatrix matrix) {
    logger.info("Transforming read counts to fractional coverage...");
    final double[] sampleSums = IntStream.range(0, matrix.getRowDimension())
            .mapToDouble(r -> MathUtils.sum(matrix.getRow(r))).toArray();
    matrix.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int sampleIndex, int intervalIndex, double value) {
            return value / sampleSums[sampleIndex];
        }
    });
}
 
Example #13
Source File: SVDDenoisingUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Preprocess (i.e., transform to fractional coverage and correct GC bias)
 * and standardize read counts for samples when no panel of normals is available.
 * The original {@code readCounts} has dimensions 1 x intervals and is not modified.
 * If {@code intervalGCContent} is null, GC-bias correction will not be performed
 */
public static RealMatrix preprocessAndStandardizeSample(final double[] readCounts,
                                                        final double[] intervalGCContent) {
    Utils.nonNull(readCounts);
    Utils.validateArg(intervalGCContent == null || readCounts.length == intervalGCContent.length,
            "Number of intervals for read counts must match those for GC-content annotations.");

    RealMatrix result = new Array2DRowRealMatrix(new double[][]{readCounts});

    //preprocess (transform to fractional coverage, correct GC bias) copy in place
    logger.info("Preprocessing read counts...");
    transformToFractionalCoverage(result);
    performOptionalGCBiasCorrection(result, intervalGCContent);
    logger.info("Sample read counts preprocessed.");

    //standardize copy in place
    logger.info("Standardizing read counts...");
    divideBySampleMedianAndTransformToLog2(result);
    logger.info("Subtracting sample median...");
    final double[] sampleLog2Medians = MatrixSummaryUtils.getRowMedians(result);
    result.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int sampleIndex, int intervalIndex, double value) {
            return value - sampleLog2Medians[sampleIndex];
        }
    });
    logger.info("Sample read counts standardized.");

    return result;
}
 
Example #14
Source File: SVDDenoisingUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Preprocess (i.e., transform to fractional coverage, correct GC bias, filter, impute, and truncate)
 * and standardize read counts from a panel of normals.
 * All inputs are assumed to be valid.
 * The dimensions of {@code readCounts} should be samples x intervals.
 * To reduce memory footprint, {@code readCounts} is modified in place when possible.
 * Filtering is performed by using boolean arrays to keep track of intervals and samples
 * that have been filtered at any step and masking {@code readCounts} with them appropriately.
 * If {@code intervalGCContent} is null, GC-bias correction will not be performed.
 */
static PreprocessedStandardizedResult preprocessAndStandardizePanel(final RealMatrix readCounts,
                                                                    final double[] intervalGCContent,
                                                                    final double minimumIntervalMedianPercentile,
                                                                    final double maximumZerosInSamplePercentage,
                                                                    final double maximumZerosInIntervalPercentage,
                                                                    final double extremeSampleMedianPercentile,
                                                                    final boolean doImputeZeros,
                                                                    final double extremeOutlierTruncationPercentile) {
    //preprocess (transform to fractional coverage, correct GC bias, filter, impute, truncate) and return copy of submatrix
    logger.info("Preprocessing read counts...");
    final PreprocessedStandardizedResult preprocessedStandardizedResult = preprocessPanel(readCounts, intervalGCContent,
            minimumIntervalMedianPercentile, maximumZerosInSamplePercentage, maximumZerosInIntervalPercentage,
            extremeSampleMedianPercentile, doImputeZeros, extremeOutlierTruncationPercentile);
    logger.info("Panel read counts preprocessed.");

    //standardize in place
    logger.info("Standardizing read counts...");
    divideBySampleMedianAndTransformToLog2(preprocessedStandardizedResult.preprocessedStandardizedValues);
    logger.info("Subtracting median of sample medians...");
    final double[] sampleLog2Medians = MatrixSummaryUtils.getRowMedians(preprocessedStandardizedResult.preprocessedStandardizedValues);
    final double medianOfSampleMedians = new Median().evaluate(sampleLog2Medians);
    preprocessedStandardizedResult.preprocessedStandardizedValues
            .walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int sampleIndex, int intervalIndex, double value) {
            return value - medianOfSampleMedians;
        }
    });
    logger.info("Panel read counts standardized.");

    return preprocessedStandardizedResult;
}
 
Example #15
Source File: OLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testPerfectFit() {
    double[] betaHat = regression.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat,
                           new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
                           1e-14);
    double[] residuals = regression.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[]{0d,0d,0d,0d,0d,0d},
                           1e-14);
    RealMatrix errors =
        new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
    final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
    RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
    referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            if (row == 0) {
                return s[column];
            }
            double x = s[row] * s[column];
            return (row == column) ? 2 * x : x;
        }
    });
   Assert.assertEquals(0.0,
                 errors.subtract(referenceVariance).getNorm(),
                 5.0e-16 * referenceVariance.getNorm());
   Assert.assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
}
 
Example #16
Source File: OLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testPerfectFit() {
    double[] betaHat = regression.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat,
                           new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
                           1e-14);
    double[] residuals = regression.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[]{0d,0d,0d,0d,0d,0d},
                           1e-14);
    RealMatrix errors =
        new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
    final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
    RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
    referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            if (row == 0) {
                return s[column];
            }
            double x = s[row] * s[column];
            return (row == column) ? 2 * x : x;
        }
    });
   Assert.assertEquals(0.0,
                 errors.subtract(referenceVariance).getNorm(),
                 5.0e-16 * referenceVariance.getNorm());
   Assert.assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
}
 
Example #17
Source File: OLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testPerfectFit() {
    double[] betaHat = regression.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat,
                           new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
                           1e-14);
    double[] residuals = regression.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[]{0d,0d,0d,0d,0d,0d},
                           1e-14);
    RealMatrix errors =
        new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
    final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
    RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
    referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            if (row == 0) {
                return s[column];
            }
            double x = s[row] * s[column];
            return (row == column) ? 2 * x : x;
        }
    });
   Assert.assertEquals(0.0,
                 errors.subtract(referenceVariance).getNorm(),
                 5.0e-16 * referenceVariance.getNorm());
   Assert.assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
}
 
Example #18
Source File: OLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testPerfectFit() throws Exception {
    double[] betaHat = regression.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat,
                           new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
                           1e-14);
    double[] residuals = regression.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[]{0d,0d,0d,0d,0d,0d},
                           1e-14);
    RealMatrix errors =
        new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
    final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
    RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
    referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            if (row == 0) {
                return s[column];
            }
            double x = s[row] * s[column];
            return (row == column) ? 2 * x : x;
        }
    });
   Assert.assertEquals(0.0,
                 errors.subtract(referenceVariance).getNorm(),
                 5.0e-16 * referenceVariance.getNorm());
   Assert.assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
}
 
Example #19
Source File: ReadCountCollectionUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Impute zero counts to the median of non-zero values in the enclosing target row.
 *
 * <p>The imputation is done in-place, thus the input matrix is well be modified as a result of this call.</p>
 *
 * @param readCounts the input and output read-count matrix.
 */
public static void imputeZeroCountsAsTargetMedians(final ReadCountCollection readCounts, final Logger logger) {

    final RealMatrix counts = readCounts.counts();
    final int targetCount = counts.getRowDimension();

    final Median medianCalculator = new Median();
    int totalCounts = counts.getColumnDimension() * counts.getRowDimension();

    // Get the number of zeroes contained in the counts.
    final long totalZeroCounts = IntStream.range(0, targetCount)
            .mapToLong(t -> DoubleStream.of(counts.getRow(t))
                    .filter(c -> c == 0.0).count()).sum();

    // Get the median of each row, not including any entries that are zero.
    final double[] medians = IntStream.range(0, targetCount)
            .mapToDouble(t -> medianCalculator.evaluate(
                    DoubleStream.of(counts.getRow(t))
                            .filter(c -> c != 0.0)
                            .toArray()
            )).toArray();

    // Change any zeros in the counts to the median for the row.
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return value != 0 ? value : medians[row];
        }
    });

    if (totalZeroCounts > 0) {
        final double totalZeroCountsPercentage = 100.0 * (totalZeroCounts / totalCounts);
        logger.info(String.format("Some 0.0 counts (%d out of %d, %.2f%%) were imputed to their enclosing target " +
                "non-zero median", totalZeroCounts, totalZeroCounts, totalZeroCountsPercentage));
    } else {
        logger.info("No count is 0.0 thus no count needed to be imputed.");
    }
}
 
Example #20
Source File: OLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testPerfectFit() {
    double[] betaHat = regression.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat,
                           new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
                           1e-14);
    double[] residuals = regression.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[]{0d,0d,0d,0d,0d,0d},
                           1e-14);
    RealMatrix errors =
        new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
    final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
    RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
    referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            if (row == 0) {
                return s[column];
            }
            double x = s[row] * s[column];
            return (row == column) ? 2 * x : x;
        }
    });
   Assert.assertEquals(0.0,
                 errors.subtract(referenceVariance).getNorm(),
                 5.0e-16 * referenceVariance.getNorm());
   Assert.assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
}
 
Example #21
Source File: OLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testPerfectFit() {
    double[] betaHat = regression.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat,
                           new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
                           1e-14);
    double[] residuals = regression.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[]{0d,0d,0d,0d,0d,0d},
                           1e-14);
    RealMatrix errors =
        new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
    final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
    RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
    referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            if (row == 0) {
                return s[column];
            }
            double x = s[row] * s[column];
            return (row == column) ? 2 * x : x;
        }
    });
   Assert.assertEquals(0.0,
                 errors.subtract(referenceVariance).getNorm(),
                 5.0e-16 * referenceVariance.getNorm());
   Assert.assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
}
 
Example #22
Source File: OLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testPerfectFit() {
    double[] betaHat = regression.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat,
                           new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
                           1e-14);
    double[] residuals = regression.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[]{0d,0d,0d,0d,0d,0d},
                           1e-14);
    RealMatrix errors =
        new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
    final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
    RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
    referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            if (row == 0) {
                return s[column];
            }
            double x = s[row] * s[column];
            return (row == column) ? 2 * x : x;
        }
    });
   Assert.assertEquals(0.0,
                 errors.subtract(referenceVariance).getNorm(),
                 5.0e-16 * referenceVariance.getNorm());
   Assert.assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
}
 
Example #23
Source File: HDF5LibraryUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private RealMatrix createMatrixOfGaussianValues(int numRows, int numCols, final double mean, final double sigma) {
    final RealMatrix bigCounts = new Array2DRowRealMatrix(numRows, numCols);
    final RandomDataGenerator randomDataGenerator = new RandomDataGenerator();
    randomDataGenerator.reSeed(337337337);
    bigCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            return randomDataGenerator.nextGaussian(mean, sigma);
        }
    });
    return bigCounts;
}
 
Example #24
Source File: HDF5PCACoveragePoNCreationUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * SVD and Pseudo inverse calculation.
 *
 * @param logNormalized the input counts for the SVD and reduction steps, fully normalized and already logged.
 * @param requestedNumberOfEigensamples user requested number of eigensamples for the reduced panel.
 * @return never {@code null}.
 */
@VisibleForTesting
static ReductionResult calculateReducedPanelAndPInverses(final ReadCountCollection logNormalized,
                                                         final OptionalInt requestedNumberOfEigensamples,
                                                         final Logger logger,
                                                         final JavaSparkContext ctx) {

    if (ctx == null) {
        logger.warn("No Spark context provided, not going to use Spark...");
    }

    final RealMatrix logNormalizedCounts = logNormalized.counts();
    final int numberOfCountColumns = logNormalizedCounts.getColumnDimension();

    logger.info("Starting the SVD decomposition of the log-normalized counts ...");
    final long svdStartTime = System.currentTimeMillis();
    final SVD logNormalizedSVD = SVDFactory.createSVD(logNormalized.counts(), ctx);
    final long svdEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished the SVD decomposition of the log-normal counts. Elapse of %d seconds", (svdEndTime - svdStartTime) / 1000));

    final int numberOfEigensamples = determineNumberOfEigensamples(requestedNumberOfEigensamples, numberOfCountColumns, logNormalizedSVD, logger);
    logger.info(String.format("Including %d eigensamples in the reduced PoN", numberOfEigensamples));

    final double[] singularValues = logNormalizedSVD.getSingularValues();
    final RealMatrix reducedCounts = logNormalizedSVD.getU().getSubMatrix(0, logNormalizedCounts.getRowDimension()-1, 0, numberOfEigensamples - 1).copy();
    reducedCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) { return singularValues[column]*value; }
    });

    // The Pseudo inverse comes nearly for free from having run the SVD decomposition.
    final RealMatrix logNormalizedPseudoInverse = logNormalizedSVD.getPinv();

    logger.info("Calculating the reduced PoN inverse matrix...");
    final long riStartTime = System.currentTimeMillis();
    final RealMatrix reducedCountsPseudoInverse = SVDFactory.createSVD(reducedCounts, ctx).getPinv();
    final long riEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished calculating the reduced PoN inverse matrix. Elapse of %d seconds", (riEndTime - riStartTime) / 1000));
    return new ReductionResult(logNormalizedPseudoInverse, reducedCounts, reducedCountsPseudoInverse, logNormalizedSVD.getSingularValues());
}
 
Example #25
Source File: PCATangentNormalizationUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Target-factor normalizes a {@link RealMatrix} in-place given target factors..
 */
static void factorNormalize(final RealMatrix input, final double[] targetFactors) {
    Utils.nonNull(input, "Input matrix cannot be null.");
    Utils.nonNull(targetFactors, "Target factors cannot be null.");
    Utils.validateArg(targetFactors.length == input.getRowDimension(),
            "Number of target factors does not correspond to the number of rows.");
    // Divide all counts by the target factor for the row.
    input.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return value / targetFactors[row];
        }
    });
}
 
Example #26
Source File: SomaticGenotypingEngine.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static RealMatrix getAsRealMatrix(final LikelihoodMatrix<Allele> matrix) {
    final RealMatrix result = new Array2DRowRealMatrix(matrix.numberOfAlleles(), matrix.numberOfReads());
    result.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            return matrix.get(row, column);
        }
    });
    return result;
}
 
Example #27
Source File: XHMMSegmentCallerBase.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Standardize read counts (per-sample).
 * Note: modification is done in-place.
 *
 * @param counts original read counts
 */
private void standardizeBySample(final RealMatrix counts) {
    final double[] columnMeans = GATKProtectedMathUtils.columnMeans(counts);
    final double[] columnStdDev = GATKProtectedMathUtils.columnStdDevs(counts);

    counts.walkInColumnOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            return (value - columnMeans[column]) / columnStdDev[column];
        }

    });
}
 
Example #28
Source File: XHMMSegmentCallerBase.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Standardize read counts (per-target).
 * Note: modification is done in-place.
 *
 * @param counts original read counts
 */
private void standardizeByTarget(final RealMatrix counts) {
    final double[] rowMeans = GATKProtectedMathUtils.rowMeans(counts);
    final double[] rowStdDev = GATKProtectedMathUtils.rowStdDevs(counts);

    counts.walkInColumnOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return (value - rowMeans[row]) / rowStdDev[row];
        }

    });
}