org.apache.commons.math3.stat.correlation.SpearmansCorrelation Java Examples

The following examples show how to use org.apache.commons.math3.stat.correlation.SpearmansCorrelation. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: StatsUtil.java    From MeteoInfo with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Calculates the Spearman rank correlation matrix of two data arrays.
 *
 * <p>The values of {@code x} and {@code y} are placed side by side as the
 * columns of one matrix (x columns first, then y columns) and the rank
 * correlation matrix of that combined matrix is returned.
 *
 * @param x X data
 * @param y Y data, read with the same row/column layout as {@code x}
 * @return Spearman's rank correlation matrix as a two-dimensional array
 */
public static Array spearmanr(Array x, Array y) {
    x = x.copyIfView();
    y = y.copyIfView();

    final int rowCount = x.getShape()[0];
    final int colCount = (x.getRank() == 2) ? x.getShape()[1] : 1;

    // Build [x columns | y columns] row by row from the flat element order.
    final double[][] combined = new double[rowCount][colCount * 2];
    for (int row = 0; row < rowCount; row++) {
        final int rowOffset = row * colCount;
        for (int col = 0; col < colCount; col++) {
            combined[row][col] = x.getDouble(rowOffset + col);
        }
        for (int col = 0; col < colCount; col++) {
            combined[row][colCount + col] = y.getDouble(rowOffset + col);
        }
    }

    final SpearmansCorrelation spearman =
            new SpearmansCorrelation(new Array2DRowRealMatrix(combined, false));
    final RealMatrix rankCorrelation = spearman.getCorrelationMatrix();

    final int outRows = rankCorrelation.getColumnDimension();
    final int outCols = rankCorrelation.getRowDimension();
    final Array result = Array.factory(DataType.DOUBLE, new int[]{outRows, outCols});

    // Copy the matrix into the result array in row-major order.
    int flatIndex = 0;
    for (int i = 0; i < outRows; i++) {
        for (int j = 0; j < outCols; j++) {
            result.setDouble(flatIndex++, rankCorrelation.getEntry(i, j));
        }
    }
    return result;
}
 
Example #2
Source File: NumberColumnTest.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the column correlation methods agree with the Commons Math
 * Pearson, Spearman and Kendall implementations on the same data.
 */
@Test
public void testCorrelation() {
  final double[] x = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  final double[] y = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  final double tolerance = 0.0001;

  final DoubleColumn xCol = DoubleColumn.create("x", x);
  final DoubleColumn yCol = DoubleColumn.create("y", y);

  // Column-level results are computed first, then compared to the references.
  final double pearsonFromColumn = xCol.pearsons(yCol);
  final double spearmanFromColumn = xCol.spearmans(yCol);
  final double kendallFromColumn = xCol.kendalls(yCol);

  assertEquals(new PearsonsCorrelation().correlation(x, y), pearsonFromColumn, tolerance);
  assertEquals(new SpearmansCorrelation().correlation(x, y), spearmanFromColumn, tolerance);
  assertEquals(new KendallsCorrelation().correlation(x, y), kendallFromColumn, tolerance);
}
 
Example #3
Source File: NumberColumnTest.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that pearsons, spearmans and kendalls on a column match the
 * corresponding Commons Math correlation classes for identical inputs.
 */
@Test
public void testCorrelation() {
  final double delta = 0.0001;
  final double[] x = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  final double[] y = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  final DoubleColumn xCol = DoubleColumn.create("x", x);
  final DoubleColumn yCol = DoubleColumn.create("y", y);

  final double actualPearson = xCol.pearsons(yCol);
  final double actualSpearman = xCol.spearmans(yCol);
  final double actualKendall = xCol.kendalls(yCol);

  final double expectedPearson = new PearsonsCorrelation().correlation(x, y);
  assertEquals(expectedPearson, actualPearson, delta);

  final double expectedSpearman = new SpearmansCorrelation().correlation(x, y);
  assertEquals(expectedSpearman, actualSpearman, delta);

  final double expectedKendall = new KendallsCorrelation().correlation(x, y);
  assertEquals(expectedKendall, actualKendall, delta);
}
 
Example #4
Source File: CorrelationCalculator.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the Spearman rank correlation between two lists of values.
 *
 * @param list1 first list of values
 * @param list2 second list of values
 * @return Spearman's rank correlation of the two lists
 */
public static double getSpearman(List<Double> list1, List<Double> list2)
{
	// Both lists are converted to arrays via getArray before correlating.
	return new SpearmansCorrelation().correlation(getArray(list1), getArray(list2));
}
 
Example #5
Source File: Similarity.java    From mzmine3 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Computes the Spearman rank correlation between columns 0 and 1 of the data.
 *
 * @param data input data; columns 0 and 1 are extracted with {@code col}
 * @return Spearman's rank correlation of the two columns
 */
@Override
public double calc(double[][] data) {
  final SpearmansCorrelation spearman = new SpearmansCorrelation();
  final double[] firstColumn = col(data, 0);
  final double[] secondColumn = col(data, 1);
  return spearman.correlation(firstColumn, secondColumn);
}
 
Example #6
Source File: TopNWordsCorrelation.java    From dkpro-c4corpus with Apache License 2.0 4 votes vote down vote up
/**
 * Computes Spearman correlation by comparing order of two corpora vocabularies
 * and prints the correlation and its p-value to standard output.
 *
 * @param goldCorpus  gold corpus
 * @param otherCorpus other corpus
 * @param topN        how many entries from the gold corpus should be taken
 * @throws IOException              I/O exception
 * @throws IllegalArgumentException if {@code topN} is greater than the size of
 *                                  the gold vocabulary
 */
public static void spearmanCorrelation(File goldCorpus, File otherCorpus,
        int topN)
        throws IOException
{
    // Open each corpus in try-with-resources so the file handle is always
    // released, even when loading the vocabulary fails.
    final LinkedHashMap<String, Integer> gold;
    try (FileInputStream goldStream = new FileInputStream(goldCorpus)) {
        gold = loadCorpusToRankedVocabulary(goldStream);
    }
    final LinkedHashMap<String, Integer> other;
    try (FileInputStream otherStream = new FileInputStream(otherCorpus)) {
        other = loadCorpusToRankedVocabulary(otherStream);
    }

    // Validate before allocating anything sized by topN.
    if (gold.size() < topN) {
        throw new IllegalArgumentException(
                "topN (" + topN + ") cannot be greater than vocabulary size (" + gold.size()
                        + ")");
    }

    // One row per gold word: column 0 = value in gold, column 1 = value in other.
    double[][] matrix = new double[topN][];

    Iterator<Map.Entry<String, Integer>> iterator = gold.entrySet().iterator();
    int counter = 0;
    while (counter < topN) {
        Map.Entry<String, Integer> next = iterator.next();
        String goldWord = next.getKey();
        Integer goldValue = next.getValue();

        // look-up position in other corpus
        Integer otherValue = other.get(goldWord);
        if (otherValue == null) {
            // Word is missing from the other corpus: use the largest possible value.
            otherValue = Integer.MAX_VALUE;
        }

        matrix[counter] = new double[2];
        matrix[counter][0] = goldValue;
        matrix[counter][1] = otherValue;

        counter++;
    }

    RealMatrix realMatrix = new Array2DRowRealMatrix(matrix);

    SpearmansCorrelation spearmansCorrelation = new SpearmansCorrelation(realMatrix);
    double pValue = spearmansCorrelation.getRankCorrelation().getCorrelationPValues()
            .getEntry(0, 1);
    double correlation = spearmansCorrelation.getRankCorrelation().getCorrelationMatrix()
            .getEntry(0, 1);

    System.out.println("Gold: " + goldCorpus.getName());
    System.out.println("Other: " + otherCorpus.getName());
    System.out.printf(Locale.ENGLISH, "Top N:\n%d\nCorrelation\n%.3f\np-value\n%.3f\n", topN,
            correlation, pValue);
}
 
Example #7
Source File: CBC.java    From thunderstorm with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Computes one result value per point of the main channel by correlating two
 * normalised neighbor-count profiles and weighting the correlation by the
 * distance to the nearest point of the other channel.
 *
 * If channel1 == channel2 (both res-tables or both gt-tables), avoid self-counting, i.e., distance to nearest neighbor must not be 0!
 * On the other hand if comparing res-table with gt-table then self-counting is allowed even if the data in the tables are the same.
 *
 * @param mainChannelCoords coordinates of the points to evaluate; one result is produced per entry
 * @param mainChannelTree tree queried for neighbor counts within the main channel
 * @param otherChannelTree tree queried for neighbor counts and nearest neighbors in the other channel
 * @param neighborsInDistance output array, indexed [radius index][point index]; receives the raw (un-normalised) counts from the other channel
 * @param nearestNeighborDistances output array, indexed by point; receives the value returned by getDistanceToNearestNeighbor
 * @return array with one result per entry of mainChannelCoords
 * */
private double[] calc(final double[][] mainChannelCoords, final KDTree<double[]> mainChannelTree, final KDTree<double[]> otherChannelTree, final double [][] neighborsInDistance, final double [] nearestNeighborDistances) {

    // The last entry of squaredRadiusDomain is used as the reference (maximum) squared radius.
    final int lastRadiusIndex = squaredRadiusDomain.length - 1;
    final double maxSquaredRadius = squaredRadiusDomain[lastRadiusIndex];

    final double[] cbcResults = new double[mainChannelCoords.length];
    // Progress counter; atomic, presumably because Loop.withIndex may run the body concurrently - TODO confirm.
    final AtomicInteger count = new AtomicInteger(0);
    IJ.showProgress(0);
    Loop.withIndex(0, mainChannelCoords.length, new Loop.BodyWithIndex() {
        @Override
        public void run(int i) {
            try {
                // Neighbor counts in the main channel, one per radius; the boolean tells the helper whether both channels are the same object.
                double[] counts = calcNeighborCount(mainChannelCoords[i], mainChannelTree, squaredRadiusDomain, (firstChannelCoords == secondChannelCoords));
                // Normalise in place by the count at the largest radius and by the squared-radius ratio.
                // NOTE(review): unlike the second loop below there is no guard for counts[lastRadiusIndex] == 0 - confirm that cannot happen.
                for(int j = 0; j < counts.length; j++) {
                    counts[j] = counts[j] / counts[lastRadiusIndex] * maxSquaredRadius / squaredRadiusDomain[j];
                }

                // Same profile, this time counted against the other channel.
                double[] counts2 = calcNeighborCount(mainChannelCoords[i], otherChannelTree, squaredRadiusDomain, (firstChannelCoords == secondChannelCoords));
                nearestNeighborDistances[i] = getDistanceToNearestNeighbor(mainChannelCoords[i], otherChannelTree, (firstChannelCoords == secondChannelCoords));
                // Captured before the loop because counts2 is overwritten in place below.
                double maxCount = counts2[lastRadiusIndex];

                for(int j = 0; j < counts2.length; j++) {
                    // Store the raw count before it is normalised.
                    neighborsInDistance[j][i] = counts2[j];
                    if(maxCount == 0) {
                        // No neighbors at the largest radius: avoid dividing by zero.
                        counts2[j] = 0;
                    } else {
                        counts2[j] = counts2[j] / maxCount * maxSquaredRadius / squaredRadiusDomain[j];
                    }
                }

                // Rank correlation between the two normalised profiles; NaN when the correlator rejects NaN input.
                SpearmansCorrelation correlator = new SpearmansCorrelation();
                double correlation;
                try {
                    correlation = correlator.correlation(counts, counts2);
                } catch (NotANumberException e) {
                    correlation = Double.NaN;
                }
                // Distance to the nearest point of the other channel, computed directly from the tree
                // (separately from the value stored in nearestNeighborDistances above).
                double[] nearestNeighbor = otherChannelTree.nearest(mainChannelCoords[i]);
                double nnDistance = MathArrays.distance(nearestNeighbor, mainChannelCoords[i]);

                // Weight the correlation by exp(-nnDistance / sqrt(maxSquaredRadius)).
                double result = correlation * MathProxy.exp(-nnDistance / MathProxy.sqrt(maxSquaredRadius));
                cbcResults[i] = result;
                // Update the progress bar once every 1024 indices.
                if(i % 1024 == 0) {
                    IJ.showProgress((double)count.addAndGet(1024) / (double)(mainChannelCoords.length));
                }
            } catch(KeySizeException ex) {
                // Checked exception cannot leave run(); rethrow unchecked with the cause preserved.
                throw new RuntimeException(ex);
            }

        }
    });
    IJ.showProgress(1);
    return cbcResults;
}
 
Example #8
Source File: Similarity.java    From mzmine2 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Returns the Spearman rank correlation of the first two columns of the data.
 *
 * @param data input data from which columns 0 and 1 are taken via {@code col}
 * @return Spearman's rank correlation between the two columns
 */
@Override
public double calc(double[][] data) {
  final SpearmansCorrelation rankCorrelation = new SpearmansCorrelation();
  final double[] columnZero = col(data, 0);
  final double[] columnOne = col(data, 1);
  final double coefficient = rankCorrelation.correlation(columnZero, columnOne);
  return coefficient;
}
 
Example #9
Source File: Deconvolution.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Make the linear regression models and then do an Anova of the sum of
 * squares
 * 
 * Full model: Exp ~ celltype_1 + celltype_2 + ... + celltype_n +
 * celltype_1:Gt + celltype_2:Gt + ... + celltype_n:Gt <- without
 * intercept
 * 
 * Compare with anova to Exp ~ celltype_1 + celltype_2 + celltype_n +
 * celltype_1:Gt + celltype_2:Gt + .. + celltype_n-1 <- without
 * intercept Exp ~ celltype_1 + celltype_2 + celltype_n + celltype_1:Gt +
 * .. + celltype_n <- without intercept Exp ~ celltype_1 + celltype_2 +
 * celltype_n + celltype_2:Gt + .. + celltype_n <- without intercept
 *
 * 
 * @param expression A vector with the expression value per sample
 * 
 * @param genotypes A vector with the genotype dosage per sample for
 * *one* eQTL-gene pair. When the roundDosage option is set the values are
 * rounded in place, so the caller's array is modified.
 * 
 * @param qtlName Name of the QTL (usually snp name + gene name)
 * 
 * @return The deconvolution result built from the fitted interaction models
 * together with the whole blood QTL correlation and its p-value (both 0
 * when the wholeBloodQTL option is not set)
 */
private DeconvolutionResult deconvolution(double[] expression, double[] genotypes, String qtlName) throws RuntimeException, IllegalAccessException, IOException {
	/*
	 * If roundDosage option is selected on the command line, round of the dosage to closest integer -> 0.49 = 0, 0.51 = 1, 1.51 = 2.
	 * The option is checked once, outside the loop.
	 */
	if (commandLineOptions.getRoundDosage()) {
		for (int i = 0; i < genotypes.length; ++i) {
			genotypes[i] = Math.round(genotypes[i]);
		}
	}

	InteractionModelCollection interactionModelCollection = new InteractionModelCollection(cellCounts,
			commandLineOptions.getGenotypeConfigurationType(),
			commandLineOptions.getUseOLS());
	interactionModelCollection.setQtlName(qtlName);
	interactionModelCollection.setGenotypes(genotypes);
	interactionModelCollection.setExpressionValues(expression);

	/*
	 * For each cell type model, e.g. ctModel 1 -> y = neut% + mono% + neut%:GT; ctModel 2 -> y = neut% + mono% + mono%:GT, one for each cell type, 
	 * where the interaction term (e.g mono%:GT) of the celltype:genotype to test is removed, calculate and save the observations in an observation vector
	 * where the observation vector for the example ctModel 1 is
	 *  
	 * 		celltypeModel = [[sample1_neut%, sample1_mono%, sample1_neut%*sample1_genotype], [sample2_neut%, sample2_mono%, sample2_neut%*sample2_genotype]]
	 *  
	 * with for each sample a cellcount percentage for each cell type and the genotype of the QTL that is being tested. 
	 * 
	 * Using this observation vector calculate the sum of squares and test with Anova if it is significantly different from the sum of squares of the full model. 
	 * Here the full model includes all interaction terms of the cell type models, e.g. fullModel -> y = neut% + mono% + neut%:GT + mono%:GT so the observation vector
	 * 
	 * 		fullModel = [[sample1_neut%, sample1_mono%, sample1_neut%*sample1_genotype, sample1_mono%*sample1_genotype], [sample2_neut%, ..., etc]]
	 * 
	 */
	interactionModelCollection.createObservedValueMatricesFullModel(commandLineOptions.getAddGenotypeTerm());
	interactionModelCollection.findBestFullModel();
	interactionModelCollection.createObservedValueMatricesCtModels(commandLineOptions.getAddGenotypeTerm());
	interactionModelCollection.findBestCtModel();
	calculateDeconvolutionPvalue(interactionModelCollection);

	double wholeBloodQTL = 0;
	double wholeBloodQTLpvalue = 0;
	if (commandLineOptions.getWholeBloodQTL()) {
		// if true calculate spearman correlation between genotypes and expression values (i.e. whole blood eQTL)
		wholeBloodQTL = new SpearmansCorrelation().correlation(interactionModelCollection.getGenotypes(), interactionModelCollection.getExpessionValues());
		wholeBloodQTLpvalue = Statistics.calculateSpearmanTwoTailedPvalue(wholeBloodQTL, cellCounts.getNumberOfSamples());
	}

	// Clean up the collection before the result is built from it (same order as before).
	interactionModelCollection.cleanUp(!commandLineOptions.getOutputPredictedExpression());
	return new DeconvolutionResult(interactionModelCollection, wholeBloodQTL, wholeBloodQTLpvalue);
}
 
Example #10
Source File: NumericColumn.java    From tablesaw with Apache License 2.0 2 votes vote down vote up
/**
 * Computes the Spearman's Rank correlation of this column with another column.
 *
 * @param otherColumn A NumberColumn with no missing values
 * @return the Spearman's Rank correlation between the receiver and {@code otherColumn}
 * @throws NotANumberException if either column contains any missing values
 */
default double spearmans(NumericColumn<?> otherColumn) {
  final double[] receiverValues = asDoubleArray();
  final double[] otherValues = otherColumn.asDoubleArray();
  final SpearmansCorrelation spearman = new SpearmansCorrelation();
  return spearman.correlation(receiverValues, otherValues);
}
 
Example #11
Source File: NumericColumn.java    From tablesaw with Apache License 2.0 2 votes vote down vote up
/**
 * Calculates the Spearman's Rank correlation between the receiver and the given column.
 *
 * @param otherColumn A NumberColumn with no missing values
 * @return the Spearman's Rank correlation of the two columns
 * @throws NotANumberException if either column contains any missing values
 */
default double spearmans(NumericColumn<?> otherColumn) {
  final double[] thisData = asDoubleArray();
  final double[] thatData = otherColumn.asDoubleArray();
  final double coefficient = new SpearmansCorrelation().correlation(thisData, thatData);
  return coefficient;
}