Java Code Examples for org.apache.commons.math3.stat.ranking.NaturalRanking#rank()

The following examples show how to use org.apache.commons.math3.stat.ranking.NaturalRanking#rank(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: TestEQTLDatasetForInteractions.java From systemsgenetics with GNU General Public License v3.0

6 votes

/**
 * Rank-transforms every covariate probe to a normal distribution, in place.
 *
 * <p>For each probe, the first {@code datasetGenotypes.nrSamples} values of
 * {@code datasetCovariates.rawData[p]} are ranked, each rank is converted to the proportion
 * {@code (rank - 0.5) / n}, and the original value is overwritten with the Z-score returned by
 * {@code normalInverse} for that proportion.
 *
 * @param datasetCovariates covariate data; its {@code rawData} is overwritten with Z-scores
 * @param datasetGenotypes  only {@code nrSamples} is read; it determines how many samples per probe are transformed
 */
private void forceNormalCovariates(ExpressionDataset datasetCovariates, ExpressionDataset datasetGenotypes) throws ArithmeticException {
	System.out.println("Enforcing normal distribution on covariates");

	NaturalRanking ranker = new NaturalRanking();

	// The buffer must hold exactly the samples that are copied in and written back.
	// Sizing it from a different dataset would leave zero-filled slots that take part in the ranking.
	final int nrSamples = datasetGenotypes.nrSamples;

	for (int p = 0; p < datasetCovariates.nrProbes; p++) {
		// Rank order the covariate values of this probe:
		double[] values = new double[nrSamples];
		for (int s = 0; s < nrSamples; s++) {
			values[s] = datasetCovariates.rawData[p][s];
		}
		double[] rankedValues = ranker.rank(values);
		// Replace each original value with its normal-distributed counterpart:
		for (int s = 0; s < nrSamples; s++) {
			// Convert the rank to a proportion strictly inside (0, 1); the 0.5 offset keeps it off the boundaries
			double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length);
			// Convert the proportion to a Z-score:
			double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue);
			datasetCovariates.rawData[p][s] = zScore; // Replace original value with the Z-score
		}
	}
}

Example 2

Source File: TestEQTLDatasetForInteractions.java From systemsgenetics with GNU General Public License v3.0

6 votes

/**
 * Rank-transforms every expression probe to a normal distribution, in place.
 *
 * <p>Each probe's values are ranked, every rank is turned into the proportion
 * {@code (rank - 0.5) / n}, and the value is overwritten with the Z-score that
 * {@code normalInverse} returns for that proportion.
 *
 * @param datasetExpression expression data; its {@code rawData} is overwritten with Z-scores
 */
private void forceNormalExpressionData(ExpressionDataset datasetExpression) throws ArithmeticException {
	System.out.println("Enforcing normal distribution on expression data:");

	NaturalRanking naturalRanking = new NaturalRanking();

	for (int probe = 0; probe < datasetExpression.nrProbes; probe++) {
		// Work on a copy so the ranking input is independent of the row being overwritten below.
		double[] snapshot = new double[datasetExpression.nrSamples];
		System.arraycopy(datasetExpression.rawData[probe], 0, snapshot, 0, datasetExpression.nrSamples);

		double[] ranks = naturalRanking.rank(snapshot);

		for (int sample = 0; sample < datasetExpression.nrSamples; sample++) {
			// Rank -> proportion inside (0, 1) -> Z-score
			double proportion = (0.5d + ranks[sample] - 1d) / (double) (ranks.length);
			datasetExpression.rawData[probe][sample] = cern.jet.stat.tdouble.Probability.normalInverse(proportion);
		}
	}

	System.out.println("Expression data now force normal");
}

Example 3

Source File: XDataFrameRank.java From morpheus-core with Apache License 2.0

5 votes

/**
 * Ranks the given values using the NaN and tie strategies currently configured in {@code DataFrameOptions}.
 * @param values    the values to rank
 * @return          the ranks of the input array, as produced by {@code NaturalRanking}
 * @throws DataFrameException if the configured NaN or tie strategy has no entry in the options map
 */
static double[] rank(double[] values) {
    // Resolve both strategies up front, before validating either of them.
    final NaNStrategy resolvedNaN = (NaNStrategy)optionsMap.get(NaNStrategy.class).get(DataFrameOptions.getNanStrategy());
    final TiesStrategy resolvedTies = (TiesStrategy)optionsMap.get(TiesStrategy.class).get(DataFrameOptions.getTieStrategy());
    if (resolvedNaN == null) {
        throw new DataFrameException("Unsupported NaN strategy specified: " + DataFrameOptions.getNanStrategy());
    }
    if (resolvedTies == null) {
        throw new DataFrameException("Unsupported tie strategy specified: " + DataFrameOptions.getTieStrategy());
    }
    return new NaturalRanking(resolvedNaN, resolvedTies).rank(values);
}

Example 4

Source File: DenseVectors.java From cc-dbp with Apache License 2.0

4 votes

/**
 * Ranks the elements of {@code x} with a default-configured {@code NaturalRanking}.
 *
 * @param x the values to rank
 * @return the array returned by {@code NaturalRanking#rank(double[])} for {@code x}
 */
public static double[] toRanks(double[] x) {
	return new NaturalRanking().rank(x);
}

Example 5

Source File: TestEQTLDatasetForInteractions.java From systemsgenetics with GNU General Public License v3.0

4 votes

/**
 * Regresses a set of covariates (plus, optionally, principal components) out of all other covariate
 * probes and then rank-transforms every probe to a normal distribution.
 *
 * <p>Steps, all operating on {@code datasetCovariatesPCAForceNormal} in place:
 * <ol>
 *   <li>Collect the rows named in {@code covsToCorrect2} and {@code covsToCorrect}, followed by
 *       {@code nrCompsToCorrectFor} component rows, into a temporary dataset.</li>
 *   <li>Write that dataset to {@code inputDir + "/CovariatesToCorrectFor.txt"}, run
 *       {@code orthogonalizeDataset} on the file, and read back the resulting
 *       {@code .PrincipalComponents.txt} and {@code .Eigenvalues.txt} files.</li>
 *   <li>For every probe that is not itself one of the named covariates, subtract the fitted
 *       contribution of each component whose eigenvalue exceeds {@code 1E-5}.</li>
 *   <li>Replace each probe's values by Z-scores derived from their ranks.</li>
 * </ol>
 *
 * @param covsToCorrect2                  names of covariates to correct for (stored first in the temporary dataset)
 * @param covsToCorrect                   further covariate names to correct for (stored after {@code covsToCorrect2})
 * @param datasetGenotypes                supplies {@code nrSamples} and {@code sampleNames}
 * @param datasetCovariatesPCAForceNormal covariate data; modified in place and returned
 * @param nrCompsToCorrectFor             number of component rows to add to the correction set
 * @return the same {@code datasetCovariatesPCAForceNormal} instance after correction and normalisation
 * @throws Exception if a name in {@code covsToCorrect2} or {@code covsToCorrect} is not a known probe
 */
private ExpressionDataset correctCovariateDataPCA(String[] covsToCorrect2, String[] covsToCorrect, ExpressionDataset datasetGenotypes, ExpressionDataset datasetCovariatesPCAForceNormal, int nrCompsToCorrectFor) throws Exception {

		System.out.println("Preparing data for testing eQTL effects of SNPs on covariate data:");
		System.out.println("Correcting covariate data for cohort specific effects:");

		ExpressionDataset datasetCovariatesToCorrectFor = new ExpressionDataset(covsToCorrect2.length + covsToCorrect.length + nrCompsToCorrectFor, datasetGenotypes.nrSamples);
		datasetCovariatesToCorrectFor.sampleNames = datasetGenotypes.sampleNames;

		// Names of all covariates being corrected for; only the keys are used (values stay null).
		HashMap<String, Object> hashCovsToCorrect = new HashMap<String, Object>();

		// add covariates from covsToCorrect2 (rows 0 .. covsToCorrect2.length - 1)
		for (int i = 0; i < covsToCorrect2.length; ++i) {
			String cov = covsToCorrect2[i];
			hashCovsToCorrect.put(cov, null);
			Integer c = datasetCovariatesPCAForceNormal.hashProbes.get(cov);
			if (c == null) {
				throw new Exception("Covariate not found: " + cov);
			}
			for (int s = 0; s < datasetGenotypes.nrSamples; s++) {
				datasetCovariatesToCorrectFor.rawData[i][s] = datasetCovariatesPCAForceNormal.rawData[c][s];
			}
		}

		// add covariates from covsToCorrect (rows following those of covsToCorrect2)
		for (int c = 0; c < covsToCorrect.length; c++) {
			String cov = covsToCorrect[c];
			hashCovsToCorrect.put(cov, null);
			Integer probeIndex = datasetCovariatesPCAForceNormal.hashProbes.get(cov);
			// Report an unknown covariate the same way as the loop above instead of failing with a bare NullPointerException.
			if (probeIndex == null) {
				throw new Exception("Covariate not found: " + cov);
			}
			for (int s = 0; s < datasetGenotypes.nrSamples; s++) {
				datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + c][s] = datasetCovariatesPCAForceNormal.rawData[probeIndex][s];
			}
		}

		// add PCs
		// NOTE(review): components are read starting 51 rows before the end of the dataset; presumably the
		// last 51 probes are the principal components - confirm, and note that nrCompsToCorrectFor > 51 would overrun.
		if (nrCompsToCorrectFor > 0) {
			for (int comp = 0; comp < nrCompsToCorrectFor; comp++) {
				for (int s = 0; s < datasetGenotypes.nrSamples; s++) {
					datasetCovariatesToCorrectFor.rawData[covsToCorrect2.length + covsToCorrect.length + comp][s] = datasetCovariatesPCAForceNormal.rawData[datasetCovariatesPCAForceNormal.nrProbes - 51 + comp][s];
				}
			}
		}

		datasetCovariatesToCorrectFor.transposeDataset();

		// Round-trip through disk: orthogonalizeDataset works on the saved file and writes its results next to it.
		datasetCovariatesToCorrectFor.save(inputDir + "/CovariatesToCorrectFor.txt");
		orthogonalizeDataset(inputDir + "/CovariatesToCorrectFor.txt");
		datasetCovariatesToCorrectFor = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.PrincipalComponents.txt");
		datasetCovariatesToCorrectFor.transposeDataset();
		ExpressionDataset datasetCovariatesToCorrectForEigenvalues = new ExpressionDataset(inputDir + "/CovariatesToCorrectFor.txt.Eigenvalues.txt");

		for (int p = 0; p < datasetCovariatesPCAForceNormal.nrProbes; p++) {
			// The covariates being corrected for are themselves left uncorrected.
			if (!hashCovsToCorrect.containsKey(datasetCovariatesPCAForceNormal.probeNames[p])) {
				for (int cov = 0; cov < datasetCovariatesToCorrectFor.nrProbes; cov++) {
					// Skip components with a (near-)zero eigenvalue.
					if (datasetCovariatesToCorrectForEigenvalues.rawData[cov][0] > 1E-5) {
						// rc[0] is used as the coefficient of the component (presumably the slope - confirm against getLinearRegressionCoefficients).
						double[] rc = getLinearRegressionCoefficients(datasetCovariatesToCorrectFor.rawData[cov], datasetCovariatesPCAForceNormal.rawData[p]);
						for (int s = 0; s < datasetGenotypes.nrSamples; s++) {
							datasetCovariatesPCAForceNormal.rawData[p][s] -= rc[0] * datasetCovariatesToCorrectFor.rawData[cov][s];
						}
					}
				}
			}
		}

		System.out.println("Enforcing normal distribution on covariates");

		NaturalRanking ranker = new NaturalRanking();

		for (int p = 0; p < datasetCovariatesPCAForceNormal.nrProbes; p++) {
			// Rank order the covariate values. The buffer is sized by the same sample count the loops
			// below iterate over, so no zero-filled padding can take part in the ranking.
			double[] values = new double[datasetGenotypes.nrSamples];
			for (int s = 0; s < datasetGenotypes.nrSamples; s++) {
				values[s] = datasetCovariatesPCAForceNormal.rawData[p][s];
			}
			double[] rankedValues = ranker.rank(values);
			// Replace each original value with its normal-distributed counterpart:
			for (int s = 0; s < datasetGenotypes.nrSamples; s++) {
				// Convert the rank to a proportion strictly inside (0, 1)
				double pValue = (0.5d + rankedValues[s] - 1d) / (double) (rankedValues.length);
				// Convert the proportion to a Z-score:
				double zScore = cern.jet.stat.tdouble.Probability.normalInverse(pValue);
				datasetCovariatesPCAForceNormal.rawData[p][s] = zScore; // Replace original value with the Z-score
			}
		}
		return datasetCovariatesPCAForceNormal;
	}

Example 6

Source File: DoubleMatrixDataset.java From systemsgenetics with GNU General Public License v3.0

3 votes

/**
 * Creates a copy of this dataset in which every row has been forced to a normal distribution
 * while keeping that row's own mean and standard deviation.
 *
 * <p>Each row is ranked (ties averaged; the ranking is configured with {@code NaNStrategy.FAILED}),
 * every rank is converted to the proportion {@code (rank - 0.5) / n}, and the new element is
 * {@code mean + normalInverse(proportion) * stdev}. This dataset is not modified.
 *
 * @return a new dataset built from the same row and column hashes, holding the transformed values
 */
public DoubleMatrixDataset<R, C> createRowForceNormalDuplicate() {

		DoubleMatrixDataset<R, C> normalized = new DoubleMatrixDataset<>(hashRows, hashCols);

		NaturalRanking ranker = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE);

		for (int rowIdx = 0; rowIdx < matrix.rows(); ++rowIdx) {

			double[] rowValues = matrix.viewRow(rowIdx).toArray();

			// Location and scale of the original row, restored after the rank transform.
			double rowMean = JSci.maths.ArrayMath.mean(rowValues);
			double rowStdev = JSci.maths.ArrayMath.standardDeviation(rowValues);

			double[] ranks = ranker.rank(rowValues);
			double rankCount = (double) (ranks.length);

			for (int colIdx = 0; colIdx < matrix.columns(); colIdx++) {
				// Rank -> proportion inside (0, 1) -> standard normal quantile
				double proportion = (0.5d + ranks[colIdx] - 1d) / rankCount;
				double zScore = cern.jet.stat.Probability.normalInverse(proportion);

				normalized.setElementQuick(rowIdx, colIdx, rowMean + zScore * rowStdev);
			}

		}

		return normalized;

	}

Example 7

Source File: DoubleMatrixDataset.java From systemsgenetics with GNU General Public License v3.0

3 votes

/**
 * Creates a copy of this dataset in which every column has been forced to a normal distribution
 * while keeping that column's own mean and standard deviation.
 *
 * <p>Each column is ranked (ties averaged; the ranking is configured with {@code NaNStrategy.FAILED}),
 * every rank is converted to the proportion {@code (rank - 0.5) / n}, and the new element is
 * {@code mean + normalInverse(proportion) * stdev}. This dataset is not modified.
 *
 * @return a new dataset built from the same row and column hashes, holding the transformed values
 */
public DoubleMatrixDataset<R, C> createColumnForceNormalDuplicate() {

		DoubleMatrixDataset<R, C> normalized = new DoubleMatrixDataset<>(hashRows, hashCols);

		NaturalRanking ranker = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE);

		for (int colIdx = 0; colIdx < matrix.columns(); ++colIdx) {

			double[] colValues = matrix.viewColumn(colIdx).toArray();

			// Location and scale of the original column, restored after the rank transform.
			double colMean = JSci.maths.ArrayMath.mean(colValues);
			double colStdev = JSci.maths.ArrayMath.standardDeviation(colValues);

			double[] ranks = ranker.rank(colValues);
			double rankCount = (double) (ranks.length);

			for (int rowIdx = 0; rowIdx < matrix.rows(); rowIdx++) {
				// Rank -> proportion inside (0, 1) -> standard normal quantile
				double proportion = (0.5d + ranks[rowIdx] - 1d) / rankCount;
				double zScore = cern.jet.stat.Probability.normalInverse(proportion);

				normalized.setElementQuick(rowIdx, colIdx, colMean + zScore * colStdev);
			}

		}

		return normalized;

	}