org.apache.commons.math3.distribution.MultivariateNormalDistribution Java Examples

The following examples show how to use org.apache.commons.math3.distribution.MultivariateNormalDistribution. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File:    From super-cloudops with Apache License 2.0 7 votes vote down vote up
/**
 * Builds a two-dimensional normal distribution with zero means and prints one
 * covariance entry followed by the density evaluated at several fixed points.
 *
 * NOTE(review): the closing brace of this method is not visible in this excerpt.
 */
public static void multivariateNormalDistributionTest3() {
	final double[] mu = { 0, 0 };
	// Symmetric 2x2 covariance: 2 on the diagonal, -1.1 off the diagonal.
	final double[][] sigma = { { 2, -1.1 }, { -1.1, 2 } };
	final MultivariateNormalDistribution mnd = new MultivariateNormalDistribution(mu, sigma);
	// Entry (row 1, column 0) of the covariance matrix reported by the distribution.
	System.out.println(mnd.getCovariances().getEntry(1, 0));
	// Density at a series of hand-picked points.
	System.out.println(mnd.density(new double[] { 1d, 1d }));
	System.out.println(mnd.density(new double[] { 1d, 2d }));
	System.out.println(mnd.density(new double[] { 1d, 3d }));
	System.out.println(mnd.density(new double[] { 2d, 2d }));
	System.out.println(mnd.density(new double[] { 2d, 3d }));
	System.out.println(mnd.density(new double[] { -2d, 3d }));
	System.out.println(mnd.density(new double[] { -2d, -1d }));
	// Same point as the second density call above.
	System.out.println(mnd.density(new double[] { 1d, 2d }));
	// A point far away from the (0, 0) mean.
	System.out.println(mnd.density(new double[] { 250d, 150d }));
Example #2
Source File:    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link MultivariateNormalDistribution} from a list of means and a
 * covariance matrix.
 *
 * NOTE(review): the closing braces of the {@code if} blocks, the loop and the
 * method are not visible in this excerpt.
 *
 * @param first  the means; cast to {@code List<Number>} without a prior type check
 * @param second the covariances; cast to {@code Matrix} without a prior type check
 * @return a new distribution built from the means and {@code covar.getData()}
 * @throws IOException if {@code first} or {@code second} is {@code null}
 */
public Object doWork(Object first, Object second) throws IOException{
  if(null == first){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the first value",toExpression(constructingFactory)));
  if(null == second){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the second value",toExpression(constructingFactory)));

  List<Number> means = (List<Number>)first;
  Matrix covar = (Matrix)second;

  // Unbox the means into the primitive array passed to the distribution constructor.
  double[] m = new double[means.size()];
  for(int i=0; i< m.length; i++) {
    m[i] = means.get(i).doubleValue();

  return new MultivariateNormalDistribution(m, covar.getData());
Example #3
Source File:    From macrobase with Apache License 2.0 6 votes vote down vote up
/**
 * Projects the metric vector of every record through a random matrix and
 * appends the transformed records to {@code output}.
 *
 * On the first call ({@code hasConsumed} is false) the projection matrix is
 * built from {@code mnd.sample(k)}, where {@code mnd} is a normal distribution
 * with a diagonal covariance of {@code 1d/n} per dimension.
 *
 * NOTE(review): the closing braces of the {@code if} block, the loop and the
 * method are not visible in this excerpt.
 *
 * @param records the records to transform; the first one determines the
 *                dimension on the first call, so an empty list would fail there
 * @throws Exception declared by the signature; no explicit throw is visible here
 */
public void consume(List<Datum> records) throws Exception {
    if (!hasConsumed) {
        // Dimension is taken from the first record only.
        n = records.get(0).metrics().getDimension();
        // presumably a zero-filled vector of size n — confirm against ArrayRealVector(int)
        mean = new ArrayRealVector(n);
        // Every diagonal entry of the covariance is 1/n.
        covV = new ArrayRealVector(n, 1d/n);
        covM = new DiagonalMatrix(covV.toArray());
        mnd = new MultivariateNormalDistribution(mean.toArray(), covM.getData());
        // The sampled rows become the rows of the projection matrix.
        randomProjectionMatrix = new BlockRealMatrix(mnd.sample(k));
        hasConsumed = true;
    for (Datum d: records){
        metricVector = d.metrics();
        transformedVector = randomProjectionMatrix.operate(metricVector);
        output.add(new Datum(d,transformedVector));
Example #4
Source File:    From macrobase with Apache License 2.0 6 votes vote down vote up
/**
 * Fits a {@code Gaussian} to test data and checks the fitted mean against the
 * means of the distribution returned by {@code getSample3dNormal()}, logging
 * how long the fit took.
 *
 * NOTE(review): this excerpt is incomplete — the body of the first loop, the
 * start of the final assertion and all closing braces are not visible;
 * consult the original test before reusing it.
 */
public void testFitGaussian() {
    MultivariateNormalDistribution mvNormal = getSample3dNormal();

    int N = 1000000;
    int k = 3;
    // Presized for N entries; the statement that fills the list is not visible here.
    List<double[]> testData = new ArrayList<>(N);
    for (int i = 0; i < N; i++) {

    // Wall-clock timing around the fit call only.
    long startTime = System.currentTimeMillis();
    Gaussian fitted = new Gaussian().fit(testData);
    long endTime = System.currentTimeMillis();
    log.debug("Fitted {} in: {}", N, endTime - startTime);
    // Fitted means must match the source means within 0.01 per component.
    assertArrayEquals(mvNormal.getMeans(), fitted.getMean(), 0.01);
    for (int i = 0; i < k; i++) {
                // Tail of a per-row covariance comparison with tolerance 0.05.
                fitted.getCovariance().getRow(i), 0.05);
Example #5
Source File:    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks the two-component mixture returned by
 * {@code MultivariateNormalMixtureExpectationMaximization.estimate} for the
 * test samples against precomputed weights and component distributions.
 *
 * NOTE(review): this excerpt is incomplete — the tolerance argument of the
 * weight assertion, any increment of {@code i} and the closing braces are not
 * visible; consult the original test before reusing it.
 */
public void testInitialMixture() {
    // Testing initial mixture estimated from data
    double[] correctWeights = new double[] { 0.5, 0.5 };

    MultivariateNormalDistribution[] correctMVNs = new MultivariateNormalDistribution[2];

    // Expected first component: means, then 2x2 covariance.
    correctMVNs[0] = new MultivariateNormalDistribution(new double[] {
                    -0.0021722935000328823, 3.5432892936887908 },
                    new double[][] {
                            { 4.537422569229048, 3.5266152281729304 },
                            { 3.5266152281729304, 6.175448814169779 } });
    // Expected second component: means, then 2x2 covariance.
    correctMVNs[1] = new MultivariateNormalDistribution(new double[] {
                    5.090902706507635, 8.68540656355283 }, new double[][] {
                    { 2.886778573963039, 1.5257474543463154 },
                    { 1.5257474543463154, 3.3794567673616918 } });

    final MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(getTestSamples(), 2);

    // i indexes the expected values that correspond to the current component.
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : initialMix
            .getComponents()) {
        Assert.assertEquals(correctWeights[i], component.getFirst(),
        // Components are compared through the distribution's equals().
        Assert.assertEquals(correctMVNs[i], component.getSecond());
Example #6
Source File:    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Sets up 3-column data together with a mixture of two 2-dimensional
 * components; the test passes only if a {@code DimensionMismatchException}
 * is thrown.
 *
 * NOTE(review): this excerpt is incomplete — the terminator of the
 * {@code data} initialiser, the statement that uses {@code badInitialMix}
 * with {@code fitter}, and the closing brace are not visible; consult the
 * original test before reusing it.
 */
@Test(expected = DimensionMismatchException.class)
public void testIncompatibleIntialMixture() {
    // Data has 3 columns
    double[][] data = new double[][] {
            { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 }
    double[] weights = new double[] { 0.5, 0.5 };

    // These distributions are compatible with 2-column data, not 3-column
    // data
    MultivariateNormalDistribution[] mvns = new MultivariateNormalDistribution[2];

    mvns[0] = new MultivariateNormalDistribution(new double[] {
                    -0.0021722935000328823, 3.5432892936887908 },
                    new double[][] {
                            { 4.537422569229048, 3.5266152281729304 },
                            { 3.5266152281729304, 6.175448814169779 } });
    mvns[1] = new MultivariateNormalDistribution(new double[] {
                    5.090902706507635, 8.68540656355283 }, new double[][] {
                    { 2.886778573963039, 1.5257474543463154 },
                    { 1.5257474543463154, 3.3794567673616918 } });

    // Create components and mixture
    List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>();
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[0], mvns[0]));
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[1], mvns[1]));

    MixtureMultivariateNormalDistribution badInitialMix
        = new MixtureMultivariateNormalDistribution(components);

    // NOTE(review): the doubled ';' below looks like residue of a stripped statement — confirm.
    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);;
Example #7
Source File:    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Sets up 3-column data together with a mixture of two 2-dimensional
 * components; the test passes only if a {@code DimensionMismatchException}
 * is thrown.
 *
 * NOTE(review): this excerpt is incomplete — the terminator of the
 * {@code data} initialiser, the statement that uses {@code badInitialMix}
 * with {@code fitter}, and the closing brace are not visible; consult the
 * original test before reusing it.
 */
@Test(expected = DimensionMismatchException.class)
public void testIncompatibleIntialMixture() {
    // Data has 3 columns
    double[][] data = new double[][] {
            { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 }
    double[] weights = new double[] { 0.5, 0.5 };

    // These distributions are compatible with 2-column data, not 3-column
    // data
    MultivariateNormalDistribution[] mvns = new MultivariateNormalDistribution[2];

    mvns[0] = new MultivariateNormalDistribution(new double[] {
                    -0.0021722935000328823, 3.5432892936887908 },
                    new double[][] {
                            { 4.537422569229048, 3.5266152281729304 },
                            { 3.5266152281729304, 6.175448814169779 } });
    mvns[1] = new MultivariateNormalDistribution(new double[] {
                    5.090902706507635, 8.68540656355283 }, new double[][] {
                    { 2.886778573963039, 1.5257474543463154 },
                    { 1.5257474543463154, 3.3794567673616918 } });

    // Create components and mixture
    List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>();
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[0], mvns[0]));
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[1], mvns[1]));

    MixtureMultivariateNormalDistribution badInitialMix
        = new MixtureMultivariateNormalDistribution(components);

    // NOTE(review): the doubled ';' below looks like residue of a stripped statement — confirm.
    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);;
Example #8
Source File:    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Sets up 3-column data together with a mixture of two 2-dimensional
 * components; the test passes only if a {@code DimensionMismatchException}
 * is thrown.
 *
 * NOTE(review): this excerpt is incomplete — the terminator of the
 * {@code data} initialiser, the statement that uses {@code badInitialMix}
 * with {@code fitter}, and the closing brace are not visible; consult the
 * original test before reusing it.
 */
@Test(expected = DimensionMismatchException.class)
public void testIncompatibleIntialMixture() {
    // Data has 3 columns
    double[][] data = new double[][] {
            { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 }
    double[] weights = new double[] { 0.5, 0.5 };

    // These distributions are compatible with 2-column data, not 3-column
    // data
    MultivariateNormalDistribution[] mvns = new MultivariateNormalDistribution[2];

    mvns[0] = new MultivariateNormalDistribution(new double[] {
                    -0.0021722935000328823, 3.5432892936887908 },
                    new double[][] {
                            { 4.537422569229048, 3.5266152281729304 },
                            { 3.5266152281729304, 6.175448814169779 } });
    mvns[1] = new MultivariateNormalDistribution(new double[] {
                    5.090902706507635, 8.68540656355283 }, new double[][] {
                    { 2.886778573963039, 1.5257474543463154 },
                    { 1.5257474543463154, 3.3794567673616918 } });

    // Create components and mixture
    List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>();
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[0], mvns[0]));
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[1], mvns[1]));

    MixtureMultivariateNormalDistribution badInitialMix
        = new MixtureMultivariateNormalDistribution(components);

    // NOTE(review): the doubled ';' below looks like residue of a stripped statement — confirm.
    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);;
Example #9
Source File:    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Sets up 3-column data together with a mixture of two 2-dimensional
 * components; the test passes only if a {@code DimensionMismatchException}
 * is thrown.
 *
 * NOTE(review): this excerpt is incomplete — the terminator of the
 * {@code data} initialiser, the statement that uses {@code badInitialMix}
 * with {@code fitter}, and the closing brace are not visible; consult the
 * original test before reusing it.
 */
@Test(expected = DimensionMismatchException.class)
public void testIncompatibleIntialMixture() {
    // Data has 3 columns
    double[][] data = new double[][] {
            { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 }
    double[] weights = new double[] { 0.5, 0.5 };

    // These distributions are compatible with 2-column data, not 3-column
    // data
    MultivariateNormalDistribution[] mvns = new MultivariateNormalDistribution[2];

    mvns[0] = new MultivariateNormalDistribution(new double[] {
                    -0.0021722935000328823, 3.5432892936887908 },
                    new double[][] {
                            { 4.537422569229048, 3.5266152281729304 },
                            { 3.5266152281729304, 6.175448814169779 } });
    mvns[1] = new MultivariateNormalDistribution(new double[] {
                    5.090902706507635, 8.68540656355283 }, new double[][] {
                    { 2.886778573963039, 1.5257474543463154 },
                    { 1.5257474543463154, 3.3794567673616918 } });

    // Create components and mixture
    List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>();
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[0], mvns[0]));
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[1], mvns[1]));

    MixtureMultivariateNormalDistribution badInitialMix
        = new MixtureMultivariateNormalDistribution(components);

    // NOTE(review): the doubled ';' below looks like residue of a stripped statement — confirm.
    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);;
Example #10
Source File:    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Sets up 3-column data together with a mixture of two 2-dimensional
 * components; the test passes only if a {@code DimensionMismatchException}
 * is thrown.
 *
 * NOTE(review): this excerpt is incomplete — the terminator of the
 * {@code data} initialiser, the statement that uses {@code badInitialMix}
 * with {@code fitter}, and the closing brace are not visible; consult the
 * original test before reusing it.
 */
@Test(expected = DimensionMismatchException.class)
public void testIncompatibleIntialMixture() {
    // Data has 3 columns
    double[][] data = new double[][] {
            { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 }
    double[] weights = new double[] { 0.5, 0.5 };

    // These distributions are compatible with 2-column data, not 3-column
    // data
    MultivariateNormalDistribution[] mvns = new MultivariateNormalDistribution[2];

    mvns[0] = new MultivariateNormalDistribution(new double[] {
                    -0.0021722935000328823, 3.5432892936887908 },
                    new double[][] {
                            { 4.537422569229048, 3.5266152281729304 },
                            { 3.5266152281729304, 6.175448814169779 } });
    mvns[1] = new MultivariateNormalDistribution(new double[] {
                    5.090902706507635, 8.68540656355283 }, new double[][] {
                    { 2.886778573963039, 1.5257474543463154 },
                    { 1.5257474543463154, 3.3794567673616918 } });

    // Create components and mixture
    List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>();
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[0], mvns[0]));
    components.add(new Pair<Double, MultivariateNormalDistribution>(
            weights[1], mvns[1]));

    MixtureMultivariateNormalDistribution badInitialMix
        = new MixtureMultivariateNormalDistribution(components);

    // NOTE(review): the doubled ';' below looks like residue of a stripped statement — confirm.
    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);;
Example #11
Source File:    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Loads 100000 two-metric rows from a gzipped CSV and checks that a
 * {@code BayesianNormalDensity} has a mean close to (0, 0) and reports
 * densities within 1e-3 of a standard bivariate normal at 100 randomly
 * chosen rows.
 *
 * NOTE(review): in this excerpt {@code bayesianNormal} is built from
 * {@code conf} only and no statement feeding {@code data} to it is visible;
 * the closing braces are also missing. Consult the original test.
 *
 * @throws Exception declared by the signature; no explicit throw is visible here
 */
public void bivariateNormalTest() throws Exception {
    // Make sure we are close to bivariate normal
    MacroBaseConf conf = new MacroBaseConf()
            .set(MacroBaseConf.TRANSFORM_TYPE, "BAYESIAN_NORMAL")
            .set(MacroBaseConf.DATA_LOADER_TYPE, "CSV_LOADER")
            .set(MacroBaseConf.CSV_COMPRESSION, CSVIngester.Compression.GZIP)
            .set(MacroBaseConf.CSV_INPUT_FILE, "src/test/resources/data/2d_standard_normal_100k.csv.gz")
            .set(MacroBaseConf.METRICS, "XX, YY")
            .set(MacroBaseConf.ATTRIBUTES, "");

    // Reference distribution: zero means, identity covariance.
    double[] means = {0, 0};
    double[][] variance = {{1, 0}, {0, 1}};
    MultivariateNormalDistribution bivariateNormal = new MultivariateNormalDistribution(means, variance);

    List<Datum> data = conf.constructIngester().getStream().drain();
    assertEquals(100000, data.size());

    BayesianNormalDensity bayesianNormal = new BayesianNormalDensity(conf);

    assertEquals(0, bayesianNormal.getMean().getEntry(0), 0.01);
    assertEquals(0, bayesianNormal.getMean().getEntry(1), 0.01);

    Datum d;
    int index;
    // No seed is passed, so the sampled rows differ from run to run.
    Random rand = new Random();
    for (int i =0 ; i < 100; i++ ) {
        index = rand.nextInt(data.size());
        d = data.get(index);
        // Reference density and model density must agree within 1e-3.
        assertEquals(bivariateNormal.density(d.metrics().toArray()), bayesianNormal.getDensity(d), 1e-3);
Example #12
Source File:    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a multivariate normal backed by a Commons Math
 * {@link MultivariateNormalDistribution}.
 *
 * @param mean  mean vector of the distribution
 * @param sigma covariance matrix of the distribution
 */
public MultivariateNormal(RealVector mean, RealMatrix sigma) {
    // RealMatrix#getData() returns the entries as a fresh row-major double[][].
    // The previous hand-rolled copy sized the array as [columns][rows] and
    // looped over the column count while reading rows, which is only correct
    // for square input; delegating avoids the swapped dimensions and leaves
    // shape validation to the MultivariateNormalDistribution constructor.
    distribution = new MultivariateNormalDistribution(mean.toArray(), sigma.getData());
}
Example #13
Source File:    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the Mahalanobis score from {@code Gaussian} and the negative
 * log-density from the source distribution order each pair of consecutive
 * samples the same way, logging how long each scoring pass takes.
 *
 * NOTE(review): the closing braces of the loops and of the method are not
 * visible in this excerpt.
 */
public void testMahalanobis() {
    MultivariateNormalDistribution mvNormal = getSample3dNormal();
    // Gaussian under test shares the means and covariances of the source distribution.
    Gaussian gaussian = new Gaussian(mvNormal.getMeans(), mvNormal.getCovariances());

    int N = 100000;
    int k = 3;
    double[][] testData = new double[N][k];
    for (int i = 0; i < N; i++) {
        // Each preallocated row is replaced by the array returned from sample().
        testData[i] = mvNormal.sample();

    double[] mScores = new double[N];
    long startTime = System.currentTimeMillis();
    for (int i = 0; i < N; i++) {
        mScores[i] = gaussian.mahalanobis(testData[i]);
    long endTime = System.currentTimeMillis();
    log.debug("Mahalobis distance on {} in {}", N, endTime-startTime);

    double[] dScores = new double[N];
    startTime = System.currentTimeMillis();
    for (int i = 0; i < N; i++) {
        dScores[i] = -Math.log(mvNormal.density(testData[i]));
    endTime = System.currentTimeMillis();
    log.debug("LogPDF on {} in {}", N, endTime-startTime);

    // Check that the Mahalanobis distance has the same relative ordering as -log(pdf)
    // for every pair of consecutive samples (hence the loop starts at 1).
    for (int i = 1; i < N; i++) {
        assertEquals(mScores[i] > mScores[i-1], dScores[i] > dScores[i-1]);
Example #14
Source File:    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a three-dimensional normal distribution with means (1, 2, 3).
 *
 * NOTE(review): the rows of the {@code cov} initialiser and the closing brace
 * are not visible in this excerpt, so the covariance values cannot be
 * documented from here.
 *
 * @return the sample distribution
 */
private static MultivariateNormalDistribution getSample3dNormal() {
    double[] mean = {1,2,3};
    double[][] cov = {
    MultivariateNormalDistribution mvNormal = new MultivariateNormalDistribution(mean, cov);
    return mvNormal;
Example #15
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks the two-component mixture returned by
 * {@code MultivariateNormalMixtureExpectationMaximization.estimate} for the
 * test samples against precomputed weights, means and covariance matrices.
 *
 * NOTE(review): several statements in this excerpt are cut off mid-expression
 * (the {@code correctMeans} terminator, the {@code correctMVNs} allocation and
 * assignments, the tolerance of the weight assertion), and neither an
 * increment of {@code i} nor the closing braces are visible; consult the
 * original test before reusing it.
 */
public void testInitialMixture() {
    // Testing initial mixture estimated from data
    final double[] correctWeights = new double[] { 0.5, 0.5 };

    // One row of expected means per component.
    final double[][] correctMeans = new double[][] {
        {-0.0021722935000328823, 3.5432892936887908},
        {5.090902706507635, 8.68540656355283},

    final RealMatrix[] correctCovMats = new Array2DRowRealMatrix[2];

    correctCovMats[0] = new Array2DRowRealMatrix(new double[][] {
            { 4.537422569229048, 3.5266152281729304 },
            { 3.5266152281729304, 6.175448814169779 } });

    correctCovMats[1] = new Array2DRowRealMatrix( new double[][] {
            { 2.886778573963039, 1.5257474543463154 },
            { 1.5257474543463154, 3.3794567673616918 } });

    final MultivariateNormalDistribution[] correctMVNs = new

    correctMVNs[0] = new MultivariateNormalDistribution(correctMeans[0],

    correctMVNs[1] = new MultivariateNormalDistribution(correctMeans[1],

    final MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(getTestSamples(), 2);

    // i indexes the expected values that correspond to the current component.
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : initialMix
            .getComponents()) {
        Assert.assertEquals(correctWeights[i], component.getFirst(),
        final double[] means = component.getValue().getMeans();
        // Element-wise comparison with no tolerance.
        Assert.assertTrue(Arrays.equals(correctMeans[i], means));
        final RealMatrix covMat = component.getValue().getCovariances();
        // Matrices are compared through equals().
        Assert.assertEquals(correctCovMats[i], covMat);
Example #16
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Helper method to create a multivariate normal mixture model which can be
 * used to initialize {@link #fit(MixtureMultivariateNormalDistribution)}.
 * This method uses the supplied {@code data} to try to determine
 * a good mixture model at which to start the fit, but it is not guaranteed
 * to supply a model which will find the optimal solution or even converge.
 * @param data Data to estimate distribution
 * @param numComponents Number of components for estimated mixture
 * @return Multivariate normal mixture model estimated from the data
 * @throws NumberIsTooLargeException if {@code numComponents} is greater
 * than the number of data rows.
 * @throws NumberIsTooSmallException if {@code numComponents < 2}.
 * @throws NotStrictlyPositiveException if data has fewer than 2 rows
 * @throws DimensionMismatchException if rows of data have different numbers
 *             of columns
 */
public static MixtureMultivariateNormalDistribution estimate(final double[][] data,
                                                             final int numComponents)
    throws NotStrictlyPositiveException,
           DimensionMismatchException {
    // Argument checks: at least 2 data rows, at least 2 components, and no
    // more components than rows.
    // NOTE(review): the closing braces of these checks, of the loops and of
    // the method are not visible in this excerpt.
    if (data.length < 2) {
        throw new NotStrictlyPositiveException(data.length);
    if (numComponents < 2) {
        throw new NumberIsTooSmallException(numComponents, 2, true);
    if (numComponents > data.length) {
        throw new NumberIsTooLargeException(numComponents, data.length, true);

    final int numRows = data.length;
    // Column count is taken from the first row only.
    final int numCols = data[0].length;

    // sort the data
    // NOTE(review): only the wrapping of rows into DataRow objects is visible
    // here; no sort call appears in this excerpt — confirm against the original.
    final DataRow[] sortedData = new DataRow[numRows];
    for (int i = 0; i < numRows; i++) {
        sortedData[i] = new DataRow(data[i]);

    // uniform weight for each bin
    final double weight = 1d / numComponents;

    // components of mixture model to be created
    final List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>(numComponents);

    // create a component based on data in each bin
    // (integer division below splits the rows into numComponents contiguous
    // index ranges that together cover 0..numRows)
    for (int binIndex = 0; binIndex < numComponents; binIndex++) {
        // minimum index (inclusive) from sorted data for this bin
        final int minIndex = (binIndex * numRows) / numComponents;

        // maximum index (exclusive) from sorted data for this bin
        final int maxIndex = ((binIndex + 1) * numRows) / numComponents;

        // number of data records that will be in this bin
        final int numBinRows = maxIndex - minIndex;

        // data for this bin
        final double[][] binData = new double[numBinRows][numCols];

        // mean of each column for the data in this bin
        // (holds running column sums until it is scaled below)
        final double[] columnMeans = new double[numCols];

        // populate bin and create component
        for (int i = minIndex, iBin = 0; i < maxIndex; i++, iBin++) {
            for (int j = 0; j < numCols; j++) {
                final double val = sortedData[i].getRow()[j];
                columnMeans[j] += val;
                binData[iBin][j] = val;

        // Scale the accumulated column sums by 1/numBinRows to obtain the means.
        MathArrays.scaleInPlace(1d / numBinRows, columnMeans);

        // covariance matrix for this bin
        final double[][] covMat
            = new Covariance(binData).getCovarianceMatrix().getData();
        final MultivariateNormalDistribution mvn
            = new MultivariateNormalDistribution(columnMeans, covMat);

        components.add(new Pair<Double, MultivariateNormalDistribution>(weight, mvn));

    return new MixtureMultivariateNormalDistribution(components);
Example #17
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Compares the weight, means and covariance matrix of every component of the
 * fitted mixture with precomputed reference values.
 *
 * NOTE(review): this excerpt is incomplete — the terminators of the
 * {@code correctMeans} initialiser and of the first {@code correctCovMats}
 * assignment, the statement that runs the fit, any use of
 * {@code correctLogLikelihood}, {@code correctMVNs} and {@code initialMix},
 * any increment of {@code i}, and the closing braces are not visible;
 * consult the original test before reusing it.
 */
public void testFit() {
    // Test that the loglikelihood, weights, and models are determined and
    // fitted correctly
    final double[][] data = getTestSamples();
    final double correctLogLikelihood = -4.292431006791994;
    final double[] correctWeights = new double[] { 0.2962324189652912, 0.7037675810347089 };
    // One row of expected means per component.
    final double[][] correctMeans = new double[][]{
        {-1.4213112715121132, 1.6924690505757753},
        {4.213612224374709, 7.975621325853645}
    final RealMatrix[] correctCovMats = new Array2DRowRealMatrix[2];
    correctCovMats[0] = new Array2DRowRealMatrix(new double[][] {
        { 1.739356907285747, -0.5867644251487614 },
        { -0.5867644251487614, 1.0232932029324642 } }
    correctCovMats[1] = new Array2DRowRealMatrix(new double[][] {
        { 4.245384898007161, 2.5797798966382155 },
        { 2.5797798966382155, 3.9200272522448367 } });
    final MultivariateNormalDistribution[] correctMVNs = new MultivariateNormalDistribution[2];
    correctMVNs[0] = new MultivariateNormalDistribution(correctMeans[0], correctCovMats[0].getData());
    correctMVNs[1] = new MultivariateNormalDistribution(correctMeans[1], correctCovMats[1].getData());

    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);

    // NOTE(review): the doubled ';' below looks like residue of a stripped statement — confirm.
    MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(data, 2);;
    MixtureMultivariateNormalDistribution fittedMix = fitter.getFittedModel();
    List<Pair<Double, MultivariateNormalDistribution>> components = fittedMix.getComponents();


    // i indexes the expected values that correspond to the current component.
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : components) {
        final double weight = component.getFirst();
        final MultivariateNormalDistribution mvn = component.getSecond();
        final double[] mean = mvn.getMeans();
        final RealMatrix covMat = mvn.getCovariances();
        // Weights may differ from the reference by at most one ulp of 1.0.
        Assert.assertEquals(correctWeights[i], weight, Math.ulp(1d));
        // Element-wise comparison with no tolerance.
        Assert.assertTrue(Arrays.equals(correctMeans[i], mean));
        // Matrices are compared through equals().
        Assert.assertEquals(correctCovMats[i], covMat);
Example #18
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks the two-component mixture returned by
 * {@code MultivariateNormalMixtureExpectationMaximization.estimate} for the
 * test samples against precomputed weights, means and covariance matrices.
 *
 * NOTE(review): several statements in this excerpt are cut off mid-expression
 * (the {@code correctMeans} terminator, the {@code correctMVNs} allocation and
 * assignments, the tolerance of the weight assertion), and neither an
 * increment of {@code i} nor the closing braces are visible; consult the
 * original test before reusing it.
 */
public void testInitialMixture() {
    // Testing initial mixture estimated from data
    final double[] correctWeights = new double[] { 0.5, 0.5 };

    // One row of expected means per component.
    final double[][] correctMeans = new double[][] {
        {-0.0021722935000328823, 3.5432892936887908},
        {5.090902706507635, 8.68540656355283},

    final RealMatrix[] correctCovMats = new Array2DRowRealMatrix[2];

    correctCovMats[0] = new Array2DRowRealMatrix(new double[][] {
            { 4.537422569229048, 3.5266152281729304 },
            { 3.5266152281729304, 6.175448814169779 } });

    correctCovMats[1] = new Array2DRowRealMatrix( new double[][] {
            { 2.886778573963039, 1.5257474543463154 },
            { 1.5257474543463154, 3.3794567673616918 } });

    final MultivariateNormalDistribution[] correctMVNs = new

    correctMVNs[0] = new MultivariateNormalDistribution(correctMeans[0],

    correctMVNs[1] = new MultivariateNormalDistribution(correctMeans[1],

    final MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(getTestSamples(), 2);

    // i indexes the expected values that correspond to the current component.
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : initialMix
            .getComponents()) {
        Assert.assertEquals(correctWeights[i], component.getFirst(),
        final double[] means = component.getValue().getMeans();
        // Element-wise comparison with no tolerance.
        Assert.assertTrue(Arrays.equals(correctMeans[i], means));
        final RealMatrix covMat = component.getValue().getCovariances();
        // Matrices are compared through equals().
        Assert.assertEquals(correctCovMats[i], covMat);
Example #19
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Helper method to create a multivariate normal mixture model which can be
 * used to initialize {@link #fit(MixtureMultivariateNormalDistribution)}.
 * This method uses the supplied {@code data} to try to determine
 * a good mixture model at which to start the fit, but it is not guaranteed
 * to supply a model which will find the optimal solution or even converge.
 * @param data Data to estimate distribution
 * @param numComponents Number of components for estimated mixture
 * @return Multivariate normal mixture model estimated from the data
 * @throws NumberIsTooLargeException if {@code numComponents} is greater
 * than the number of data rows.
 * @throws NumberIsTooSmallException if {@code numComponents < 2}.
 * @throws NotStrictlyPositiveException if data has fewer than 2 rows
 * @throws DimensionMismatchException if rows of data have different numbers
 *             of columns
 */
public static MixtureMultivariateNormalDistribution estimate(final double[][] data,
                                                             final int numComponents)
    throws NotStrictlyPositiveException,
           DimensionMismatchException {
    // Argument checks: at least 2 data rows, at least 2 components, and no
    // more components than rows.
    // NOTE(review): the closing braces of these checks, of the loops and of
    // the method are not visible in this excerpt.
    if (data.length < 2) {
        throw new NotStrictlyPositiveException(data.length);
    if (numComponents < 2) {
        throw new NumberIsTooSmallException(numComponents, 2, true);
    if (numComponents > data.length) {
        throw new NumberIsTooLargeException(numComponents, data.length, true);

    final int numRows = data.length;
    // Column count is taken from the first row only.
    final int numCols = data[0].length;

    // sort the data
    // NOTE(review): only the wrapping of rows into DataRow objects is visible
    // here; no sort call appears in this excerpt — confirm against the original.
    final DataRow[] sortedData = new DataRow[numRows];
    for (int i = 0; i < numRows; i++) {
        sortedData[i] = new DataRow(data[i]);

    // uniform weight for each bin
    final double weight = 1d / numComponents;

    // components of mixture model to be created
    final List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>();

    // create a component based on data in each bin
    // (integer division below splits the rows into numComponents contiguous
    // index ranges that together cover 0..numRows)
    for (int binIndex = 0; binIndex < numComponents; binIndex++) {
        // minimum index (inclusive) from sorted data for this bin
        final int minIndex = (binIndex * numRows) / numComponents;

        // maximum index (exclusive) from sorted data for this bin
        final int maxIndex = ((binIndex + 1) * numRows) / numComponents;

        // number of data records that will be in this bin
        final int numBinRows = maxIndex - minIndex;

        // data for this bin
        final double[][] binData = new double[numBinRows][numCols];

        // mean of each column for the data in this bin
        // (holds running column sums until it is scaled below)
        final double[] columnMeans = new double[numCols];

        // populate bin and create component
        for (int i = minIndex, iBin = 0; i < maxIndex; i++, iBin++) {
            for (int j = 0; j < numCols; j++) {
                final double val = sortedData[i].getRow()[j];
                columnMeans[j] += val;
                binData[iBin][j] = val;

        // Scale the accumulated column sums by 1/numBinRows to obtain the means.
        MathArrays.scaleInPlace(1d / numBinRows, columnMeans);

        // covariance matrix for this bin
        final double[][] covMat
            = new Covariance(binData).getCovarianceMatrix().getData();
        final MultivariateNormalDistribution mvn
            = new MultivariateNormalDistribution(columnMeans, covMat);

        components.add(new Pair<Double, MultivariateNormalDistribution>(weight, mvn));

    return new MixtureMultivariateNormalDistribution(components);
Example #20
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Compares the weight and distribution of every component of the fitted
 * mixture with precomputed reference values.
 *
 * NOTE(review): this excerpt is incomplete — the statement that runs the fit,
 * any use of {@code correctLogLikelihood} and {@code initialMix}, any
 * increment of {@code i}, and the closing braces are not visible; consult the
 * original test before reusing it.
 */
public void testFit() {
    // Test that the loglikelihood, weights, and models are determined and
    // fitted correctly
    double[][] data = getTestSamples();
    double correctLogLikelihood = -4.292431006791994;
    double[] correctWeights = new double[] { 0.2962324189652912, 0.7037675810347089 };
    MultivariateNormalDistribution[] correctMVNs = new MultivariateNormalDistribution[2];
    // Expected first component: means, then 2x2 covariance.
    correctMVNs[0] = new MultivariateNormalDistribution(new double[] {
                    -1.4213112715121132, 1.6924690505757753 },
                    new double[][] {
                            { 1.739356907285747, -0.5867644251487614 },
                            { -0.5867644251487614, 1.0232932029324642 } });

    // Expected second component: means, then 2x2 covariance.
    correctMVNs[1] = new MultivariateNormalDistribution(new double[] {
                    4.213612224374709, 7.975621325853645 },
                    new double[][] {
                            { 4.245384898007161, 2.5797798966382155 },
                            { 2.5797798966382155, 3.9200272522448367 } });

    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);

    // NOTE(review): the doubled ';' below looks like residue of a stripped statement — confirm.
    MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(data, 2);;
    MixtureMultivariateNormalDistribution fittedMix = fitter.getFittedModel();
    List<Pair<Double, MultivariateNormalDistribution>> components = fittedMix.getComponents();


    // i indexes the expected values that correspond to the current component.
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : components) {
        double weight = component.getFirst();
        MultivariateNormalDistribution mvn = component.getSecond();
        // Weights may differ from the reference by at most one ulp of 1.0.
        Assert.assertEquals(correctWeights[i], weight, Math.ulp(1d));
        // Components are compared through the distribution's equals().
        Assert.assertEquals(correctMVNs[i], mvn);
Example #21
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/*
 * Compares the mixture model obtained from the expectation-maximization
 * fitter for the test samples with hard-coded expected weights, mean
 * vectors and covariance matrices.
 *
 * NOTE(review): this listing appears truncated. The correctMeans and
 * correctCovMats[0] initializers are not terminated, correctLogLikelihood,
 * correctMVNs and initialMix are never read in the lines shown, no fit call
 * on the fitter is visible, the counter i is never incremented and the
 * closing braces are absent. Confirm against the complete source.
 */
public void testFit() {
    // Test that the loglikelihood, weights, and models are determined and
    // fitted correctly
    final double[][] data = getTestSamples();
    final double correctLogLikelihood = -4.292431006791994;
    final double[] correctWeights = new double[] { 0.2962324189652912, 0.7037675810347089 };
    // Expected mean vector of each component (one row per component)
    final double[][] correctMeans = new double[][]{
        {-1.4213112715121132, 1.6924690505757753},
        {4.213612224374709, 7.975621325853645}
    // Expected 2x2 covariance matrix of each component
    final RealMatrix[] correctCovMats = new Array2DRowRealMatrix[2];
    correctCovMats[0] = new Array2DRowRealMatrix(new double[][] {
        { 1.739356907285747, -0.5867644251487614 },
        { -0.5867644251487614, 1.0232932029324642 } }
    correctCovMats[1] = new Array2DRowRealMatrix(new double[][] {
        { 4.245384898007161, 2.5797798966382155 },
        { 2.5797798966382155, 3.9200272522448367 } });
    // Distributions assembled from the expected means and covariances above
    final MultivariateNormalDistribution[] correctMVNs = new MultivariateNormalDistribution[2];
    correctMVNs[0] = new MultivariateNormalDistribution(correctMeans[0], correctCovMats[0].getData());
    correctMVNs[1] = new MultivariateNormalDistribution(correctMeans[1], correctCovMats[1].getData());

    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);

    // Two-component starting mixture estimated directly from the data
    MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(data, 2);;
    MixtureMultivariateNormalDistribution fittedMix = fitter.getFittedModel();
    List<Pair<Double, MultivariateNormalDistribution>> components = fittedMix.getComponents();


    // Check every fitted component against the expected entry at index i
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : components) {
        final double weight = component.getFirst();
        final MultivariateNormalDistribution mvn = component.getSecond();
        final double[] mean = mvn.getMeans();
        final RealMatrix covMat = mvn.getCovariances();
        // Weights must agree to within one ulp of 1.0
        Assert.assertEquals(correctWeights[i], weight, Math.ulp(1d));
        // Means are compared element by element with exact equality
        Assert.assertTrue(Arrays.equals(correctMeans[i], mean));
        // NOTE(review): relies on the equals() of the RealMatrix
        // implementation; confirm that it compares entries by value
        Assert.assertEquals(correctCovMats[i], covMat);
Example #22
Source File:    From pyramid with Apache License 2.0 4 votes vote down vote up
 * 3 labels, 3 features, multi-variate gaussian noise
 * y0: w=(0,1,0)
 * y1: w=(1,0,0)
 * y2: w=(0,0,1)
 * @return
/*
 * Builds a synthetic multi-label data set with numData rows, 3 features and
 * 3 classes. Each class score is the dot product of a per-class weight
 * vector with the feature row plus one coordinate of a sample drawn from a
 * multivariate normal distribution. The method counts how often the noise
 * changes the sign of the score, prints that count and returns the data set.
 *
 * NOTE(review): this listing appears truncated. The builder chain is not
 * terminated, loop and if bodies are not closed, the body of
 * "if (score>=0)" is missing, and only off-diagonal covariance entries are
 * assigned in the lines shown. Confirm against the complete source.
 */
public static MultiLabelClfDataSet gaussianNoise(int numData){
    int numClass = 3;
    int numFeature = 3;

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature)

    // generate weights
    // one weight vector of length numFeature per class
    Vector[] weights = new Vector[numClass];
    for (int k=0;k<numClass;k++){
        Vector vector = new DenseVector(numFeature);
        weights[k] = vector;


    // class 1 puts weight 1 on feature 0
    weights[1].set(0, 1);

    // class 2 puts weight 1 on feature 2
    weights[2].set(2, 1);

    // generate features
    // presumably each feature is drawn uniformly from [-1, 1]; verify
    // against Sampling.doubleUniform
    for (int i=0;i<numData;i++){
        for (int j=0;j<numFeature;j++){
            dataSet.setFeatureValue(i,j,Sampling.doubleUniform(-1, 1));

    // noise means are left at the Java default of 0.0 for every class
    double[] means = new double[numClass];
    // symmetric off-diagonal covariances between the class noises
    // NOTE(review): no diagonal (variance) entries are set in the lines
    // shown, so they stay 0.0 here; they were probably dropped from the listing
    double[][] covars = new double[numClass][numClass];
    covars[0][1]=0.02;         covars[1][0]=0.02;
    covars[0][2]=-0.03;         covars[2][0]=-0.03;

    covars[1][2]=-0.03;         covars[2][1]=-0.03;


    MultivariateNormalDistribution distribution = new MultivariateNormalDistribution(means,covars);

    // assign labels
    int numFlipped = 0;
    for (int i=0;i<numData;i++){
        // one noise value per class for this row
        double[] noises = distribution.sample();
        for (int k=0;k<numClass;k++){
            // noiseless score, then the score after adding the noise
            double dot = weights[k].dot(dataSet.getRow(i));
            double score = dot + noises[k];
            if (score>=0){
            // a negative product means the noise changed the sign of the score
            if (dot*score<0){
                numFlipped += 1;

    System.out.println("number of flipped bits = "+numFlipped);
    return dataSet;
Example #23
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
 * Helper method to create a multivariate normal mixture model which can be
 * used to initialize {@link #fit(MixtureMultivariateRealDistribution)}.
 * This method uses the data supplied to the constructor to try to determine
 * a good mixture model at which to start the fit, but it is not guaranteed
 * to supply a model which will find the optimal solution or even converge.
 * @param data Data to estimate distribution
 * @param numComponents Number of components for estimated mixture
 * @return Multivariate normal mixture model estimated from the data
 * @throws NumberIsTooLargeException if {@code numComponents} is greater
 * than the number of data rows.
 * @throws NumberIsTooSmallException if {@code numComponents < 2}.
 * @throws NotStrictlyPositiveException if data has less than 2 rows
 * @throws DimensionMismatchException if rows of data have different numbers
 *             of columns
 * @see #fit
// Builds a starting mixture model: the rows are split into numComponents
// contiguous bins and each bin yields one equally weighted component whose
// mean and covariance are computed from the rows in that bin.
//
// NOTE(review): this listing appears truncated. Closing braces are absent
// and no sort call follows the "sort the data" comment in the lines shown.
// Confirm against the complete source.
public static MixtureMultivariateNormalDistribution estimate(final double[][] data,
                                                             final int numComponents)
    throws NotStrictlyPositiveException,
           DimensionMismatchException {
    // Argument checks: at least 2 rows, and 2 <= numComponents <= number of rows
    if (data.length < 2) {
        throw new NotStrictlyPositiveException(data.length);
    if (numComponents < 2) {
        throw new NumberIsTooSmallException(numComponents, 2, true);
    if (numComponents > data.length) {
        throw new NumberIsTooLargeException(numComponents, data.length, true);

    final int numRows = data.length;
    // column count is taken from the first row only
    final int numCols = data[0].length;

    // sort the data
    // wrap every input row in a DataRow
    final DataRow[] sortedData = new DataRow[numRows];
    for (int i = 0; i < numRows; i++) {
        sortedData[i] = new DataRow(data[i]);

    final int totalBins = numComponents;

    // uniform weight for each bin
    final double weight = 1d / totalBins;

    // components of mixture model to be created
    final List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>();

    // create a component based on data in each bin
    for (int binNumber = 1; binNumber <= totalBins; binNumber++) {
        // minimum index from sorted data for this bin
        // NOTE(review): every operand of the division is an int, so the
        // quotient is truncated before floor is applied and floor changes nothing
        final int minIndex
            = (int) FastMath.max(0,
                                 FastMath.floor((binNumber - 1) * numRows / totalBins));

        // maximum index from sorted data for this bin
        // NOTE(review): same integer division here, so ceil never rounds up;
        // the inclusive upper bound is (binNumber * numRows / numComponents) - 1
        final int maxIndex
            = (int) FastMath.ceil(binNumber * numRows / numComponents) - 1;

        // number of data records that will be in this bin
        final int numBinRows = maxIndex - minIndex + 1;

        // data for this bin
        final double[][] binData = new double[numBinRows][numCols];

        // mean of each column for the data in this bin
        final double[] columnMeans = new double[numCols];

        // populate bin and create component
        // copy the rows of this bin and accumulate the column sums
        for (int i = minIndex, iBin = 0; i <= maxIndex; i++, iBin++) {
            for (int j = 0; j < numCols; j++) {
                final double val = sortedData[i].getRow()[j];
                columnMeans[j] += val;
                binData[iBin][j] = val;

        // turn the column sums into column means
        MathArrays.scaleInPlace(1d / numBinRows, columnMeans);

        // covariance matrix for this bin
        final double[][] covMat
            = new Covariance(binData).getCovarianceMatrix().getData();
        final MultivariateNormalDistribution mvn
            = new MultivariateNormalDistribution(columnMeans, covMat);

        components.add(new Pair<Double, MultivariateNormalDistribution>(weight, mvn));

    return new MixtureMultivariateNormalDistribution(components);
Example #24
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/*
 * Compares the mixture model obtained from the expectation-maximization
 * fitter for the test samples with hard-coded expected weights, mean
 * vectors and covariance matrices.
 *
 * NOTE(review): this listing appears truncated. The correctMeans and
 * correctCovMats[0] initializers are not terminated, correctLogLikelihood,
 * correctMVNs and initialMix are never read in the lines shown, no fit call
 * on the fitter is visible, the counter i is never incremented and the
 * closing braces are absent. Confirm against the complete source.
 */
public void testFit() {
    // Test that the loglikelihood, weights, and models are determined and
    // fitted correctly
    final double[][] data = getTestSamples();
    final double correctLogLikelihood = -4.292431006791994;
    final double[] correctWeights = new double[] { 0.2962324189652912, 0.7037675810347089 };
    // Expected mean vector of each component (one row per component)
    final double[][] correctMeans = new double[][]{
        {-1.4213112715121132, 1.6924690505757753},
        {4.213612224374709, 7.975621325853645}
    // Expected 2x2 covariance matrix of each component
    final RealMatrix[] correctCovMats = new Array2DRowRealMatrix[2];
    correctCovMats[0] = new Array2DRowRealMatrix(new double[][] {
        { 1.739356907285747, -0.5867644251487614 },
        { -0.5867644251487614, 1.0232932029324642 } }
    correctCovMats[1] = new Array2DRowRealMatrix(new double[][] {
        { 4.245384898007161, 2.5797798966382155 },
        { 2.5797798966382155, 3.9200272522448367 } });
    // Distributions assembled from the expected means and covariances above
    final MultivariateNormalDistribution[] correctMVNs = new MultivariateNormalDistribution[2];
    correctMVNs[0] = new MultivariateNormalDistribution(correctMeans[0], correctCovMats[0].getData());
    correctMVNs[1] = new MultivariateNormalDistribution(correctMeans[1], correctCovMats[1].getData());

    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);

    // Two-component starting mixture estimated directly from the data
    MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(data, 2);;
    MixtureMultivariateNormalDistribution fittedMix = fitter.getFittedModel();
    List<Pair<Double, MultivariateNormalDistribution>> components = fittedMix.getComponents();


    // Check every fitted component against the expected entry at index i
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : components) {
        final double weight = component.getFirst();
        final MultivariateNormalDistribution mvn = component.getSecond();
        final double[] mean = mvn.getMeans();
        final RealMatrix covMat = mvn.getCovariances();
        // Weights must agree to within one ulp of 1.0
        Assert.assertEquals(correctWeights[i], weight, Math.ulp(1d));
        // Means are compared element by element with exact equality
        Assert.assertTrue(Arrays.equals(correctMeans[i], mean));
        // NOTE(review): relies on the equals() of the RealMatrix
        // implementation; confirm that it compares entries by value
        Assert.assertEquals(correctCovMats[i], covMat);
Example #25
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/*
 * Checks the two-component starting mixture that estimate() derives from
 * the test samples against hard-coded expected weights, mean vectors and
 * covariance matrices.
 *
 * NOTE(review): this listing appears truncated. The correctMeans
 * initializer, the correctMVNs declaration and assignments, and the weight
 * assertion all break off mid-statement, the counter i is never incremented
 * and the closing braces are absent. Confirm against the complete source.
 */
public void testInitialMixture() {
    // Testing initial mixture estimated from data
    // both components are expected to carry the same weight
    final double[] correctWeights = new double[] { 0.5, 0.5 };

    // Expected mean vector of each component (one row per component)
    final double[][] correctMeans = new double[][] {
        {-0.0021722935000328823, 3.5432892936887908},
        {5.090902706507635, 8.68540656355283},

    // Expected 2x2 covariance matrix of each component
    final RealMatrix[] correctCovMats = new Array2DRowRealMatrix[2];

    correctCovMats[0] = new Array2DRowRealMatrix(new double[][] {
            { 4.537422569229048, 3.5266152281729304 },
            { 3.5266152281729304, 6.175448814169779 } });

    correctCovMats[1] = new Array2DRowRealMatrix( new double[][] {
            { 2.886778573963039, 1.5257474543463154 },
            { 1.5257474543463154, 3.3794567673616918 } });

    final MultivariateNormalDistribution[] correctMVNs = new

    correctMVNs[0] = new MultivariateNormalDistribution(correctMeans[0],

    correctMVNs[1] = new MultivariateNormalDistribution(correctMeans[1],

    // Mixture under test: 2 components estimated from the test samples
    final MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(getTestSamples(), 2);

    // Check every estimated component against the expected entry at index i
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : initialMix
            .getComponents()) {
        Assert.assertEquals(correctWeights[i], component.getFirst(),
        final double[] means = component.getValue().getMeans();
        // Means are compared element by element with exact equality
        Assert.assertTrue(Arrays.equals(correctMeans[i], means));
        final RealMatrix covMat = component.getValue().getCovariances();
        // NOTE(review): relies on the equals() of the RealMatrix
        // implementation; confirm that it compares entries by value
        Assert.assertEquals(correctCovMats[i], covMat);
Example #26
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
 * Helper method to create a multivariate normal mixture model which can be
 * used to initialize {@link #fit(MixtureMultivariateNormalDistribution)}.
 * This method uses the data supplied to the constructor to try to determine
 * a good mixture model at which to start the fit, but it is not guaranteed
 * to supply a model which will find the optimal solution or even converge.
 * @param data Data to estimate distribution
 * @param numComponents Number of components for estimated mixture
 * @return Multivariate normal mixture model estimated from the data
 * @throws NumberIsTooLargeException if {@code numComponents} is greater
 * than the number of data rows.
 * @throws NumberIsTooSmallException if {@code numComponents < 2}.
 * @throws NotStrictlyPositiveException if data has less than 2 rows
 * @throws DimensionMismatchException if rows of data have different numbers
 *             of columns
// Builds a starting mixture model: the rows are split into numComponents
// contiguous bins and each bin yields one equally weighted component whose
// mean and covariance are computed from the rows in that bin.
//
// NOTE(review): this listing appears truncated. Closing braces are absent
// and no sort call follows the "sort the data" comment in the lines shown.
// Confirm against the complete source.
public static MixtureMultivariateNormalDistribution estimate(final double[][] data,
                                                             final int numComponents)
    throws NotStrictlyPositiveException,
           DimensionMismatchException {
    // Argument checks: at least 2 rows, and 2 <= numComponents <= number of rows
    if (data.length < 2) {
        throw new NotStrictlyPositiveException(data.length);
    if (numComponents < 2) {
        throw new NumberIsTooSmallException(numComponents, 2, true);
    if (numComponents > data.length) {
        throw new NumberIsTooLargeException(numComponents, data.length, true);

    final int numRows = data.length;
    // column count is taken from the first row only
    final int numCols = data[0].length;

    // sort the data
    // wrap every input row in a DataRow
    final DataRow[] sortedData = new DataRow[numRows];
    for (int i = 0; i < numRows; i++) {
        sortedData[i] = new DataRow(data[i]);

    // uniform weight for each bin
    final double weight = 1d / numComponents;

    // components of mixture model to be created
    final List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>(numComponents);

    // create a component based on data in each bin
    // Bins are half-open index ranges computed with integer division; the
    // exclusive upper bound of one bin equals the inclusive lower bound of
    // the next, so each row index falls into exactly one bin
    for (int binIndex = 0; binIndex < numComponents; binIndex++) {
        // minimum index (inclusive) from sorted data for this bin
        final int minIndex = (binIndex * numRows) / numComponents;

        // maximum index (exclusive) from sorted data for this bin
        final int maxIndex = ((binIndex + 1) * numRows) / numComponents;

        // number of data records that will be in this bin
        final int numBinRows = maxIndex - minIndex;

        // data for this bin
        final double[][] binData = new double[numBinRows][numCols];

        // mean of each column for the data in this bin
        final double[] columnMeans = new double[numCols];

        // populate bin and create component
        // copy the rows of this bin and accumulate the column sums
        for (int i = minIndex, iBin = 0; i < maxIndex; i++, iBin++) {
            for (int j = 0; j < numCols; j++) {
                final double val = sortedData[i].getRow()[j];
                columnMeans[j] += val;
                binData[iBin][j] = val;

        // turn the column sums into column means
        MathArrays.scaleInPlace(1d / numBinRows, columnMeans);

        // covariance matrix for this bin
        final double[][] covMat
            = new Covariance(binData).getCovarianceMatrix().getData();
        final MultivariateNormalDistribution mvn
            = new MultivariateNormalDistribution(columnMeans, covMat);

        components.add(new Pair<Double, MultivariateNormalDistribution>(weight, mvn));

    return new MixtureMultivariateNormalDistribution(components);
Example #27
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/*
 * Compares the mixture model obtained from the expectation-maximization
 * fitter for the test samples with hard-coded expected weights, mean
 * vectors and covariance matrices.
 *
 * NOTE(review): this listing appears truncated. The correctMeans and
 * correctCovMats[0] initializers are not terminated, correctLogLikelihood,
 * correctMVNs and initialMix are never read in the lines shown, no fit call
 * on the fitter is visible, the counter i is never incremented and the
 * closing braces are absent. Confirm against the complete source.
 */
public void testFit() {
    // Test that the loglikelihood, weights, and models are determined and
    // fitted correctly
    final double[][] data = getTestSamples();
    final double correctLogLikelihood = -4.292431006791994;
    final double[] correctWeights = new double[] { 0.2962324189652912, 0.7037675810347089 };
    // Expected mean vector of each component (one row per component)
    final double[][] correctMeans = new double[][]{
        {-1.4213112715121132, 1.6924690505757753},
        {4.213612224374709, 7.975621325853645}
    // Expected 2x2 covariance matrix of each component
    final RealMatrix[] correctCovMats = new Array2DRowRealMatrix[2];
    correctCovMats[0] = new Array2DRowRealMatrix(new double[][] {
        { 1.739356907285747, -0.5867644251487614 },
        { -0.5867644251487614, 1.0232932029324642 } }
    correctCovMats[1] = new Array2DRowRealMatrix(new double[][] {
        { 4.245384898007161, 2.5797798966382155 },
        { 2.5797798966382155, 3.9200272522448367 } });
    // Distributions assembled from the expected means and covariances above
    final MultivariateNormalDistribution[] correctMVNs = new MultivariateNormalDistribution[2];
    correctMVNs[0] = new MultivariateNormalDistribution(correctMeans[0], correctCovMats[0].getData());
    correctMVNs[1] = new MultivariateNormalDistribution(correctMeans[1], correctCovMats[1].getData());

    MultivariateNormalMixtureExpectationMaximization fitter
        = new MultivariateNormalMixtureExpectationMaximization(data);

    // Two-component starting mixture estimated directly from the data
    MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(data, 2);;
    MixtureMultivariateNormalDistribution fittedMix = fitter.getFittedModel();
    List<Pair<Double, MultivariateNormalDistribution>> components = fittedMix.getComponents();


    // Check every fitted component against the expected entry at index i
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : components) {
        final double weight = component.getFirst();
        final MultivariateNormalDistribution mvn = component.getSecond();
        final double[] mean = mvn.getMeans();
        final RealMatrix covMat = mvn.getCovariances();
        // Weights must agree to within one ulp of 1.0
        Assert.assertEquals(correctWeights[i], weight, Math.ulp(1d));
        // Means are compared element by element with exact equality
        Assert.assertTrue(Arrays.equals(correctMeans[i], mean));
        // NOTE(review): relies on the equals() of the RealMatrix
        // implementation; confirm that it compares entries by value
        Assert.assertEquals(correctCovMats[i], covMat);
Example #28
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
/*
 * Checks the two-component starting mixture that estimate() derives from
 * the test samples against hard-coded expected weights, mean vectors and
 * covariance matrices.
 *
 * NOTE(review): this listing appears truncated. The correctMeans
 * initializer, the correctMVNs declaration and assignments, and the weight
 * assertion all break off mid-statement, the counter i is never incremented
 * and the closing braces are absent. Confirm against the complete source.
 */
public void testInitialMixture() {
    // Testing initial mixture estimated from data
    // both components are expected to carry the same weight
    final double[] correctWeights = new double[] { 0.5, 0.5 };

    // Expected mean vector of each component (one row per component)
    final double[][] correctMeans = new double[][] {
        {-0.0021722935000328823, 3.5432892936887908},
        {5.090902706507635, 8.68540656355283},

    // Expected 2x2 covariance matrix of each component
    final RealMatrix[] correctCovMats = new Array2DRowRealMatrix[2];

    correctCovMats[0] = new Array2DRowRealMatrix(new double[][] {
            { 4.537422569229048, 3.5266152281729304 },
            { 3.5266152281729304, 6.175448814169779 } });

    correctCovMats[1] = new Array2DRowRealMatrix( new double[][] {
            { 2.886778573963039, 1.5257474543463154 },
            { 1.5257474543463154, 3.3794567673616918 } });

    final MultivariateNormalDistribution[] correctMVNs = new

    correctMVNs[0] = new MultivariateNormalDistribution(correctMeans[0],

    correctMVNs[1] = new MultivariateNormalDistribution(correctMeans[1],

    // Mixture under test: 2 components estimated from the test samples
    final MixtureMultivariateNormalDistribution initialMix
        = MultivariateNormalMixtureExpectationMaximization.estimate(getTestSamples(), 2);

    // Check every estimated component against the expected entry at index i
    int i = 0;
    for (Pair<Double, MultivariateNormalDistribution> component : initialMix
            .getComponents()) {
        Assert.assertEquals(correctWeights[i], component.getFirst(),
        final double[] means = component.getValue().getMeans();
        // Means are compared element by element with exact equality
        Assert.assertTrue(Arrays.equals(correctMeans[i], means));
        final RealMatrix covMat = component.getValue().getCovariances();
        // NOTE(review): relies on the equals() of the RealMatrix
        // implementation; confirm that it compares entries by value
        Assert.assertEquals(correctCovMats[i], covMat);
Example #29
Source File:    From astor with GNU General Public License v2.0 4 votes vote down vote up
 * Helper method to create a multivariate normal mixture model which can be
 * used to initialize {@link #fit(MixtureMultivariateNormalDistribution)}.
 * This method uses the data supplied to the constructor to try to determine
 * a good mixture model at which to start the fit, but it is not guaranteed
 * to supply a model which will find the optimal solution or even converge.
 * @param data Data to estimate distribution
 * @param numComponents Number of components for estimated mixture
 * @return Multivariate normal mixture model estimated from the data
 * @throws NumberIsTooLargeException if {@code numComponents} is greater
 * than the number of data rows.
 * @throws NumberIsTooSmallException if {@code numComponents < 2}.
 * @throws NotStrictlyPositiveException if data has less than 2 rows
 * @throws DimensionMismatchException if rows of data have different numbers
 *             of columns
// Builds a starting mixture model: the rows are split into numComponents
// contiguous bins and each bin yields one equally weighted component whose
// mean and covariance are computed from the rows in that bin.
//
// NOTE(review): this listing appears truncated. Closing braces are absent
// and no sort call follows the "sort the data" comment in the lines shown.
// Confirm against the complete source.
public static MixtureMultivariateNormalDistribution estimate(final double[][] data,
                                                             final int numComponents)
    throws NotStrictlyPositiveException,
           DimensionMismatchException {
    // Argument checks: at least 2 rows, and 2 <= numComponents <= number of rows
    if (data.length < 2) {
        throw new NotStrictlyPositiveException(data.length);
    if (numComponents < 2) {
        throw new NumberIsTooSmallException(numComponents, 2, true);
    if (numComponents > data.length) {
        throw new NumberIsTooLargeException(numComponents, data.length, true);

    final int numRows = data.length;
    // column count is taken from the first row only
    final int numCols = data[0].length;

    // sort the data
    // wrap every input row in a DataRow
    final DataRow[] sortedData = new DataRow[numRows];
    for (int i = 0; i < numRows; i++) {
        sortedData[i] = new DataRow(data[i]);

    // uniform weight for each bin
    final double weight = 1d / numComponents;

    // components of mixture model to be created
    final List<Pair<Double, MultivariateNormalDistribution>> components =
            new ArrayList<Pair<Double, MultivariateNormalDistribution>>(numComponents);

    // create a component based on data in each bin
    // Bins are half-open index ranges computed with integer division; the
    // exclusive upper bound of one bin equals the inclusive lower bound of
    // the next, so each row index falls into exactly one bin
    for (int binIndex = 0; binIndex < numComponents; binIndex++) {
        // minimum index (inclusive) from sorted data for this bin
        final int minIndex = (binIndex * numRows) / numComponents;

        // maximum index (exclusive) from sorted data for this bin
        final int maxIndex = ((binIndex + 1) * numRows) / numComponents;

        // number of data records that will be in this bin
        final int numBinRows = maxIndex - minIndex;

        // data for this bin
        final double[][] binData = new double[numBinRows][numCols];

        // mean of each column for the data in this bin
        final double[] columnMeans = new double[numCols];

        // populate bin and create component
        // copy the rows of this bin and accumulate the column sums
        for (int i = minIndex, iBin = 0; i < maxIndex; i++, iBin++) {
            for (int j = 0; j < numCols; j++) {
                final double val = sortedData[i].getRow()[j];
                columnMeans[j] += val;
                binData[iBin][j] = val;

        // turn the column sums into column means
        MathArrays.scaleInPlace(1d / numBinRows, columnMeans);

        // covariance matrix for this bin
        final double[][] covMat
            = new Covariance(binData).getCovarianceMatrix().getData();
        final MultivariateNormalDistribution mvn
            = new MultivariateNormalDistribution(columnMeans, covMat);

        components.add(new Pair<Double, MultivariateNormalDistribution>(weight, mvn));

    return new MixtureMultivariateNormalDistribution(components);
Example #30
Source File:    From macrobase with Apache License 2.0 4 votes vote down vote up
public MultivariateNormalDistribution getDistribution() {
    return distribution;