Java Code Examples for org.apache.commons.math3.random.RandomGenerator#setSeed()

The following examples show how to use org.apache.commons.math3.random.RandomGenerator#setSeed() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PercentileTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testStoredVsDirect() {
    final RandomGenerator rand= new JDKRandomGenerator();
    rand.setSeed(Long.MAX_VALUE);
    for (final int sampleSize:sampleSizes) {
        final double[] data = new NormalDistribution(rand,4000, 50)
                            .sample(sampleSize);
        for (final double p:new double[] {50d,95d}) {
            for (final Percentile.EstimationType e : Percentile.EstimationType.values()) {
                reset(p, e);
                final Percentile pStoredData = getUnivariateStatistic();
                pStoredData.setData(data);
                final double storedDataResult=pStoredData.evaluate();
                pStoredData.setData(null);
                final Percentile pDirect = getUnivariateStatistic();
                Assert.assertEquals("Sample="+sampleSize+",P="+p+" e="+e,
                        storedDataResult,
                        pDirect.evaluate(data),0d);
            }
        }
    }
}
 
Example 2
Source File: NaturalRankingTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testNaNsFixedTiesRandom() {
    RandomGenerator randomGenerator = new JDKRandomGenerator();
    randomGenerator.setSeed(1000);
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED,
            randomGenerator);
    double[] ranks = ranking.rank(exampleData);
    double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesFirst);
    correctRanks = new double[] { 1, 2, 4, 3, 5 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesLast);
    correctRanks = new double[] { 3, 3, 2, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleNaNs);
    correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleTies);
    correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(allSame);
    correctRanks = new double[] { 2, 3, 3, 3 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
}
 
Example 3
Source File: PercentileTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testStoredVsDirect() {
    final RandomGenerator rand= new JDKRandomGenerator();
    rand.setSeed(Long.MAX_VALUE);
    for (final int sampleSize:sampleSizes) {
        final double[] data = new NormalDistribution(rand,4000, 50)
                            .sample(sampleSize);
        for (final double p:new double[] {50d,95d}) {
            for (final Percentile.EstimationType e : Percentile.EstimationType.values()) {
                reset(p, e);
                final Percentile pStoredData = getUnivariateStatistic();
                pStoredData.setData(data);
                final double storedDataResult=pStoredData.evaluate();
                pStoredData.setData(null);
                final Percentile pDirect = getUnivariateStatistic();
                Assert.assertEquals("Sample="+sampleSize+",P="+p+" e="+e,
                        storedDataResult,
                        pDirect.evaluate(data),0d);
            }
        }
    }
}
 
Example 4
Source File: NaturalRankingTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testNaNsFixedTiesRandom() {
    RandomGenerator randomGenerator = new JDKRandomGenerator();
    randomGenerator.setSeed(1000);
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED,
            randomGenerator);
    double[] ranks = ranking.rank(exampleData);
    double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesFirst);
    correctRanks = new double[] { 1, 2, 4, 3, 5 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesLast);
    correctRanks = new double[] { 3, 3, 2, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleNaNs);
    correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleTies);
    correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(allSame);
    correctRanks = new double[] { 2, 3, 3, 3 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
}
 
Example 5
Source File: NaturalRankingTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testNaNsFixedTiesRandom() {
    RandomGenerator randomGenerator = new JDKRandomGenerator();
    randomGenerator.setSeed(1000);
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED,
            randomGenerator);
    double[] ranks = ranking.rank(exampleData);
    double[] correctRanks = { 5, 4, 6, 7, 3, 8, Double.NaN, 1, 4 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesFirst);
    correctRanks = new double[] { 1, 1, 4, 3, 5 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesLast);
    correctRanks = new double[] { 3, 4, 2, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleNaNs);
    correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleTies);
    correctRanks = new double[] { 3, 2, 5, 5, 7, 6, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(allSame);
    correctRanks = new double[] { 1, 3, 4, 4 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
}
 
Example 6
Source File: NaturalRankingTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testNaNsFixedTiesRandom() {
    RandomGenerator randomGenerator = new JDKRandomGenerator();
    randomGenerator.setSeed(1000);
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED,
            randomGenerator);
    double[] ranks = ranking.rank(exampleData);
    double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesFirst);
    correctRanks = new double[] { 1, 2, 4, 3, 5 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesLast);
    correctRanks = new double[] { 3, 3, 2, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleNaNs);
    correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleTies);
    correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(allSame);
    correctRanks = new double[] { 2, 3, 3, 3 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
}
 
Example 7
Source File: NaturalRankingTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testNaNsFixedTiesRandom() {
    RandomGenerator randomGenerator = new JDKRandomGenerator();
    randomGenerator.setSeed(1000);
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED,
            randomGenerator);
    double[] ranks = ranking.rank(exampleData);
    double[] correctRanks = { 5, 4, 6, 7, 3, 8, Double.NaN, 1, 4 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesFirst);
    correctRanks = new double[] { 1, 1, 4, 3, 5 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesLast);
    correctRanks = new double[] { 3, 4, 2, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleNaNs);
    correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleTies);
    correctRanks = new double[] { 3, 2, 5, 5, 7, 6, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(allSame);
    correctRanks = new double[] { 1, 3, 4, 4 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
}
 
Example 8
Source File: NaturalRankingTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testNaNsFixedTiesRandom() {
    RandomGenerator randomGenerator = new JDKRandomGenerator();
    randomGenerator.setSeed(1000);
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED,
            randomGenerator);
    double[] ranks = ranking.rank(exampleData);
    double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesFirst);
    correctRanks = new double[] { 1, 2, 4, 3, 5 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesLast);
    correctRanks = new double[] { 3, 3, 2, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleNaNs);
    correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleTies);
    correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(allSame);
    correctRanks = new double[] { 2, 3, 3, 3 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
}
 
Example 9
Source File: NaturalRankingTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testNaNsFixedTiesRandom() {
    RandomGenerator randomGenerator = new JDKRandomGenerator();
    randomGenerator.setSeed(1000);
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED,
            randomGenerator);
    double[] ranks = ranking.rank(exampleData);
    double[] correctRanks = { 5, 3, 6, 7, 3, 8, Double.NaN, 1, 2 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesFirst);
    correctRanks = new double[] { 1, 2, 4, 3, 5 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesLast);
    correctRanks = new double[] { 3, 3, 2, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleNaNs);
    correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleTies);
    correctRanks = new double[] { 3, 2, 4, 4, 6, 7, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(allSame);
    correctRanks = new double[] { 2, 3, 3, 3 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
}
 
Example 10
Source File: NaturalRankingTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test
public void testNaNsFixedTiesRandom() {
    RandomGenerator randomGenerator = new JDKRandomGenerator();
    randomGenerator.setSeed(1000);
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED,
            randomGenerator);
    double[] ranks = ranking.rank(exampleData);
    double[] correctRanks = { 5, 4, 6, 7, 3, 8, Double.NaN, 1, 4 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesFirst);
    correctRanks = new double[] { 1, 1, 4, 3, 5 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(tiesLast);
    correctRanks = new double[] { 3, 4, 2, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleNaNs);
    correctRanks = new double[] { 1, 2, Double.NaN, Double.NaN };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(multipleTies);
    correctRanks = new double[] { 3, 2, 5, 5, 7, 6, 1 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
    ranks = ranking.rank(allSame);
    correctRanks = new double[] { 1, 3, 4, 4 };
    TestUtils.assertEquals(correctRanks, ranks, 0d);
}
 
Example 11
Source File: PartitionTest.java    From sequence-mining with GNU General Public License v3.0 5 votes vote down vote up
@Test
public void testInterleavingGenerator() {

	final Random random = new Random(1);
	final Random randomI = new Random(10);
	final RandomGenerator randomC = new JDKRandomGenerator();
	randomC.setSeed(100);

	final Multiset<Sequence> seqsI = HashMultiset.create();
	seqsI.add(new Sequence(1, 2, 3));
	seqsI.add(new Sequence(4, 5));
	seqsI.add(new Sequence(6));
	seqsI.add(new Sequence(7));

	final HashMap<Sequence, Double> seqsG = new HashMap<>();
	for (final Sequence seq : seqsI.elementSet()) {
		seqsG.put(seq, 1.0);
	}

	final Map<Sequence, EnumeratedIntegerDistribution> countDists = new HashMap<>();
	final EnumeratedIntegerDistribution oneRepeat = new EnumeratedIntegerDistribution(randomC, new int[] { 1 },
			new double[] { 1.0 });
	countDists.put(new Sequence(1, 2, 3), oneRepeat);
	countDists.put(new Sequence(4, 5), oneRepeat);
	countDists.put(new Sequence(6), oneRepeat);
	countDists.put(new Sequence(7), oneRepeat);

	final HashSet<Transaction> transG = new HashSet<>();
	for (int i = 0; i < 700000; i++)
		transG.add(
				TransactionGenerator.sampleFromDistribution(random, seqsG, countDists, new HashMap<>(), randomI));
	// Note that upper bound is exact when there are no repetitions
	assertEquals(transG.size(), modP(seqsI.iterator()), EPS);
}
 
Example 12
Source File: RandomManager.java    From myrrix-recommender with Apache License 2.0 5 votes vote down vote up
/**
 * Causes all known instances of {@link RandomGenerator}, and future ones, to be started from a fixed
 * seed. This is useful for making tests deterministic.
 */
public static void useTestSeed() {
  useTestSeed = true;
  synchronized (INSTANCES) {
    for (RandomGenerator random : INSTANCES.keySet()) {
      random.setSeed(TEST_SEED);
    }
    INSTANCES.clear();
  }
}
 
Example 13
Source File: GLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() throws Exception {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
    
    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }
    
    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }
    
    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
      
    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);
    
    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();
    
    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());
    
    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
    
    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // Verify that OLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();
        
        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();
        
        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();
        
        // Record deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
        
    }
    
    // Verify that GLS is on average more efficient, lower variance
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());  
}
 
Example 14
Source File: GLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
    
    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }
    
    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }
    
    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
      
    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);
    
    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();
    
    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());
    
    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
    
    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // Verify that OLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();
        
        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();
        
        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();
        
        // Record deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
        
    }
    
    // Verify that GLS is on average more efficient, lower variance
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());  
}
 
Example 15
Source File: GLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
    
    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }
    
    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }
    
    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
      
    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);
    
    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();
    
    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());
    
    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
    
    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // Verify that OLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();
        
        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();
        
        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();
        
        // Record deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
        
    }
    
    // Verify that GLS is on average more efficient, lower variance
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());  
}
 
Example 16
Source File: GLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
    
    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }
    
    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }
    
    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
      
    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);
    
    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();
    
    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());
    
    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
    
    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // Verify that OLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();
        
        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();
        
        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();
        
        // Record deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
        
    }
    
    // Verify that GLS is on average more efficient, lower variance
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());  
}
 
Example 17
Source File: GLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
    
    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }
    
    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }
    
    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
      
    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);
    
    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();
    
    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());
    
    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
    
    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // Verify that OLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();
        
        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();
        
        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();
        
        // Record deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
        
    }
    
    // Verify that GLS is on average more efficient, lower variance
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());  
}
 
Example 18
Source File: GLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
    
    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }
    
    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }
    
    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
      
    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);
    
    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();
    
    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());
    
    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
    
    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // Verify that OLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();
        
        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();
        
        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();
        
        // Record deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
        
    }
    
    // Verify that GLS is on average more efficient, lower variance
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());  
}
 
Example 19
Source File: TransactionGenerator.java    From sequence-mining with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Generate transactions from set of interesting sequences
 *
 * @return set of sequences added to transaction
 */
public static HashMap<Sequence, Double> generateTransactionDatabase(final Map<Sequence, Double> sequences,
		final Table<Sequence, Integer, Double> probabilities, final int noTransactions, final File outFile)
				throws IOException {

	// Set random number seeds
	final Random random = new Random(1);
	final Random randomI = new Random(10);
	final RandomGenerator randomC = new JDKRandomGenerator();
	randomC.setSeed(100);

	// Storage for sequences actually added
	final HashMap<Sequence, Double> addedSequences = new HashMap<>();

	// Set output file
	final PrintWriter out = new PrintWriter(outFile, "UTF-8");

	// Add to distribution class for easy sampling
	final Map<Sequence, EnumeratedIntegerDistribution> dists = new HashMap<>();
	for (final Sequence seq : sequences.keySet()) {
		final List<Integer> singletons = new ArrayList<>();
		final List<Double> probs = new ArrayList<>();
		for (final Entry<Integer, Double> entry : probabilities.row(seq).entrySet()) {
			singletons.add(entry.getKey());
			probs.add(entry.getValue());
		}
		final EnumeratedIntegerDistribution dist = new EnumeratedIntegerDistribution(randomC,
				Ints.toArray(singletons), Doubles.toArray(probs));
		dists.put(seq, dist);
	}

	// Generate transaction database
	int count = 0;
	while (count < noTransactions) {

		// Generate transaction from distribution
		final Transaction transaction = sampleFromDistribution(random, sequences, dists, addedSequences, randomI);
		for (final int item : transaction) {
			out.print(item + " -1 ");
		}
		if (!transaction.isEmpty()) {
			out.print("-2");
			out.println();
			count++;
		}

	}
	out.close();

	// Print file to screen
	if (VERBOSE) {
		final FileReader reader = new FileReader(outFile);
		final LineIterator it = new LineIterator(reader);
		while (it.hasNext()) {
			System.out.println(it.nextLine());
		}
		LineIterator.closeQuietly(it);
	}

	return addedSequences;
}
 
Example 20
Source File: GLSMultipleLinearRegressionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Generate an error covariance matrix and sample data representing models
 * with this error structure. Then verify that GLS estimated coefficients,
 * on average, perform better than OLS.
 */
@Test
public void testGLSEfficiency() {
    RandomGenerator rg = new JDKRandomGenerator();
    rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
    
    // Assume model has 16 observations (will use Longley data).  Start by generating
    // non-constant variances for the 16 error terms.
    final int nObs = 16;
    double[] sigma = new double[nObs];
    for (int i = 0; i < nObs; i++) {
        sigma[i] = 10 * rg.nextDouble();
    }
    
    // Now generate 1000 error vectors to use to estimate the covariance matrix
    // Columns are draws on N(0, sigma[col])
    final int numSeeds = 1000;
    RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
    for (int i = 0; i < numSeeds; i++) {
        for (int j = 0; j < nObs; j++) {
            errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
        }
    }
    
    // Get covariance matrix for columns
    RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
      
    // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
    GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
    double[] errorMeans = new double[nObs];  // Counting on init to 0 here
    CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
     1.0e-12 * cov.getNorm(), rawGenerator);
    
    // Now start generating models.  Use Longley X matrix on LHS
    // and Longley OLS beta vector as "true" beta.  Generate
    // Y values by XB + u where u is a CorrelatedRandomVector generated
    // from cov.
    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(longley, nObs, 6);
    final RealVector b = ols.calculateBeta().copy();
    final RealMatrix x = ols.getX().copy();
    
    // Create a GLS model to reuse
    GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
    gls.newSampleData(longley, nObs, 6);
    gls.newCovarianceData(cov.getData());
    
    // Create aggregators for stats measuring model performance
    DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
    DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
    
    // Generate Y vectors for 10000 models, estimate GLS and OLS and
    // Verify that OLS estimates are better
    final int nModels = 10000;
    for (int i = 0; i < nModels; i++) {
        
        // Generate y = xb + u with u cov
        RealVector u = MatrixUtils.createRealVector(gen.nextVector());
        double[] y = u.add(x.operate(b)).toArray();
        
        // Estimate OLS parameters
        ols.newYSampleData(y);
        RealVector olsBeta = ols.calculateBeta();
        
        // Estimate GLS parameters
        gls.newYSampleData(y);
        RealVector glsBeta = gls.calculateBeta();
        
        // Record deviations from "true" beta
        double dist = olsBeta.getDistance(b);
        olsBetaStats.addValue(dist * dist);
        dist = glsBeta.getDistance(b);
        glsBetaStats.addValue(dist * dist);
        
    }
    
    // Verify that GLS is on average more efficient, lower variance
    assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
    assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());  
}