org.apache.mahout.common.RandomUtils Java Examples

The following examples show how to use org.apache.mahout.common.RandomUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TDigestTest.java    From streaminer with Apache License 2.0 6 votes vote down vote up
/**
 * Slow experiment that records how the serialized size of a {@code TDigest} varies with the
 * number of samples and the compression setting. Results are written as tab-separated rows to
 * {@code scaling.tsv}, which is removed again when the test ends.
 *
 * <p>The test is skipped unless the system property {@code runSlowTests} is {@code true}.
 *
 * @throws IOException if the output file cannot be opened
 */
@Test()
public void testSizeControl() throws IOException {
    // very slow running data generator.  Don't want to run this normally.  To run slow tests use
    // mvn test -DrunSlowTests=true
    assumeTrue(Boolean.parseBoolean(System.getProperty("runSlowTests")));

    Random gen = RandomUtils.getRandom();
    PrintWriter out = new PrintWriter(new FileOutputStream("scaling.tsv"));
    try {
        out.printf("k\tsamples\tcompression\tsize1\tsize2\n");
        for (int k = 0; k < 20; k++) {
            for (int size : new int[]{10, 100, 1000, 10000}) {
                for (double compression : new double[]{2, 5, 10, 20, 50, 100, 200, 500, 1000}) {
                    TDigest dist = new TDigest(compression, gen);
                    // feed size * 1000 uniform samples into a fresh digest
                    for (int i = 0; i < size * 1000; i++) {
                        dist.add(gen.nextDouble());
                    }
                    out.printf("%d\t%d\t%.0f\t%d\t%d\n", k, size, compression, dist.smallByteSize(), dist.byteSize());
                    // flush per row so partial results survive an interrupted run
                    out.flush();
                }
            }
        }
        out.printf("\n");
    } finally {
        // always release the file handle and remove the scratch file, even when the run fails
        out.close();
        new File("scaling.tsv").delete();
    }
}
 
Example #2
Source File: FastByIDMap.java    From elasticsearch-taste with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an empty {@link FastByIDMap} whose backing arrays are sized from the requested
 * capacity and load factor.
 *
 * @param size desired capacity
 * @param maxSize max capacity
 * @param loadFactor ratio of internal hash table size to current size
 * @throws IllegalArgumentException if size is negative, loadFactor is below 1, size is not
 *  smaller than {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME} divided by loadFactor, or
 *  maxSize is below 1
 */
public FastByIDMap(final int size, final int maxSize, final float loadFactor) {
    Preconditions.checkArgument(size >= 0, "size must be at least 0");
    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
    this.loadFactor = loadFactor;

    // largest size whose scaled table still fits below the twin-prime ceiling
    final int sizeLimit = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
    Preconditions.checkArgument(size < sizeLimit, "size must be less than " + sizeLimit);
    Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");

    final int tableSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
    this.keys = new long[tableSize];
    Arrays.fill(this.keys, NULL);
    this.values = (V[]) new Object[tableSize];
    this.maxSize = maxSize;

    // access tracking is only set up when a real upper bound was requested
    this.countingAccesses = (maxSize != Integer.MAX_VALUE);
    if (this.countingAccesses) {
        this.recentlyAccessed = new BitSet(tableSize);
    } else {
        this.recentlyAccessed = null;
    }
}
 
Example #3
Source File: TDigestTest.java    From streaminer with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared quantile check against a strictly increasing sequence of points
 * (each value is the previous one plus {@code Math.PI * 1e-5}).
 */
@Test
public void testSequentialPoints() {
    Random gen = RandomUtils.getRandom();
    int round = 0;
    while (round < repeats()) {
        // a fresh ramp per round so every run starts again from zero
        AbstractContinousDistribution ramp = new AbstractContinousDistribution() {
            private double current = 0;

            @Override
            public double nextDouble() {
                current += Math.PI * 1e-5;
                return current;
            }
        };
        double[] quantiles = {0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999};
        runTest(ramp, 100, quantiles, "sequential", true, gen);
        round++;
    }
}
 
Example #4
Source File: TDigestTest.java    From streaminer with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared quantile check against a 50/50 mixture of a uniform distribution on
 * [-1, 1] and a normal distribution with a very small spread (1e-5) around zero.
 */
@Test
public void testNarrowNormal() {
    // this mixture of a uniform and normal distribution has a very narrow peak which is centered
    // near the median.  Our system should be scale invariant and work well regardless.
    final Random gen = RandomUtils.getRandom();
    AbstractContinousDistribution mix = new AbstractContinousDistribution() {
        private final AbstractContinousDistribution narrowPeak = new Normal(0, 1e-5, gen);
        private final AbstractContinousDistribution broadBase = new Uniform(-1, 1, gen);

        @Override
        public double nextDouble() {
            // one draw picks the component, the chosen component supplies the sample
            return gen.nextDouble() < 0.5 ? broadBase.nextDouble() : narrowPeak.nextDouble();
        }
    };

    int round = 0;
    while (round < repeats()) {
        double[] quantiles = {0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999};
        runTest(mix, 100, quantiles, "mixture", false, gen);
        round++;
    }
}
 
Example #5
Source File: ParallelSGDFactorizer.java    From elasticsearch-taste with Apache License 2.0 6 votes vote down vote up
/**
 * Allocates the user and item factor matrices and fills them with their starting values:
 * fixed entries for the global-average and bias slots, and Gaussian noise scaled by
 * {@code NOISE} for every slot from {@code FEATURE_OFFSET} up to {@code rank}.
 */
protected void initialize() {
    final RandomWrapper random = RandomUtils.getRandom();
    userVectors = new double[dataModel.getNumUsers()][rank];
    itemVectors = new double[dataModel.getNumItems()][rank];

    final double globalAverage = getAveragePreference();

    for (final double[] userVector : userVectors) {
        userVector[0] = globalAverage;
        userVector[USER_BIAS_INDEX] = 0; // will store user bias
        userVector[ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
        for (int f = FEATURE_OFFSET; f < rank; f++) {
            userVector[f] = random.nextGaussian() * NOISE;
        }
    }

    for (final double[] itemVector : itemVectors) {
        itemVector[0] = 1; // corresponding user feature contains global average
        itemVector[USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
        itemVector[ITEM_BIAS_INDEX] = 0; // will store item bias
        for (int f = FEATURE_OFFSET; f < rank; f++) {
            itemVector[f] = random.nextGaussian() * NOISE;
        }
    }
}
 
Example #6
Source File: FastMap.java    From elasticsearch-taste with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an empty {@link FastMap} whose backing arrays are sized from the requested
 * capacity and load factor.
 *
 * @param size desired capacity
 * @param maxSize max capacity
 * @param loadFactor multiplier applied to {@code size} when choosing the hash table size
 * @throws IllegalArgumentException if size is negative, loadFactor is below 1, size is not
 *  smaller than {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME} divided by loadFactor, or
 *  maxSize is below 1
 */
public FastMap(final int size, final int maxSize, final float loadFactor) {
    Preconditions.checkArgument(size >= 0, "size must be at least 0");
    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
    this.loadFactor = loadFactor;

    // largest size whose scaled table still fits below the twin-prime ceiling
    final int sizeLimit = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
    Preconditions.checkArgument(size < sizeLimit, "size must be less than " + sizeLimit);
    Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");

    final int tableSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
    this.keys = (K[]) new Object[tableSize];
    this.values = (V[]) new Object[tableSize];
    this.maxSize = maxSize;

    // access tracking is only set up when a real upper bound was requested
    this.countingAccesses = (maxSize != Integer.MAX_VALUE);
    if (this.countingAccesses) {
        this.recentlyAccessed = new BitSet(tableSize);
    } else {
        this.recentlyAccessed = null;
    }
}
 
Example #7
Source File: ALSWRFactorizer.java    From elasticsearch-taste with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the item and user feature matrices for the given factorizer.
 *
 * <p>Each item row gets the value of {@code averateRating(itemID)} in column 0 and a random
 * value in [0, 0.1) in every other column. The user matrix is only allocated here, so it
 * starts out zero-filled.
 *
 * @param factorizer source of the data model, feature count and item index mapping
 */
Features(final ALSWRFactorizer factorizer) {
    dataModel = factorizer.dataModel;
    numFeatures = factorizer.numFeatures;
    final Random random = RandomUtils.getRandom();
    M = new double[dataModel.getNumItems()][numFeatures];
    for (final LongPrimitiveIterator items = dataModel.getItemIDs(); items.hasNext();) {
        final long itemID = items.nextLong();
        final double[] itemRow = M[factorizer.itemIndex(itemID)];
        itemRow[0] = averateRating(itemID);
        int column = 1;
        while (column < numFeatures) {
            itemRow[column] = random.nextDouble() * 0.1;
            column++;
        }
    }
    U = new double[dataModel.getNumUsers()][numFeatures];
}
 
Example #8
Source File: FixedSizeSamplingIterator.java    From elasticsearch-taste with Apache License 2.0 6 votes vote down vote up
/**
 * Drains {@code source} and keeps at most {@code size} of its elements, then iterates over
 * the kept elements.
 *
 * <p>The first {@code size} elements are kept as they arrive. Each later element draws a random
 * position below the number of elements seen so far and overwrites the kept element at that
 * position when the position falls inside the buffer.
 *
 * @param size maximum number of elements to keep
 * @param source iterator to sample from; it is fully consumed by this constructor
 */
public FixedSizeSamplingIterator(final int size, final Iterator<T> source) {
    final List<T> reservoir = Lists.newArrayListWithCapacity(size);
    int seen = 0;
    final Random random = RandomUtils.getRandom();
    while (source.hasNext()) {
        final T candidate = source.next();
        seen++;
        if (reservoir.size() >= size) {
            // buffer is full: the candidate replaces a kept element only if its slot lands inside
            final int slot = random.nextInt(seen);
            if (slot < reservoir.size()) {
                reservoir.set(slot, candidate);
            }
            continue;
        }
        reservoir.add(candidate);
    }
    delegate = reservoir.iterator();
}
 
Example #9
Source File: RatingSGDFactorizer.java    From elasticsearch-taste with Apache License 2.0 5 votes vote down vote up
/**
 * Randomly permutes the cached preferences in place by walking from the last position down
 * to position 1 and swapping each with a randomly chosen position at or below it.
 */
protected void shufflePreferences() {
    final RandomWrapper random = RandomUtils.getRandom();
    /* Durstenfeld shuffle */
    int last = cachedUserIDs.length - 1;
    while (last > 0) {
        final int partner = random.nextInt(last + 1);
        swapCachedPreferences(last, partner);
        last--;
    }
}
 
Example #10
Source File: UpperQuantileTest.java    From log-synth with Apache License 2.0 5 votes vote down vote up
/**
 * Test fixture: draws 1001 random doubles, feeds each one to a fresh {@code UpperQuantile(101)}
 * and stores it in {@code data}, then sorts {@code data} ascending.
 */
@Before
public void generate() {
    RandomUtils.useTestSeed();
    uq = new UpperQuantile(101);
    data = new double[1001];
    Random gen = RandomUtils.getRandom();
    int index = 0;
    while (index < 1001) {
        double sample = gen.nextDouble();
        data[index] = sample;
        uq.add(sample);
        index++;
    }
    Arrays.sort(data);
}
 
Example #11
Source File: User.java    From log-synth with Apache License 2.0 5 votes vote down vote up
public User(InetAddress address, String geoCode, TermGenerator terms, double period) {
    this.terms = terms;
    this.geoCode = geoCode;
    this.address = address;
    this.rate = period;
    this.sessionTimeDistribution = new Exponential(period, RandomUtils.getRandom());

    id = idCounter.addAndGet(1);
    nextSession = sessionTimeDistribution.nextDouble();
}
 
Example #12
Source File: TDigestTest.java    From streaminer with Apache License 2.0 5 votes vote down vote up
/**
 * Records how the quantile estimation error and serialized size of a {@code TDigest} vary
 * with the compression setting. Rows are written tab-separated to {@code error-scaling.tsv},
 * which is closed and deleted when the test finishes, whether or not it succeeded.
 *
 * @throws FileNotFoundException if the output file cannot be opened
 */
@Test
public void testScaling() throws FileNotFoundException {
    Random gen = RandomUtils.getRandom();
    PrintWriter out = new PrintWriter(new FileOutputStream("error-scaling.tsv"));
    try {
        out.printf("pass\tcompression\tq\terror\tsize\n");
        // change to 50 passes for better graphs
        final int passes = repeats() * repeats();
        final double[] compressions = {2, 5, 10, 20, 50, 100, 200, 500, 1000};
        final double[] quantiles = {0.001, 0.01, 0.1, 0.5};

        for (int pass = 0; pass < passes; pass++) {
            // sorted reference sample used both as digest input and as ground truth
            List<Double> data = Lists.newArrayList();
            int drawn = 0;
            while (drawn < 100000) {
                data.add(gen.nextDouble());
                drawn++;
            }
            Collections.sort(data);

            for (double compression : compressions) {
                TDigest dist = new TDigest(compression, gen);
                for (Double value : data) {
                    dist.add(value);
                }
                dist.compress();

                for (double q : quantiles) {
                    double estimate = dist.quantile(q);
                    double actual = data.get((int) (q * data.size()));
                    double error = estimate - actual;
                    out.printf("%d\t%.0f\t%.3f\t%.9f\t%d\n", pass, compression, q, error, dist.byteSize());
                    out.flush();
                }
            }
        }
    } finally {
        out.close();
        new File("error-scaling.tsv").delete();
    }
}
 
Example #13
Source File: TDigestTest.java    From streaminer with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the shared {@code compare} helper on a Gamma(0.1, 0.1) and a Uniform(0, 1)
 * distribution, once per repeat, each time with the bound {@code 1L << 48}.
 */
@Test
public void compareToQDigest() {
    Random rand = RandomUtils.getRandom();

    int round = 0;
    while (round < repeats()) {
        compare(new Gamma(0.1, 0.1, rand), "gamma", 1L << 48, rand);
        compare(new Uniform(0, 1, rand), "uniform", 1L << 48, rand);
        round++;
    }
}
 
Example #14
Source File: TDigestTest.java    From streaminer with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the shared quantile check against a Gamma(0.1, 0.1) distribution.
 */
@Test
public void testGamma() {
    // this Gamma distribution is very heavily skewed.  The 0.1%-ile is 6.07e-30 while
    // the median is 0.006 and the 99.9th %-ile is 33.6 while the mean is 1.
    // this severe skew means that we have to have positional accuracy that
    // varies by over 11 orders of magnitude.
    Random gen = RandomUtils.getRandom();
    int round = 0;
    while (round < repeats()) {
        // Alternative argument kept from the original, disabled:
        //   new double[]{6.0730483624079e-30, 6.0730483624079e-20, 6.0730483627432e-10, 5.9339110446023e-03,
        //           2.6615455373884e+00, 1.5884778179295e+01, 3.3636770117188e+01}
        double[] quantiles = {0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999};
        runTest(new Gamma(0.1, 0.1, gen), 100, quantiles, "gamma", true, gen);
        round++;
    }
}
 
Example #15
Source File: TDigestTest.java    From streaminer with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the shared quantile check against a Uniform(0, 1) distribution.
 */
@Test
public void testUniform() {
    Random gen = RandomUtils.getRandom();
    int round = 0;
    while (round < repeats()) {
        double[] quantiles = {0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999};
        runTest(new Uniform(0, 1, gen), 100, quantiles, "uniform", true, gen);
        round++;
    }
}
 
Example #16
Source File: RatingSGDFactorizer.java    From elasticsearch-taste with Apache License 2.0 5 votes vote down vote up
/**
 * Allocates the user and item factor matrices, fills them with their starting values
 * (fixed entries for the global-average and bias slots, Gaussian noise scaled by
 * {@code randomNoise} for every slot from {@code FEATURE_OFFSET} up to {@code numFeatures}),
 * then caches and shuffles the preferences.
 */
protected void prepareTraining() {
    final RandomWrapper random = RandomUtils.getRandom();
    userVectors = new double[dataModel.getNumUsers()][numFeatures];
    itemVectors = new double[dataModel.getNumItems()][numFeatures];

    final double globalAverage = getAveragePreference();

    for (final double[] userVector : userVectors) {
        userVector[0] = globalAverage;
        userVector[USER_BIAS_INDEX] = 0; // will store user bias
        userVector[ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
        for (int f = FEATURE_OFFSET; f < numFeatures; f++) {
            userVector[f] = random.nextGaussian() * randomNoise;
        }
    }

    for (final double[] itemVector : itemVectors) {
        itemVector[0] = 1; // corresponding user feature contains global average
        itemVector[USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
        itemVector[ITEM_BIAS_INDEX] = 0; // will store item bias
        for (int f = FEATURE_OFFSET; f < numFeatures; f++) {
            itemVector[f] = random.nextGaussian() * randomNoise;
        }
    }

    cachePreferences();
    shufflePreferences();
}
 
Example #17
Source File: FastIDSet.java    From elasticsearch-taste with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an empty {@code FastIDSet} whose key table is sized from the requested capacity
 * and load factor. Every slot of the table is initialised to {@code NULL}.
 *
 * @param size desired capacity
 * @param loadFactor multiplier applied to {@code size} when choosing the hash table size
 * @throws IllegalArgumentException if size is negative, loadFactor is below 1, or size is not
 *  smaller than {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME} divided by loadFactor
 */
public FastIDSet(final int size, final float loadFactor) {
    Preconditions.checkArgument(size >= 0, "size must be at least 0");
    Preconditions.checkArgument(loadFactor >= 1.0f,
            "loadFactor must be at least 1.0");
    this.loadFactor = loadFactor;
    final int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
    // Guava's message template only substitutes %s; with %d the limit was never
    // inserted and the message read "size must be less than %d [<max>]".
    Preconditions.checkArgument(size < max, "size must be less than %s",
            max);
    final int hashSize = RandomUtils
            .nextTwinPrime((int) (loadFactor * size));
    keys = new long[hashSize];
    Arrays.fill(keys, NULL);
}
 
Example #18
Source File: AbstractDifferenceRecommenderEvaluator.java    From elasticsearch-taste with Apache License 2.0 4 votes vote down vote up
/**
 * Initialises the evaluator's random source from {@code RandomUtils.getRandom()}.
 */
protected AbstractDifferenceRecommenderEvaluator() {
    this.random = RandomUtils.getRandom();
}
 
Example #19
Source File: GenericItemSimilarity.java    From elasticsearch-taste with Apache License 2.0 4 votes vote down vote up
/**
 * Combines the low 32 bits of both item IDs with the hash of the similarity value via XOR.
 */
@Override
public int hashCode() {
    final int idBits = (int) itemID1 ^ (int) itemID2;
    return idBits ^ RandomUtils.hashDouble(value);
}
 
Example #20
Source File: GenericUserSimilarity.java    From elasticsearch-taste with Apache License 2.0 4 votes vote down vote up
/**
 * Combines the low 32 bits of both user IDs with the hash of the similarity value via XOR.
 */
@Override
public int hashCode() {
    final int idBits = (int) userID1 ^ (int) userID2;
    return idBits ^ RandomUtils.hashDouble(value);
}
 
Example #21
Source File: DateSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Parses {@code end} with the sampler's date format, stores the resulting time as the end
 * of the range, and rebuilds the uniform base distribution over {@code [0, end - start]}
 * with a new random source.
 *
 * @param end end of the date range, in the format understood by {@code df}
 * @throws ParseException if {@code end} cannot be parsed
 */
@SuppressWarnings("UnusedDeclaration")
public void setEnd(String end) throws ParseException {
    this.end = df.parse(end).getTime();
    final Random source = RandomUtils.getRandom();
    this.base = new Uniform(0, this.end - this.start, source);
}
 
Example #22
Source File: DateSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Parses {@code start} with the sampler's date format, stores the resulting time as the
 * start of the range, and rebuilds the uniform base distribution over
 * {@code [0, end - start]} with a new random source.
 *
 * @param start start of the date range, in the format understood by {@code df}
 * @throws ParseException if {@code start} cannot be parsed
 */
@SuppressWarnings("UnusedDeclaration")
public void setStart(String start) throws ParseException {
    this.start = df.parse(start).getTime();
    final Random source = RandomUtils.getRandom();
    this.base = new Uniform(0, this.end - this.start, source);
}
 
Example #23
Source File: ArrivalSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces this sampler's random source with one obtained from
 * {@code RandomUtils.getRandom(seed)}.
 *
 * @param seed seed handed to {@code RandomUtils.getRandom}
 */
@Override
public void setSeed(long seed) {
    this.base = RandomUtils.getRandom(seed);
}
 
Example #24
Source File: ArrivalSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a sampler whose random source comes from {@code RandomUtils.getRandom()}.
 */
public ArrivalSampler() {
    this.base = RandomUtils.getRandom();
}
 
Example #25
Source File: ZipSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces this sampler's random source with one obtained from
 * {@code RandomUtils.getRandom(seed)}.
 *
 * @param seed seed handed to {@code RandomUtils.getRandom}
 */
@Override
@SuppressWarnings("unused")
public void setSeed(long seed) {
    this.rand = RandomUtils.getRandom(seed);
}
 
Example #26
Source File: IntegerSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces this sampler's random source with one obtained from
 * {@code RandomUtils.getRandom(seed)}.
 *
 * @param seed seed handed to {@code RandomUtils.getRandom}
 */
@Override
public void setSeed(long seed) {
    this.base = RandomUtils.getRandom(seed);
}
 
Example #27
Source File: IntegerSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a sampler whose random source comes from {@code RandomUtils.getRandom()}.
 */
@SuppressWarnings("WeakerAccess")
public IntegerSampler() {
    this.base = RandomUtils.getRandom();
}
 
Example #28
Source File: TDigestTest.java    From t-digest with Apache License 2.0 4 votes vote down vote up
/**
 * Class-level fixture that calls {@code RandomUtils.useTestSeed()}.
 * NOTE(review): presumably this pins RandomUtils to a fixed seed so test runs are
 * reproducible — confirm against the RandomUtils implementation.
 */
@BeforeClass
public static void freezeSeed() {
    RandomUtils.useTestSeed();
}
 
Example #29
Source File: AVLGroupTreeTest.java    From t-digest with Apache License 2.0 4 votes vote down vote up
/**
 * Per-test fixture that calls {@code RandomUtils.useTestSeed()}.
 * NOTE(review): presumably this pins RandomUtils to a fixed seed so each test is
 * reproducible — confirm against the RandomUtils implementation.
 */
@Before
public void setUp() {
    RandomUtils.useTestSeed();
}
 
Example #30
Source File: MergingDigestTest.java    From t-digest with Apache License 2.0 4 votes vote down vote up
/**
 * Per-test fixture that calls {@code RandomUtils.useTestSeed()}.
 * NOTE(review): presumably this pins RandomUtils to a fixed seed so each test is
 * reproducible — confirm against the RandomUtils implementation.
 */
@Before
public void testSetUp() {
    RandomUtils.useTestSeed();
}