Java Code Examples for org.apache.mahout.common.RandomUtils#getRandom()

The following examples show how to use org.apache.mahout.common.RandomUtils#getRandom() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TDigestTest.java    From streaminer with Apache License 2.0 6 votes vote down vote up
@Test
public void testSizeControl() throws IOException {
    // very slow running data generator.  Don't want to run this normally.  To run slow tests use
    // mvn test -DrunSlowTests=true
    assumeTrue(Boolean.parseBoolean(System.getProperty("runSlowTests")));

    Random gen = RandomUtils.getRandom();
    // try-with-resources: the original leaked the PrintWriter if any printf,
    // add, or size call threw before the explicit close().
    try (PrintWriter out = new PrintWriter(new FileOutputStream("scaling.tsv"))) {
        out.printf("k\tsamples\tcompression\tsize1\tsize2\n");
        for (int k = 0; k < 20; k++) {
            for (int size : new int[]{10, 100, 1000, 10000}) {
                for (double compression : new double[]{2, 5, 10, 20, 50, 100, 200, 500, 1000}) {
                    TDigest dist = new TDigest(compression, gen);
                    for (int i = 0; i < size * 1000; i++) {
                        dist.add(gen.nextDouble());
                    }
                    // record serialized size (compact and full) per configuration
                    out.printf("%d\t%d\t%.0f\t%d\t%d\n", k, size, compression, dist.smallByteSize(), dist.byteSize());
                    out.flush();
                }
            }
        }
        out.printf("\n");
    } finally {
        // clean up the scratch file even when the test aborts early
        new File("scaling.tsv").delete();
    }
}
 
Example 2
Source File: ALSWRFactorizer.java    From elasticsearch-taste with Apache License 2.0 6 votes vote down vote up
Features(final ALSWRFactorizer factorizer) {
    dataModel = factorizer.dataModel;
    numFeatures = factorizer.numFeatures;
    final Random random = RandomUtils.getRandom();

    // Item-feature matrix M: column 0 is seeded with the item's average
    // rating, the remaining columns with small uniform noise in [0, 0.1).
    M = new double[dataModel.getNumItems()][numFeatures];
    final LongPrimitiveIterator ids = dataModel.getItemIDs();
    while (ids.hasNext()) {
        final long itemID = ids.nextLong();
        final int row = factorizer.itemIndex(itemID);
        M[row][0] = averateRating(itemID); // (sic) helper name as declared elsewhere
        for (int f = 1; f < numFeatures; f++) {
            M[row][f] = 0.1 * random.nextDouble();
        }
    }

    // User-feature matrix U is only allocated here (left zeroed).
    U = new double[dataModel.getNumUsers()][numFeatures];
}
 
Example 3
Source File: ParallelSGDFactorizer.java    From elasticsearch-taste with Apache License 2.0 6 votes vote down vote up
protected void initialize() {
    // Seed both factor matrices before SGD training starts.
    final RandomWrapper rnd = RandomUtils.getRandom();
    userVectors = new double[dataModel.getNumUsers()][rank];
    itemVectors = new double[dataModel.getNumItems()][rank];

    final double globalAverage = getAveragePreference();
    for (final double[] user : userVectors) {
        user[0] = globalAverage;
        user[USER_BIAS_INDEX] = 0; // will store user bias
        user[ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
        for (int f = FEATURE_OFFSET; f < rank; f++) {
            user[f] = rnd.nextGaussian() * NOISE;
        }
    }
    for (final double[] item : itemVectors) {
        item[0] = 1; // corresponding user feature contains global average
        item[USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
        item[ITEM_BIAS_INDEX] = 0; // will store item bias
        for (int f = FEATURE_OFFSET; f < rank; f++) {
            item[f] = rnd.nextGaussian() * NOISE;
        }
    }
}
 
Example 4
Source File: TDigestTest.java    From streaminer with Apache License 2.0 6 votes vote down vote up
@Test
public void testNarrowNormal() {
    // A 50/50 mixture of a wide uniform and a very narrow normal gives a
    // sharp peak centered near the median.  Our system should be scale
    // invariant and work well regardless.
    final Random gen = RandomUtils.getRandom();
    AbstractContinousDistribution mix = new AbstractContinousDistribution() {
        private final AbstractContinousDistribution narrow = new Normal(0, 1e-5, gen);
        private final AbstractContinousDistribution wide = new Uniform(-1, 1, gen);

        @Override
        public double nextDouble() {
            // fair coin flip selects which mixture component to sample
            return gen.nextDouble() < 0.5 ? wide.nextDouble() : narrow.nextDouble();
        }
    };

    for (int pass = 0; pass < repeats(); pass++) {
        runTest(mix, 100, new double[]{0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999}, "mixture", false, gen);
    }
}
 
Example 5
Source File: TDigestTest.java    From streaminer with Apache License 2.0 5 votes vote down vote up
@Test
public void testScaling() throws FileNotFoundException {
    Random gen = RandomUtils.getRandom();
    // try-with-resources replaces the manual try/finally + close(); the
    // writer is now closed on every exit path, including if close itself
    // would have been skipped by an earlier failure.
    try (PrintWriter out = new PrintWriter(new FileOutputStream("error-scaling.tsv"))) {
        out.printf("pass\tcompression\tq\terror\tsize\n");
        // change to 50 passes for better graphs
        int n = repeats() * repeats();
        for (int k = 0; k < n; k++) {
            List<Double> data = Lists.newArrayList();
            for (int i = 0; i < 100000; i++) {
                data.add(gen.nextDouble());
            }
            Collections.sort(data);

            for (double compression : new double[]{2, 5, 10, 20, 50, 100, 200, 500, 1000}) {
                TDigest dist = new TDigest(compression, gen);
                for (Double x : data) {
                    dist.add(x);
                }
                dist.compress();

                // compare estimated quantiles against the empirical quantiles
                // of the sorted sample
                for (double q : new double[]{0.001, 0.01, 0.1, 0.5}) {
                    double estimate = dist.quantile(q);
                    double actual = data.get((int) (q * data.size()));
                    out.printf("%d\t%.0f\t%.3f\t%.9f\t%d\n", k, compression, q, estimate - actual, dist.byteSize());
                    out.flush();
                }
            }
        }
    } finally {
        // remove the scratch output even when the test aborts early
        new File("error-scaling.tsv").delete();
    }
}
 
Example 6
Source File: TDigestTest.java    From streaminer with Apache License 2.0 5 votes vote down vote up
@Test
public void compareToQDigest() {
    // Benchmark t-digest against Q-digest on a skewed (gamma) and a flat
    // (uniform) distribution, repeated to smooth out sampling noise.
    final Random gen = RandomUtils.getRandom();
    final long scale = 1L << 48;
    for (int pass = 0; pass < repeats(); pass++) {
        compare(new Gamma(0.1, 0.1, gen), "gamma", scale, gen);
        compare(new Uniform(0, 1, gen), "uniform", scale, gen);
    }
}
 
Example 7
Source File: UpperQuantileTest.java    From log-synth with Apache License 2.0 5 votes vote down vote up
@Before
public void generate() {
    // Fixed seed keeps the fixture reproducible across test runs.
    RandomUtils.useTestSeed();
    uq = new UpperQuantile(101);
    final int n = 1001;
    data = new double[n];
    final Random gen = RandomUtils.getRandom();
    // Feed the same samples to the structure under test and to a plain
    // array that serves as the reference.
    for (int i = 0; i < n; i++) {
        final double sample = gen.nextDouble();
        data[i] = sample;
        uq.add(sample);
    }
    Arrays.sort(data);
}
 
Example 8
Source File: User.java    From log-synth with Apache License 2.0 5 votes vote down vote up
public User(InetAddress address, String geoCode, TermGenerator terms, double period) {
    this.address = address;
    this.geoCode = geoCode;
    this.terms = terms;
    this.rate = period;
    // Session times are drawn from an exponential distribution
    // parameterized by the given period.
    this.sessionTimeDistribution = new Exponential(period, RandomUtils.getRandom());

    this.id = idCounter.incrementAndGet();
    this.nextSession = sessionTimeDistribution.nextDouble();
}
 
Example 9
Source File: RatingSGDFactorizer.java    From elasticsearch-taste with Apache License 2.0 5 votes vote down vote up
protected void shufflePreferences() {
    final RandomWrapper rnd = RandomUtils.getRandom();
    // Fisher-Yates (Durstenfeld) shuffle: walk backwards, swapping each
    // position with a uniformly chosen earlier (or same) position.
    for (int i = cachedUserIDs.length - 1; i > 0; i--) {
        swapCachedPreferences(i, rnd.nextInt(i + 1));
    }
}
 
Example 10
Source File: TDigestTest.java    From streaminer with Apache License 2.0 5 votes vote down vote up
@Test
public void testUniform() {
    // Accuracy check against a plain uniform distribution at a spread of
    // central and tail quantiles.
    final Random gen = RandomUtils.getRandom();
    final double[] quantiles = {0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999};
    for (int pass = 0; pass < repeats(); pass++) {
        runTest(new Uniform(0, 1, gen), 100, quantiles, "uniform", true, gen);
    }
}
 
Example 11
Source File: DateSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
@SuppressWarnings("UnusedDeclaration")
public void setEnd(String end) throws ParseException {
    // Parse the textual date and rebuild the uniform sampler over the
    // (possibly new) [start, end) millisecond span.
    final long endMillis = df.parse(end).getTime();
    this.end = endMillis;
    base = new Uniform(0, endMillis - this.start, RandomUtils.getRandom());
}
 
Example 12
Source File: LogisticTest.java    From ml-models with Apache License 2.0 4 votes vote down vote up
@Test
public void makeModel() throws Exception {
    // NOTE(review): machine-specific absolute path — this test can only run
    // on the original author's machine; it should load the CSV from the
    // test classpath instead.
    String csvFile = "/Users/laurenshin/documents/linreg-graph-analytics/src/test/resources/iris-full.csv";
    String csvSplitBy = ",";

    List<Map<String, Double>> data = new ArrayList<>();
    List<String> target = new ArrayList<>();
    List<Integer> order = new ArrayList<>();

    // Load the iris data set: four numeric features plus the class label.
    try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
        br.readLine(); //skip headers
        String line;
        int i = 0;
        while ((line = br.readLine()) != null) {
            String[] flower = line.split(csvSplitBy);
            Map<String, Double> v = new HashMap<>(4);
            v.put("sepallength", Double.parseDouble(flower[1])); //sepal length
            v.put("sepalwidth", Double.parseDouble(flower[2])); //sepal width
            v.put("petallength", Double.parseDouble(flower[3])); //petal length
            v.put("petalwidth", Double.parseDouble(flower[4])); //petal width
            data.add(v);
            target.add(flower[5]); //class
            order.add(i++);
        }
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("unable to read csv file for test data");
    }

    // Deterministic shuffle, then split into 100 training / 50 test rows.
    RandomUtils.useTestSeed();
    Random random = RandomUtils.getRandom();
    Collections.shuffle(order, random);
    List<Integer> train = order.subList(0, 100);
    List<Integer> test = order.subList(100, 150);

    db.execute("CALL regression.logistic.create('model', ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], " +
            "{sepallength:'float', sepalwidth:'float', petallength:'float', petalwidth:'float'}, {prior:'L2'})").close();
    // Multiple passes over a reshuffled training set for SGD-style updates.
    for (int pass = 0; pass < 30; pass++) {
        Collections.shuffle(train, random);
        for (int j : train) {
            // close() the Result — the original leaked one per add() call
            db.execute("CALL regression.logistic.add('model', {output}, {inputs})", map("inputs", data.get(j), "output", target.get(j))).close();
        }
    }

    // Score the held-out rows and report per-row and aggregate accuracy.
    int successes = 0;
    int failures = 0;
    for (int k : test) {
        String t;
        String guess = ((String) db.execute("RETURN regression.logistic.predict('model', {inputs}) as prediction", map("inputs", data.get(k))).next().get("prediction"));
        if (guess.equals(target.get(k))) {
            t = "SUCCESS!";
            successes++;
        } else {
            t = "FAIL!";
            failures++;
        }
        System.out.format("Expected: %s, Actual: %s %s%n", target.get(k), guess, t);
    }
    System.out.format("SUCCESSES: %d%n", successes);
    System.out.format("FAILURES: %d%n", failures);

    // Clean up the model so later tests start from a blank state.
    db.execute("CALL regression.logistic.delete('model')").close();
}
 
Example 13
Source File: ArrivalSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
@Override
public void setSeed(long seed) {
    // Swap in a freshly seeded RNG so the generated sequence is reproducible.
    this.base = RandomUtils.getRandom(seed);
}
 
Example 14
Source File: ZipSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
@Override
@SuppressWarnings("unused")
public void setSeed(long seed) {
    rand = RandomUtils.getRandom(seed);
}
 
Example 15
Source File: SVDPlusPlusFactorizer.java    From elasticsearch-taste with Apache License 2.0 4 votes vote down vote up
@Override
protected void prepareTraining() {
    super.prepareTraining();
    final Random rnd = RandomUtils.getRandom();

    // User factors p: the first FEATURE_OFFSET slots are zeroed, the rest
    // start as small Gaussian noise.
    p = new double[dataModel.getNumUsers()][numFeatures];
    for (final double[] row : p) {
        for (int f = 0; f < FEATURE_OFFSET; f++) {
            row[f] = 0;
        }
        for (int f = FEATURE_OFFSET; f < numFeatures; f++) {
            row[f] = rnd.nextGaussian() * randomNoise;
        }
    }

    // Implicit-feedback item factors y: initialized the same way.
    y = new double[dataModel.getNumItems()][numFeatures];
    for (final double[] row : y) {
        for (int f = 0; f < FEATURE_OFFSET; f++) {
            row[f] = 0;
        }
        for (int f = FEATURE_OFFSET; f < numFeatures; f++) {
            row[f] = rnd.nextGaussian() * randomNoise;
        }
    }

    /* get internal item IDs which we will need several times */
    itemsByUser = Maps.newHashMap();
    final LongPrimitiveIterator userIDs = dataModel.getUserIDs();
    while (userIDs.hasNext()) {
        final long userId = userIDs.nextLong();
        final FastIDSet ratedItems = dataModel.getItemIDsFromUser(userId);
        final List<Integer> itemIndexes = Lists.newArrayListWithCapacity(ratedItems.size());
        for (final long itemId : ratedItems) {
            itemIndexes.add(itemIndex(itemId));
        }
        itemsByUser.put(userIndex(userId), itemIndexes);
    }
}
 
Example 16
Source File: DateSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
@SuppressWarnings("UnusedDeclaration")
public void setStart(String start) throws ParseException {
    this.start = df.parse(start).getTime();
    base = new Uniform(0, this.end - this.start, RandomUtils.getRandom());
}
 
Example 17
Source File: SamplingLongPrimitiveIterator.java    From elasticsearch-taste with Apache License 2.0 4 votes vote down vote up
// Convenience constructor: samples from the delegate iterator using a
// default RNG from RandomUtils; delegates to the full constructor.
// samplingRate is presumably the probability of keeping each element —
// TODO confirm against the primary constructor.
public SamplingLongPrimitiveIterator(final LongPrimitiveIterator delegate,
        final double samplingRate) {
    this(RandomUtils.getRandom(), delegate, samplingRate);
}
 
Example 18
Source File: IntegerSampler.java    From log-synth with Apache License 2.0 4 votes vote down vote up
@Override
public void setSeed(long seed) {
    // Swap in a freshly seeded RNG so the generated sequence is reproducible.
    this.base = RandomUtils.getRandom(seed);
}
 
Example 19
Source File: GenericRecommenderIRStatsEvaluator.java    From elasticsearch-taste with Apache License 2.0 4 votes vote down vote up
public GenericRecommenderIRStatsEvaluator(
        final RelevantItemsDataSplitter dataSplitter) {
    // Fail fast on a null splitter before touching any state.
    Preconditions.checkNotNull(dataSplitter);
    this.dataSplitter = dataSplitter;
    this.random = RandomUtils.getRandom();
}
 
Example 20
Source File: AbstractDifferenceEvaluator.java    From elasticsearch-taste with Apache License 2.0 4 votes vote down vote up
protected AbstractDifferenceEvaluator() {
    random = RandomUtils.getRandom();
    // Preference bounds start as NaN (i.e. not yet determined).
    minPreference = Float.NaN;
    maxPreference = Float.NaN;
}