org.apache.spark.mllib.random.RandomDataGenerator Scala Examples

The following examples show how to use org.apache.spark.mllib.random.RandomDataGenerator. The originating project and source file are noted above each listing.
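All of these examples implement or consume the same small contract: a RandomDataGenerator[T] provides nextValue() to draw one sample, setSeed(seed: Long) to reseed its RNG, and copy() to produce a fresh instance for each RDD partition. Below is a minimal sketch of a custom generator; the DiceRollGenerator name and its distribution are illustrative only, not part of Spark.

import org.apache.spark.mllib.random.RandomDataGenerator

class DiceRollGenerator extends RandomDataGenerator[Int] {

  private val rng = new java.util.Random()

  // Draw one sample: a uniform integer in 1..6.
  override def nextValue(): Int = 1 + rng.nextInt(6)

  // Reseed for reproducible streams.
  override def setSeed(seed: Long): Unit = rng.setSeed(seed)

  // Spark clones the generator via copy() and seeds each clone separately,
  // so every partition draws an independent stream.
  override def copy(): DiceRollGenerator = new DiceRollGenerator()
}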
Example 1
Source File: TestLPSolver.scala    From spark-lp   with Apache License 2.0
// The original listing omits the package and imports; the paths below are
// assumed from the spark-lp project layout, which mirrors spark-tfocs.
package org.apache.spark.mllib.optimization.lp.examples

import scala.util.Random

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.optimization.lp.LP
import org.apache.spark.mllib.optimization.lp.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.optimization.lp.util.SparseStandardNormalGenerator
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.mllib.rdd.RandomVectorRDD
import org.apache.spark.{ SparkConf, SparkContext }

object TestLPSolver {
  def main(args: Array[String]) {

    val rnd = new Random(12345)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLPSolver")
    val sc = new SparkContext(sparkConf)

    val n = 1000 // Transpose constraint matrix row count.
    val m = 100 // Transpose constraint matrix column count.
    val numPartitions = 2

    // Generate the starting vector from uniform distribution U(3.0, 5.0)
    println("generate x")
    val x0 = RandomRDDs.uniformRDD(sc, n, numPartitions).map(v => 3.0 + 2.0 * v).glom.map(new DenseVector(_))

    // Generate the transpose constraint matrix 'B' using sparse uniformly generated values.
    println("generate B")
    val B = new RandomVectorRDD(sc,
      n,
      m,
      numPartitions,
      new SparseStandardNormalGenerator(0.1),
      rnd.nextLong)

    // Generate the cost vector 'c' using uniformly generated values.
    println("generate c")
    val c = RandomRDDs.uniformRDD(sc, n, numPartitions, rnd.nextLong).glom.map(new DenseVector(_))
    // Compute 'b' using the starting 'x' vector.
    println("generate b")
    val b = (new LinopMatrixAdjoint(B))(x0)

    // Solve the linear program using LP.solve; only the optimal objective value 'optimalVal' is kept here.
    println("Start solving ...")
    val (optimalVal, _) = LP.solve(c, B, b, sc=sc)
    println("optimalVal: " + optimalVal)
    //println("optimalX: " + optimalX.collectElements.mkString(", "))

    sc.stop()
  }
} 
Example 2
Source File: TestLinearProgram.scala    From spark-tfocs   with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.examples

import scala.util.Random

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.optimization.tfocs.DVectorFunctions._
import org.apache.spark.mllib.optimization.tfocs.SolverSLP
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.random.{ RandomDataGenerator, RandomRDDs }
import org.apache.spark.mllib.rdd.RandomVectorRDD
import org.apache.spark.{ SparkConf, SparkContext }
import org.apache.spark.util.random.XORShiftRandom


object TestLinearProgram {
  def main(args: Array[String]) {

    val rnd = new Random(34324)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLinearProgram")
    val sc = new SparkContext(sparkConf)

    val n = 5000 // Transpose constraint matrix row count.
    val m = n / 2 // Transpose constraint matrix column count.

    // Generate a starting 'x' vector, using normally generated values.
    val x = RandomRDDs.normalRDD(sc, n).map(_ + 10).glom.map(new DenseVector(_))

    // Generate the transpose constraint matrix 'A' using sparse normally generated values.
    val A = new RandomVectorRDD(sc,
      n,
      m,
      sc.defaultMinPartitions,
      new SparseStandardNormalGenerator(0.01),
      rnd.nextLong)

    // Generate the cost vector 'c' using normally generated values.
    val c = RandomRDDs.normalRDD(sc, n, 0, rnd.nextLong).glom.map(new DenseVector(_))

    // Compute 'b' using the starting 'x' vector.
    val b = new LinopMatrixAdjoint(A)(x)

    val mu = 1e-2 // Smoothing parameter for the SLP (smoothed linear program) solver.

    // Solve the linear program using SolverSLP, finding the optimal x vector 'optimalX'.
    val (optimalX, _) = SolverSLP.run(c, A, b, mu)
    println("optimalX: " + optimalX.collectElements.mkString(", "))

    sc.stop()
  }
} 
Example 3
Source File: RatingGenerator.scala    From spark-sql-perf   with Apache License 2.0
package com.databricks.spark.sql.perf.mllib.data

import org.apache.spark.ml.recommendation.ALS.Rating
import org.apache.spark.mllib.random.RandomDataGenerator

import scala.collection.mutable

class RatingGenerator(
    private val numUsers: Int,
    private val numProducts: Int,
    private val implicitPrefs: Boolean) extends RandomDataGenerator[Rating[Int]] {

  private val rng = new java.util.Random()

  // Tracks (user, product) pairs already emitted, so each pair is rated
  // at most once by this generator instance.
  private val observed = new mutable.HashMap[(Int, Int), Boolean]()

  override def nextValue(): Rating[Int] = {
    // Resample until an unseen (user, product) pair turns up.
    var tuple = (rng.nextInt(numUsers), rng.nextInt(numProducts))
    while (observed.getOrElse(tuple, false)) {
      tuple = (rng.nextInt(numUsers), rng.nextInt(numProducts))
    }
    observed += (tuple -> true)

    // Implicit preferences are binary (0 or 1); explicit ratings are uniform in [0, 5).
    val rating = if (implicitPrefs) rng.nextInt(2) * 1.0 else rng.nextDouble() * 5

    new Rating(tuple._1, tuple._2, rating.toFloat)
  }

  override def setSeed(seed: Long) {
    rng.setSeed(seed)
  }

  override def copy(): RatingGenerator =
    new RatingGenerator(numUsers, numProducts, implicitPrefs)
} 
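A generator like this is normally handed to RandomRDDs.randomRDD, which copies it into each partition and seeds every copy distinctly. A brief usage sketch, assuming the RatingGenerator above is on the classpath (the sizes and seed are arbitrary):

import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.{ SparkConf, SparkContext }

object RatingGeneratorExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("RatingGeneratorExample"))

    // 10,000 (user, product, rating) triples across 4 partitions.
    val ratings = RandomRDDs.randomRDD(
      sc, new RatingGenerator(100, 500, implicitPrefs = false), 10000L, 4, 42L)

    ratings.take(5).foreach(println)
    sc.stop()
  }
}

Note that the observed map lives inside each generator copy, so uniqueness of (user, product) pairs holds within each partition, not across the whole RDD.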
Example 4
Source File: ItemSetGenerator.scala    From spark-sql-perf   with Apache License 2.0
package com.databricks.spark.sql.perf.mllib.data

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.mllib.random.{PoissonGenerator, RandomDataGenerator}

class ItemSetGenerator(
    val numItems: Int,
    val avgItemSetSize: Int)
  extends RandomDataGenerator[Array[String]] {

  assert(avgItemSetSize > 2)
  assert(numItems > 2)

  private val rng = new java.util.Random()
  // Basket size is Poisson-distributed; the mean is reduced by 2 to leave
  // room for the two rule items appended in step 3.
  private val itemSetSizeRng = new PoissonGenerator(avgItemSetSize - 2)
  // Item ids are Poisson-distributed around the middle of the id range.
  private val itemRng = new PoissonGenerator(numItems / 2.0)

  override def setSeed(seed: Long) {
    rng.setSeed(seed)
    itemSetSizeRng.setSeed(seed)
    itemRng.setSeed(seed)
  }

  override def nextValue(): Array[String] = {
    // 1. generate size of itemset
    val size = DataGenUtil.nextPoisson(itemSetSizeRng, v => v >= 1 && v <= numItems).toInt
    // Reserve room for the two rule items that may be appended in step 3.
    val arrayBuff = new ArrayBuffer[Int](size + 2)

    // 2. generate items in the itemset
    var i = 0
    while (i < size) {
      val nextVal = DataGenUtil.nextPoisson(itemRng, (item: Double) => {
        item >= 0 && item < numItems && !arrayBuff.contains(item)
      }).toInt
      arrayBuff.append(nextVal)
      i += 1
    }

    // 3. generate association rules by adding two computed items

    // 3.1 add a new item = (firstItem + numItems / 2) % numItems
    val newItem1 = (arrayBuff(0) + numItems / 2) % numItems
    if (!arrayBuff.contains(newItem1)) {
      arrayBuff.append(newItem1)
    }
    // 3.2 add a new item = (firstItem + secondItem) % numItems
    if (arrayBuff.size >= 2) {
      val newItem2 = (arrayBuff(0) + arrayBuff(1)) % numItems
      if (!arrayBuff.contains(newItem2)) {
        arrayBuff.append(newItem2)
      }
    }
    arrayBuff.map(_.toString).toArray
  }

  override def copy(): ItemSetGenerator =
    new ItemSetGenerator(numItems, avgItemSetSize)
}
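As with the rating generator, this one plugs into RandomRDDs.randomRDD, and the baskets it emits can feed a frequent-pattern miner to check that the planted rules surface. A hedged sketch, assuming ItemSetGenerator and its DataGenUtil helper (not shown here) are on the classpath:

import org.apache.spark.ml.fpm.FPGrowth
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.sql.SparkSession

object ItemSetGeneratorExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]").appName("ItemSetGeneratorExample").getOrCreate()
    import spark.implicits._

    // 1,000 synthetic baskets over 50 items, averaging about 8 items each.
    val itemSets = RandomRDDs.randomRDD(
      spark.sparkContext, new ItemSetGenerator(50, 8), 1000L, 2, 7L)

    // The derived items appended in step 3 should show up as
    // high-confidence association rules.
    val model = new FPGrowth().setItemsCol("items").setMinSupport(0.05)
      .fit(itemSets.toDF("items"))
    model.associationRules.show(5, truncate = false)

    spark.stop()
  }
}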