org.apache.spark.util.random.XORShiftRandom Scala Examples

The following examples show how to use org.apache.spark.util.random.XORShiftRandom, Spark's xorshift-based drop-in replacement for java.util.Random. Each example is an excerpt from the open-source project named in its header, reproduced under the Apache License 2.0.
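Before the project examples, here is a minimal usage sketch of the class itself. Everything below is illustrative rather than taken from any of the projects; note that XORShiftRandom is declared private[spark], so code constructing it directly has to live under an org.apache.spark package.

// Hypothetical package: it only needs to sit under org.apache.spark because
// XORShiftRandom is declared private[spark].
package org.apache.spark.examples.sketch

import org.apache.spark.util.random.XORShiftRandom

object XORShiftRandomSketch {
  def main(args: Array[String]): Unit = {
    val rng = new XORShiftRandom(42L)   // seeded constructor; a no-arg constructor also exists
    val u = rng.nextDouble()            // uniform draw in [0, 1), same API as java.util.Random
    val g = rng.nextGaussian()          // standard normal draw, inherited from java.util.Random
    rng.setSeed(42L)                    // reseeding restarts the stream deterministically
    println(s"u = $u, g = $g, replayed u = ${rng.nextDouble()}")
  }
}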
Example 1
Source File: BaggedPoint.scala    From drizzle-spark    with Apache License 2.0
package org.apache.spark.ml.tree.impl

import org.apache.commons.math3.distribution.PoissonDistribution

import org.apache.spark.rdd.RDD
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom


// Internal representation of a data point with one sampling weight per subsample.
private[spark] class BaggedPoint[Datum](val datum: Datum, val subsampleWeights: Array[Double])
  extends Serializable


private[spark] object BaggedPoint {

  def convertToBaggedRDD[Datum] (
      input: RDD[Datum],
      subsamplingRate: Double,
      numSubsamples: Int,
      withReplacement: Boolean,
      seed: Long = Utils.random.nextLong()): RDD[BaggedPoint[Datum]] = {
    if (withReplacement) {
      convertToBaggedRDDSamplingWithReplacement(input, subsamplingRate, numSubsamples, seed)
    } else {
      if (numSubsamples == 1 && subsamplingRate == 1.0) {
        convertToBaggedRDDWithoutSampling(input)
      } else {
        convertToBaggedRDDSamplingWithoutReplacement(input, subsamplingRate, numSubsamples, seed)
      }
    }
  }

  private def convertToBaggedRDDSamplingWithoutReplacement[Datum] (
      input: RDD[Datum],
      subsamplingRate: Double,
      numSubsamples: Int,
      seed: Long): RDD[BaggedPoint[Datum]] = {
    input.mapPartitionsWithIndex { (partitionIndex, instances) =>
      // Use random seed = seed + partitionIndex + 1 to make generation reproducible.
      val rng = new XORShiftRandom
      rng.setSeed(seed + partitionIndex + 1)
      instances.map { instance =>
        val subsampleWeights = new Array[Double](numSubsamples)
        var subsampleIndex = 0
        while (subsampleIndex < numSubsamples) {
          val x = rng.nextDouble()
          subsampleWeights(subsampleIndex) = {
            if (x < subsamplingRate) 1.0 else 0.0
          }
          subsampleIndex += 1
        }
        new BaggedPoint(instance, subsampleWeights)
      }
    }
  }

  private def convertToBaggedRDDSamplingWithReplacement[Datum] (
      input: RDD[Datum],
      subsample: Double,
      numSubsamples: Int,
      seed: Long): RDD[BaggedPoint[Datum]] = {
    input.mapPartitionsWithIndex { (partitionIndex, instances) =>
      // Use random seed = seed + partitionIndex + 1 to make generation reproducible.
      val poisson = new PoissonDistribution(subsample)
      poisson.reseedRandomGenerator(seed + partitionIndex + 1)
      instances.map { instance =>
        val subsampleWeights = new Array[Double](numSubsamples)
        var subsampleIndex = 0
        while (subsampleIndex < numSubsamples) {
          subsampleWeights(subsampleIndex) = poisson.sample()
          subsampleIndex += 1
        }
        new BaggedPoint(instance, subsampleWeights)
      }
    }
  }

  private def convertToBaggedRDDWithoutSampling[Datum] (
      input: RDD[Datum]): RDD[BaggedPoint[Datum]] = {
    input.map(datum => new BaggedPoint(datum, Array(1.0)))
  }

} 
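A hedged sketch of how the conversion above might be driven; the driver object, its parameter values and the local SparkContext are illustrative, not part of drizzle-spark. With withReplacement = true, each input point receives one Poisson(subsamplingRate) weight per subsample, which is the bootstrap used by random forests.

// The BaggedPoint object is package-private, so this sketch lives beside it.
package org.apache.spark.ml.tree.impl

import org.apache.spark.SparkContext

object BaggedPointUsageSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[2]", "BaggedPointUsageSketch")
    val data = sc.parallelize(1 to 100)
    // Draw 3 bootstrap subsamples per point, sampling with replacement at rate 1.0.
    val bagged = BaggedPoint.convertToBaggedRDD(data, subsamplingRate = 1.0,
      numSubsamples = 3, withReplacement = true, seed = 42L)
    bagged.take(3).foreach(bp => println(bp.subsampleWeights.mkString(", ")))
    sc.stop()
  }
}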
Example 2
Source File: randomExpressions.scala    From drizzle-spark    with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.TaskContext
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom


@ExpressionDescription(
  usage = "_FUNC_(a) - Returns a random column with i.i.d. gaussian random distribution.")
case class Randn(seed: Long) extends RDG {
  override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()

  def this() = this(Utils.random.nextLong())

  def this(seed: Expression) = this(seed match {
    case IntegerLiteral(s) => s
    case _ => throw new AnalysisException("Input argument to randn must be an integer literal.")
  })

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val rngTerm = ctx.freshName("rng")
    val className = classOf[XORShiftRandom].getName
    ctx.addMutableState(className, rngTerm,
      s"$rngTerm = new $className(${seed}L + org.apache.spark.TaskContext.getPartitionId());")
    ev.copy(code = s"""
      final ${ctx.javaType(dataType)} ${ev.value} = $rngTerm.nextGaussian();""", isNull = "false")
  }
} 
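The generated Java above allocates one XORShiftRandom per task and seeds it with the expression's seed plus the partition id (Randn itself extends the RDG base class defined earlier in the same source file, which owns the rng field). That seeding scheme is what makes the column reproducible for a fixed partitioning; the plain-Scala sketch below imitates it outside of codegen, with an illustrative object name and job setup.

package org.apache.spark.examples.sketch

import org.apache.spark.SparkContext
import org.apache.spark.util.random.XORShiftRandom

object PerPartitionGaussianSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[2]", "PerPartitionGaussianSketch")
    val seed = 12345L
    val gaussians = sc.parallelize(1 to 10, numSlices = 2).mapPartitionsWithIndex {
      (partitionId, rows) =>
        // One generator per partition, seeded exactly like the generated code above.
        val rng = new XORShiftRandom(seed + partitionId)
        rows.map(_ => rng.nextGaussian())
    }
    gaussians.collect().foreach(println)
    sc.stop()
  }
}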
Example 3
Source File: TestLPSolver.scala    From spark-lp    with Apache License 2.0
// The package declaration and imports are omitted from this excerpt; judging from the
// identifiers used below, the full file in spark-lp imports SparkConf/SparkContext,
// scala.util.Random, RandomRDDs, RandomVectorRDD, DenseVector, SparseStandardNormalGenerator,
// and the project's own LinopMatrixAdjoint and LP.
object TestLPSolver {
  def main(args: Array[String]) {

    val rnd = new Random(12345)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLPSolver")
    val sc = new SparkContext(sparkConf)

    val n = 1000 // Transpose constraint matrix row count.
    val m = 100 // Transpose constraint matrix column count.
    val numPartitions = 2

    // Generate the starting vector from uniform distribution U(3.0, 5.0)
    println("generate x")
    val x0 = RandomRDDs.uniformRDD(sc, n, numPartitions).map(v => 3.0 + 2.0 * v).glom.map(new DenseVector(_))

    // Generate the transpose constraint matrix 'B' using sparse standard normal values.
    println("generate B")
    val B = new RandomVectorRDD(sc,
      n,
      m,
      numPartitions,
      new SparseStandardNormalGenerator(0.1),
      rnd.nextLong)

    // Generate the cost vector 'c' using uniformly generated values.
    println("generate c")
    val c = RandomRDDs.uniformRDD(sc, n, numPartitions, rnd.nextLong).glom.map(new DenseVector(_))
    // Compute 'b' using the starting 'x' vector.
    println("generate b")
    val b = (new LinopMatrixAdjoint(B))(x0)

    // Solve the linear program using LP.solve, keeping the optimal objective value
    // 'optimalVal' (the optimal vector is discarded here).
    println("Start solving ...")
    val (optimalVal, _) = LP.solve(c, B, b, sc=sc)
    println("optimalVal: " + optimalVal)
    //println("optimalX: " + optimalX.collectElements.mkString(", "))

    sc.stop()
  }
} 
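For orientation, and pieced together from the comments in the example rather than from spark-lp documentation: x0 is a random strictly positive starting vector, B stores the constraint matrix transposed so that its n rows distribute across partitions, and b is obtained by applying the adjoint of B to x0, so the program is feasible by construction. Assuming the usual standard form, the problem handed to LP.solve is

\min_{x \ge 0} \; c^{\top} x \quad \text{subject to} \quad B^{\top} x = b, \qquad b := B^{\top} x_0 .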
Example 4
Source File: TestLinearProgram.scala    From spark-tfocs    with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.examples

import scala.util.Random

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.optimization.tfocs.DVectorFunctions._
import org.apache.spark.mllib.optimization.tfocs.SolverSLP
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.random.{ RandomDataGenerator, RandomRDDs }
import org.apache.spark.mllib.rdd.RandomVectorRDD
import org.apache.spark.{ SparkConf, SparkContext }
import org.apache.spark.util.random.XORShiftRandom


object TestLinearProgram {
  def main(args: Array[String]) {

    val rnd = new Random(34324)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLinearProgram")
    val sc = new SparkContext(sparkConf)

    val n = 5000 // Transpose constraint matrix row count.
    val m = n / 2 // Transpose constraint matrix column count.

    // Generate a starting 'x' vector, using normally generated values.
    val x = RandomRDDs.normalRDD(sc, n).map(_ + 10).glom.map(new DenseVector(_))

    // Generate the transpose constraint matrix 'A' using sparse normally generated values.
    val A = new RandomVectorRDD(sc,
      n,
      m,
      sc.defaultMinPartitions,
      new SparseStandardNormalGenerator(0.01),
      rnd.nextLong)

    // Generate the cost vector 'c' using normally generated values.
    val c = RandomRDDs.normalRDD(sc, n, 0, rnd.nextLong).glom.map(new DenseVector(_))

    // Compute 'b' using the starting 'x' vector.
    val b = new LinopMatrixAdjoint(A)(x)

    val mu = 1e-2

    // Solve the linear program using SolverSLP, finding the optimal x vector 'optimalX'.
    val (optimalX, _) = SolverSLP.run(c, A, b, mu)
    println("optimalX: " + optimalX.collectElements.mkString(", "))

    sc.stop()
  }
} 
Example 5
Source File: randomExpressions.scala    From XSQL    with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom


// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = """_FUNC_([seed]) - Returns a random value with independent and identically distributed (i.i.d.) values drawn from the standard normal distribution.""",
  examples = """
    Examples:
      > SELECT _FUNC_();
       -0.3254147983080288
      > SELECT _FUNC_(0);
       1.1164209726833079
      > SELECT _FUNC_(null);
       1.1164209726833079
  """,
  note = "The function is non-deterministic in general case.")
// scalastyle:on line.size.limit
case class Randn(child: Expression) extends RDG with ExpressionWithRandomSeed {

  def this() = this(Literal(Utils.random.nextLong(), LongType))

  override def withNewSeed(seed: Long): Randn = Randn(Literal(seed, LongType))

  override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val className = classOf[XORShiftRandom].getName
    val rngTerm = ctx.addMutableState(className, "rng")
    ctx.addPartitionInitializationStatement(
      s"$rngTerm = new $className(${seed}L + partitionIndex);")
    ev.copy(code = code"""
      final ${CodeGenerator.javaType(dataType)} ${ev.value} = $rngTerm.nextGaussian();""",
      isNull = FalseLiteral)
  }

  override def freshCopy(): Randn = Randn(child)
}

object Randn {
  def apply(seed: Long): Randn = Randn(Literal(seed, LongType))
} 
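The expression above backs the public randn function; end users do not construct Randn directly. A minimal sketch of that public surface (the session setup and object name are illustrative):

package example.sketch

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.randn

object RandnUsageSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("RandnUsageSketch").getOrCreate()
    // Fixed seed: the same plan over the same partitioning reproduces the column.
    spark.range(5).select(randn(42L).alias("gaussian")).show()
    // The SQL form resolves to the same expression.
    spark.sql("SELECT randn(42)").show()
    spark.stop()
  }
}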
Example 6
Source File: Vector.scala    From SparkCore    with Apache License 2.0 (the same file appears verbatim in the iolap, spark1.52 and BigDatalog projects)
package org.apache.spark.util

import scala.language.implicitConversions
import scala.util.Random

import org.apache.spark.util.random.XORShiftRandom

@deprecated("Use Vectors.dense from Spark's mllib.linalg package instead.", "1.0.0")
class Vector(val elements: Array[Double]) extends Serializable {
  def length: Int = elements.length

  def apply(index: Int): Double = elements(index)

  def + (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) + other(i))
  }

  def add(other: Vector): Vector = this + other

  def - (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) - other(i))
  }

  def subtract(other: Vector): Vector = this - other

  def dot(other: Vector): Double = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    var ans = 0.0
    var i = 0
    while (i < length) {
      ans += this(i) * other(i)
      i += 1
    }
    ans
  }

  def * (scale: Double): Vector = Vector(length, i => this(i) * scale)

  // The remaining operators from the original file (/, unary_-, sum, squaredDist, toString)
  // are omitted from this excerpt.
}


object Vector {

  def apply(elements: Array[Double]): Vector = new Vector(elements)

  def apply(elements: Double*): Vector = new Vector(elements.toArray)

  def apply(length: Int, initializer: Int => Double): Vector = {
    val elements: Array[Double] = Array.tabulate(length)(initializer)
    new Vector(elements)
  }

  def zeros(length: Int): Vector = new Vector(new Array[Double](length))

  def ones(length: Int): Vector = Vector(length, _ => 1)

  def random(length: Int, random: Random = new XORShiftRandom()): Vector =
    Vector(length, _ => random.nextDouble())

  class Multiplier(num: Double) {
    def * (vec: Vector): Vector = vec * num
  }

  implicit def doubleToMultiplier(num: Double): Multiplier = new Multiplier(num)

  implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] {
    def addInPlace(t1: Vector, t2: Vector): Vector = t1 + t2

    def zero(initialValue: Vector): Vector = Vector.zeros(initialValue.length)
  }

} 
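A small hypothetical driver for the deprecated utility above, showing why random accepts a scala.util.Random: passing an explicitly seeded XORShiftRandom makes the generated vectors reproducible. The package and object names below are illustrative; as before, the code only needs to sit under org.apache.spark because the generator is private[spark].

package org.apache.spark.examples.sketch

import org.apache.spark.util.Vector
import org.apache.spark.util.random.XORShiftRandom

object VectorRandomSketch {
  def main(args: Array[String]): Unit = {
    val rng = new XORShiftRandom(7L)   // shared, explicitly seeded generator
    val a = Vector.random(4, rng)      // two reproducible random vectors
    val b = Vector.random(4, rng)
    println("a + b = " + (a + b).elements.mkString(", "))
    println("a . b = " + (a dot b))
  }
}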
Example 7
Source File: randomExpressions.scala    From spark1.52    with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.TaskContext
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom


case class Randn(seed: Long) extends RDG {
  override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()

  def this() = this(Utils.random.nextLong())

  def this(seed: Expression) = this(seed match {
    case IntegerLiteral(s) => s
    case _ => throw new AnalysisException("Input argument to randn must be an integer literal.")
  })

  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
    val rngTerm = ctx.freshName("rng")
    val className = classOf[XORShiftRandom].getName
    ctx.addMutableState(className, rngTerm,
      s"$rngTerm = new $className(${seed}L + org.apache.spark.TaskContext.getPartitionId());")
    ev.isNull = "false"
    s"""
      final ${ctx.javaType(dataType)} ${ev.primitive} = $rngTerm.nextGaussian();
    """
  }
} 
Example 8
Source File: randomExpressions.scala    From BigDatalog    with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.TaskContext
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom


case class Randn(seed: Long) extends RDG {
  override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()

  def this() = this(Utils.random.nextLong())

  def this(seed: Expression) = this(seed match {
    case IntegerLiteral(s) => s
    case _ => throw new AnalysisException("Input argument to randn must be an integer literal.")
  })

  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
    val rngTerm = ctx.freshName("rng")
    val className = classOf[XORShiftRandom].getName
    ctx.addMutableState(className, rngTerm,
      s"$rngTerm = new $className(${seed}L + org.apache.spark.TaskContext.getPartitionId());")
    ev.isNull = "false"
    s"""
      final ${ctx.javaType(dataType)} ${ev.value} = $rngTerm.nextGaussian();
    """
  }
} 