org.apache.spark.util.random.BernoulliCellSampler Scala Examples

The following examples show how to use org.apache.spark.util.random.BernoulliCellSampler. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: SampleNode.scala (from the BigDatalog project, Apache License 2.0; 5 votes)
package org.apache.spark.sql.execution.local

import org.apache.spark.sql.SQLConf
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.util.random.{BernoulliCellSampler, PoissonSampler}



/**
 * Local node that emits a random sample of the rows produced by `child`.
 *
 * When `withReplacement` is set, rows go through a [[PoissonSampler]] constructed with
 * `upperBound - lowerBound`; otherwise a [[BernoulliCellSampler]] constructed with
 * `lowerBound` and `upperBound` is used. The chosen sampler is seeded with `seed`
 * every time `open()` is called.
 */
case class SampleNode(
    conf: SQLConf,
    lowerBound: Double,
    upperBound: Double,
    withReplacement: Boolean,
    seed: Long,
    child: LocalNode) extends UnaryLocalNode(conf) {

  // Sampled view over the child's rows; assigned by open().
  private[this] var sampledRows: Iterator[InternalRow] = _

  // Row most recently pulled by next(); returned by fetch().
  private[this] var current: InternalRow = _

  /** Sampling does not change the schema, so the child's attributes are passed through. */
  override def output: Seq[Attribute] = child.output

  /** Opens the child, then builds and seeds the sampler that wraps the child's rows. */
  override def open(): Unit = {
    child.open()
    val rowSampler =
      if (!withReplacement) {
        new BernoulliCellSampler[InternalRow](lowerBound, upperBound)
      } else {
        // Gap sampling is switched off: that method buffers two rows internally, which
        // would force a row copy that costs more than the random number generator.
        new PoissonSampler[InternalRow](upperBound - lowerBound, useGapSamplingIfPossible = false)
      }
    rowSampler.setSeed(seed)
    sampledRows = rowSampler.sample(child.asIterator)
  }

  /** Advances to the next sampled row; returns false once the sampled rows are exhausted. */
  override def next(): Boolean = {
    val advanced = sampledRows.hasNext
    if (advanced) {
      current = sampledRows.next()
    }
    advanced
  }

  /** Returns the row selected by the last call to `next()`. */
  override def fetch(): InternalRow = current

  /** Closes the child node. */
  override def close(): Unit = child.close()

}
Example 2
Source File: SampleNodeSuite.scala (from the BigDatalog project, Apache License 2.0; 5 votes)
package org.apache.spark.sql.execution.local

import org.apache.spark.util.random.{BernoulliCellSampler, PoissonSampler}


/**
 * Checks that [[SampleNode]] yields the same rows as applying an identically configured
 * and identically seeded sampler directly to the input data.
 */
class SampleNodeSuite extends LocalNodeTest {

  /**
   * Registers one test case comparing the node's output against a reference sampler.
   *
   * @param withReplacement selects the PoissonSampler path when true and the
   *                        BernoulliCellSampler path when false
   */
  private def testSample(withReplacement: Boolean): Unit = {
    val seed = 0L
    val lowerBound = 0.0
    val upperBound = 0.3
    // Produces the test names "with replacement" and "without replacement".
    val suffix = if (withReplacement) "" else "out"
    test(s"with$suffix replacement") {
      val inputData = (1 to 1000).map(i => (i, i)).toArray
      val inputNode = new DummyNode(kvIntAttributes, inputData)
      val sampleNode =
        new SampleNode(conf, lowerBound, upperBound, withReplacement, seed, inputNode)

      // Reference sampler, configured the same way SampleNode configures its own.
      val referenceSampler =
        if (!withReplacement) {
          new BernoulliCellSampler[(Int, Int)](lowerBound, upperBound)
        } else {
          new PoissonSampler[(Int, Int)](
            upperBound - lowerBound, useGapSamplingIfPossible = false)
        }
      referenceSampler.setSeed(seed)
      val expectedOutput = referenceSampler.sample(inputData.iterator).toArray

      val collectedRows = sampleNode.collect()
      val actualOutput = collectedRows.map(row => (row.getInt(0), row.getInt(1)))
      assert(actualOutput === expectedOutput)
    }
  }

  testSample(withReplacement = true)
  testSample(withReplacement = false)
}