breeze.linalg.DenseVector Scala Examples

The following examples show how to use breeze.linalg.DenseVector. Each example is drawn from an open-source project; the source file, project name, and license are listed above the code.
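Before the project examples, here is a minimal, self-contained sketch of the core DenseVector operations the examples below rely on (construction, arithmetic, dot products, and mutation); everything shown is standard Breeze API:

import breeze.linalg.{DenseVector, squaredDistance}

object DenseVectorBasics {
  def main(args: Array[String]): Unit = {
    val rand = new scala.util.Random(42)
    val v = DenseVector(1.0, 2.0, 3.0)             // from literal values
    val z = DenseVector.zeros[Double](3)           // all-zero vector
    val r = DenseVector.fill(3){rand.nextDouble()} // filled from a block
    println(v + v)                  // elementwise addition
    println(v * 2.0)                // scalar multiplication
    println(v dot v)                // inner product: 14.0
    println(squaredDistance(v, z))  // squared Euclidean distance: 14.0
    v(0) = 10.0                     // DenseVectors are mutable
    println(v); println(r)
  }
}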
Example 1
Source File: SparkHdfsLR.scala    From learning-spark   with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo



object SparkHdfsLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
    val inputPath = args(0)
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
} 
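The per-point term in the map above is the gradient of the logistic loss log(1 + exp(-y * w.x)) with respect to w, and the loop is plain batch gradient descent with a fixed step size of 1. A minimal single-machine sketch of the same update on hypothetical toy data (no Spark, labels in {-1, +1}):

import breeze.linalg.DenseVector
import scala.math.exp

object LocalGradientStep {
  def main(args: Array[String]): Unit = {
    val points = Seq(
      (DenseVector(1.0, 2.0), 1.0),
      (DenseVector(-1.5, 0.5), -1.0)
    )
    var w = DenseVector.zeros[Double](2)
    for (_ <- 1 to 100) {
      val gradient = points.map { case (x, y) =>
        x * ((1.0 / (1.0 + exp(-y * (w dot x)))) - 1.0) * y
      }.reduce(_ + _)
      w -= gradient   // fixed step size of 1, as in the Spark examples
    }
    println("Final w: " + w)
  }
}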
Example 2
Source File: Integrator.scala    From scalismo   with Apache License 2.0
package scalismo.numerics

import breeze.linalg.DenseVector
import scalismo.common.{Scalar, VectorField}
import scalismo.geometry._
import scalismo.image.ScalarImage

case class Integrator[D: NDSpace](sampler: Sampler[D]) {

  def integrateScalar[A: Scalar](img: ScalarImage[D, A]): A = {
    integrateScalar(img.liftValues)
  }

  def integrateScalar[A: Scalar](f: Function1[Point[D], Option[A]]): A = {
    val scalar = Scalar[A]
    val zero = scalar.fromInt(0)
    val samples = sampler.sample
    val sum = samples.par.map {
      case (pt, p) =>
        scalar.toDouble(f(pt).getOrElse(zero)) * (1.0 / p.toFloat)
    }.sum
    scalar.fromDouble(sum / samples.size)
  }

  def integrateVector[DO: NDSpace](img: VectorField[D, DO]): EuclideanVector[DO] = {
    integrateVector(img.liftValues)
  }

  def integrateVector[DO: NDSpace](f: Function1[Point[D], Option[EuclideanVector[DO]]]): EuclideanVector[DO] = {
    val samples = sampler.sample

    val zeroVector = EuclideanVector.zeros[DO]
    val sum = samples.par
      .map { case (pt, p) => f(pt).getOrElse(zeroVector) * (1.0 / p) }
      .foldLeft(zeroVector)((a, b) => { a + b })
    sum * (1f / (sampler.numberOfPoints - 1))
  }

  def integrateVector(f: Function1[Point[D], Option[DenseVector[Double]]], dimensionality: Int): DenseVector[Double] = {
    val samples = sampler.sample

    val zeroVector = DenseVector.zeros[Double](dimensionality)
    val sum = samples.par
      .map { case (pt, p) => f(pt).getOrElse(zeroVector) * (1.0 / p) }
      .foldLeft(zeroVector)((a, b) => { a + b })
    sum * (1.0 / (sampler.numberOfPoints - 1))
  }

} 
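Both integrators implement importance-sampled Monte Carlo integration: the sampler yields points pt together with the density p under which they were drawn, and averaging f(pt) / p over the samples approximates the integral. A standalone sketch of the same idea, without the scalismo types (hypothetical uniform sampler over [0, 1], so p = 1):

object MonteCarloSketch {
  def main(args: Array[String]): Unit = {
    val rng = new scala.util.Random(42)
    val n = 100000
    // integrate f(x) = x * x over [0, 1]
    val samples = IndexedSeq.fill(n)((rng.nextDouble(), 1.0))
    val sum = samples.map { case (x, p) => (x * x) * (1.0 / p) }.sum
    println(sum / n) // approximately 1/3
  }
}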
Example 3
Source File: Registration.scala    From scalismo   with Apache License 2.0
package scalismo.registration

import breeze.linalg.DenseVector
import scalismo.numerics._
import scalismo.registration.TransformationSpace.ParameterVector


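  // Note: this listing is an excerpt; `metric`, `regularizer`, `regularizationWeight`,
  // and `optimizer` are members of the enclosing Registration class, whose
  // declaration is not part of the snippet.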
  def iterator(initialParameters: DenseVector[Double]): Iterator[RegistrationState] = {

    val costFunction = new CostFunction {
      def onlyValue(params: ParameterVector): Double = {
        metric.value(params) + regularizationWeight * regularizer.value(params)
      }
      def apply(params: ParameterVector): (Double, DenseVector[Double]) = {

        // compute the value of the cost function
        val metricValueAndDerivative = metric.valueAndDerivative(params)
        val value = metricValueAndDerivative.value + regularizationWeight * regularizer.value(params)
        val dR = regularizer.takeDerivative(params)

        (value, metricValueAndDerivative.derivative + dR * regularizationWeight)
      }
    }

    optimizer.iterations(initialParameters, costFunction).map { optimizerState =>
      RegistrationState(optimizerState.value, optimizerState.parameters, optimizerState)
    }
  }

} 
Example 4
Source File: GaussianProcessTransformationSpace.scala    From scalismo   with Apache License 2.0
package scalismo.registration

import breeze.linalg.{DenseMatrix, DenseVector}
import scalismo.geometry.EuclideanVector.VectorVectorizer
import scalismo.geometry.{EuclideanVector, Point}
import scalismo.registration.TransformationSpace.ParameterVector
import scalismo.statisticalmodel.LowRankGaussianProcess
import scalismo.statisticalmodel.LowRankGaussianProcess.Eigenpair

class GaussianProcessTransformationSpace[D] private (gp: LowRankGaussianProcess[D, EuclideanVector[D]])(
  implicit vectorizer: VectorVectorizer[D]
) extends TransformationSpace[D] {

  override type T = GaussianProcessTransformation[D]

  override def identityTransformParameters = DenseVector.zeros[Double](parametersDimensionality)

  override def parametersDimensionality = gp.rank

  override def transformForParameters(p: ParameterVector) = GaussianProcessTransformation[D](gp, p)
  override def takeDerivativeWRTParameters(p: ParameterVector) = {
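    // For a point x, returns the Jacobian of the transformation with respect to
    // the parameters: column i of J is sqrt(lambda_i) * phi_i(x).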

    
    (x: Point[D]) =>
      {
        val dim = x.dimensionality
        val J = DenseMatrix.zeros[Double](dim, gp.klBasis.size)
        (0 until gp.rank).map(i => {
          val Eigenpair(lambda_i, phi_i) = gp.klBasis(i)
          J(::, i) := vectorizer.vectorize(phi_i(x)) * math.sqrt(lambda_i)
        })
        J
      }
  }

}

class GaussianProcessTransformation[D] private (gp: LowRankGaussianProcess[D, EuclideanVector[D]],
                                                alpha: ParameterVector)
    extends ParametricTransformation[D] {

  val instance = gp.instance(alpha)
  val parameters = alpha

  override val domain = gp.domain

  override val f = (x: Point[D]) => {
    val newPointAsVector = instance(x)
    x + newPointAsVector
  }

}

object GaussianProcessTransformation {
  def apply[D](gp: LowRankGaussianProcess[D, EuclideanVector[D]], alpha: TransformationSpace.ParameterVector) = {
    new GaussianProcessTransformation[D](gp, alpha)
  }
}

object GaussianProcessTransformationSpace {
  def apply[D](gp: LowRankGaussianProcess[D, EuclideanVector[D]])(implicit vectorizer: VectorVectorizer[D]) = {
    new GaussianProcessTransformationSpace[D](gp)
  }
} 
Example 5
Source File: StatisticalVolumeIntensityModel.scala    From scalismo   with Apache License 2.0
package scalismo.statisticalmodel.experimental

import breeze.linalg.DenseVector
import scalismo.common._
import scalismo.geometry._
import scalismo.mesh._
import scalismo.statisticalmodel.DiscreteLowRankGaussianProcess
import scalismo.utils.Random

import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag

trait StatisticalVolumeIntensityModel[S] {

  def referenceMeshField: ScalarVolumeMeshField[S]

  def shape: StatisticalVolumeMeshModel

  def intensity: DiscreteLowRankGaussianProcess[_3D, UnstructuredPointsDomain[_3D], S]

  def mean: ScalarVolumeMeshField[S]

  def instance(coefficients: SVIMCoefficients): ScalarVolumeMeshField[S]

  def sample()(implicit rnd: Random): ScalarVolumeMeshField[S]

  def zeroCoefficients: SVIMCoefficients
}

object StatisticalVolumeIntensityModel {

  def apply[S: Scalar: TypeTag: ClassTag](
    referenceMeshField: ScalarVolumeMeshField[S],
    shape: StatisticalVolumeMeshModel,
    intensity: DiscreteLowRankGaussianProcess[_3D, UnstructuredPointsDomain[_3D], S]
  ): SVIM[S] = {
    SVIM(referenceMeshField, shape, intensity)
  }

}

case class SVIM[S: Scalar: TypeTag: ClassTag](
  referenceMeshField: ScalarVolumeMeshField[S],
  shape: StatisticalVolumeMeshModel,
  intensity: DiscreteLowRankGaussianProcess[_3D, UnstructuredPointsDomain[_3D], S]
) extends StatisticalVolumeIntensityModel[S] {

  override def mean: ScalarVolumeMeshField[S] = {
    ScalarVolumeMeshField(shape.mean, warpReferenceIntensity(intensity.mean.data))
  }

  override def instance(coefficients: SVIMCoefficients): ScalarVolumeMeshField[S] = {
    ScalarVolumeMeshField(shape.instance(coefficients.shape),
                          warpReferenceIntensity(intensity.instance(coefficients.intensity).data))
  }

  override def sample()(implicit rnd: Random): ScalarVolumeMeshField[S] = {
    ScalarVolumeMeshField(shape.sample(), warpReferenceIntensity(intensity.sample().data))
  }

  override def zeroCoefficients: SVIMCoefficients = SVIMCoefficients(
    DenseVector.zeros[Double](shape.rank),
    DenseVector.zeros[Double](intensity.rank)
  )

  def truncate(shapeComps: Int, colorComps: Int): SVIM[S] = {
    require(shapeComps >= 0 && shapeComps <= shape.rank, "illegal number of reduced shape components")
    require(colorComps >= 0 && colorComps <= intensity.rank, "illegal number of reduced color components")

    SVIM(
      referenceMeshField,
      shape.truncate(shapeComps),
      intensity.truncate(colorComps)
    )
  }

  private def warpReferenceIntensity(scalarData: IndexedSeq[S]): ScalarArray[S] = {
    ScalarArray[S](
      referenceMeshField.data
        .zip(ScalarArray[S](scalarData.toArray))
        .map { case (r, s) => Scalar[S].plus(r, s) }
        .toArray
    )
  }
} 
Example 6
Source File: SVIMCoefficients.scala    From scalismo   with Apache License 2.0
package scalismo.statisticalmodel.experimental

import breeze.linalg.DenseVector

case class SVIMCoefficients(shape: DenseVector[Double], intensity: DenseVector[Double]) {
  def *(f: Float): SVIMCoefficients = this * f.toDouble
  def *(d: Double): SVIMCoefficients = copy(shape = shape * d, intensity = intensity * d)
  def +(other: SVIMCoefficients): SVIMCoefficients =
    copy(shape = shape + other.shape, intensity = intensity + other.intensity)
  def -(other: SVIMCoefficients): SVIMCoefficients =
    copy(shape = shape - other.shape, intensity = intensity - other.intensity)
}

object SVIMCoefficients {
  def apply(shape: IndexedSeq[Double], intensity: IndexedSeq[Double]) =
    new SVIMCoefficients(DenseVector(shape.toArray), DenseVector(intensity.toArray))

  
  def zeros(shapeComponents: Int, intensityComponents: Int): SVIMCoefficients = {
    new SVIMCoefficients(DenseVector.zeros(shapeComponents), DenseVector.zeros(intensityComponents))
  }
} 
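A short usage sketch for the coefficient arithmetic above (hypothetical values, assuming the scalismo experimental package is on the classpath):

import scalismo.statisticalmodel.experimental.SVIMCoefficients

object CoefficientsDemo {
  def main(args: Array[String]): Unit = {
    val c = SVIMCoefficients(IndexedSeq(1.0, 2.0), IndexedSeq(0.5))
    val halved = c * 0.5
    println(halved.shape)       // DenseVector(0.5, 1.0)
    println((c - c).intensity)  // DenseVector(0.0)
  }
}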
Example 7
Source File: convertOutput.scala    From SparkAndMPIFactorizations   with MIT License
package org.apache.spark.mllib.linalg.distributed
import breeze.linalg.{DenseMatrix, DenseVector}
import java.io.{DataInputStream, FileInputStream, FileWriter, File}

object ConvertDump { 

  type DM = DenseMatrix[Double]
  type DDV = DenseVector[Double]
  type DIV = DenseVector[Int]

  def loadDoubleVector( inf: DataInputStream) : DDV = {
    val len = inf.readInt()
    val v = DenseVector.zeros[Double](len)
    for (i <- 0 until len) {
      v(i) = inf.readDouble()
    }
    v
  }
  
  def loadIntVector( inf: DataInputStream) : DIV = {
    val len = inf.readInt()
    val v = DenseVector.zeros[Int](len)
    for (i <- 0 until len) {
      v(i) = inf.readInt()
    }
    v
  }

  def loadMatrix( inf: DataInputStream) : DM = {
    val (r,c) = Tuple2(inf.readInt(), inf.readInt())
    val m = DenseMatrix.zeros[Double](r,c)
    for (i <- 0 until r; j <- 0 until c) {
      m(i,j) = inf.readDouble()
    }
    m 
  }

  def loadDump(infname: String) : Tuple4[DM, DM, DDV, DDV] = {

    val inf = new DataInputStream( new FileInputStream(infname))

    val eofsU = loadMatrix(inf)
    val eofsV = loadMatrix(inf)
    val evals = loadDoubleVector(inf)
    val mean = loadDoubleVector(inf)

    inf.close()
    (eofsU, eofsV, evals, mean)
  }

  def writeDoubleMatrix(mat: DM, fn: String) = {
    val writer = new FileWriter(new File(fn))
    writer.write("%%MatrixMarket matrix coordinate real general\n")
    writer.write(s"${mat.rows} ${mat.cols} ${mat.rows*mat.cols}\n")
    for(i <- 0 until mat.rows) {
      for(j <- 0 until mat.cols) {
        writer.write(f"${i+1} ${j+1} ${mat(i, j)}%f\n")
      }
    }
    writer.close
  }

  def writeIntVector(vec: DIV, fn: String) = {
    val mat = vec.asDenseMatrix
    val writer = new FileWriter(new File(fn))
    writer.write("%%MatrixMarket matrix coordinate real general\n")
    writer.write(s"${mat.rows} ${mat.cols} ${mat.rows*mat.cols}\n")
    for(i <- 0 until mat.rows) {
      for(j <- 0 until mat.cols) {
        writer.write(s"${i+1} ${j+1} ${mat(i, j)}\n")
      }
    }
    writer.close
  }

  def main(args: Array[String]) {
    val (eofsU, eofsV, eofsS, mean) = loadDump(args(0))
    writeDoubleMatrix(eofsU, s"${args(1)}/colEOFs")
    writeDoubleMatrix(eofsV, s"${args(1)}/rowEOFs")
    writeDoubleMatrix(eofsS.asDenseMatrix, s"${args(1)}/evalEOFs")
    writeDoubleMatrix(mean.asDenseMatrix, s"${args(1)}/rowMeans")
  }
} 
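For testing ConvertDump, one can produce an input file in the binary layout loadDump expects (two matrices, then two double vectors, each preceded by its dimensions, big-endian as produced by DataOutputStream). A hypothetical writer sketch that mirrors the readers above:

import java.io.{DataOutputStream, FileOutputStream}
import breeze.linalg.{DenseMatrix, DenseVector}

object WriteDumpSketch {
  def writeMatrix(out: DataOutputStream, m: DenseMatrix[Double]): Unit = {
    out.writeInt(m.rows); out.writeInt(m.cols)
    for (i <- 0 until m.rows; j <- 0 until m.cols) out.writeDouble(m(i, j))
  }
  def writeVector(out: DataOutputStream, v: DenseVector[Double]): Unit = {
    out.writeInt(v.length)
    for (i <- 0 until v.length) out.writeDouble(v(i))
  }
  def main(args: Array[String]): Unit = {
    val out = new DataOutputStream(new FileOutputStream(args(0)))
    writeMatrix(out, DenseMatrix.rand[Double](4, 3))  // eofsU
    writeMatrix(out, DenseMatrix.rand[Double](4, 3))  // eofsV
    writeVector(out, DenseVector.rand[Double](3))     // evals
    writeVector(out, DenseVector.rand[Double](3))     // mean
    out.close()
  }
}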
Example 8
Source File: LocalKMeans.scala    From learning-spark   with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.SparkContext._


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData = {
    def generatePoint(i: Int) = {
      DenseVector.fill(D){rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (x1, y1)), (id2, (x2, y2))) => (id1, (x1 + x2, y1 + y2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
} 
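closestPoint is a linear scan for the center minimizing Breeze's squaredDistance. An equivalent, more compact formulation over an indexed collection (a sketch, not the original code):

import breeze.linalg.{DenseVector, Vector, squaredDistance}

object ClosestPointCheck {
  def closestPoint(p: Vector[Double], centers: Array[Vector[Double]]): Int =
    centers.indices.minBy(i => squaredDistance(p, centers(i)))

  def main(args: Array[String]): Unit = {
    val centers: Array[Vector[Double]] =
      Array(DenseVector(0.0, 0.0), DenseVector(5.0, 5.0))
    println(closestPoint(DenseVector(4.0, 4.5), centers)) // 1
  }
}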
Example 9
Source File: SparkLR.scala    From learning-spark   with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}

import org.apache.spark._


object SparkLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData = {
    def generatePoint(i: Int) = {
      val y = if(i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkLR")
    val sc = new SparkContext(sparkConf)
    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = sc.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    sc.stop()
  }
} 
Example 10
Source File: LocalFileLR.scala    From learning-spark   with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{Vector, DenseVector}


object LocalFileLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val nums = line.split(' ').map(_.toDouble)
    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
    val points = lines.map(parsePoint _)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- points) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
} 
Example 11
Source File: SparkKMeans.scala    From learning-spark   with Apache License 2.0
package org.apache.spark.examples

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.SparkContext._


object SparkKMeans {

  def parseVector(line: String): Vector[Double] = {
    DenseVector(line.split(' ').map(_.toDouble))
  }

  def closestPoint(p: Vector[Double], centers: Array[Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 0 until centers.length) {
      val tempDist = squaredDistance(p, centers(i))
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 3) {
      System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkKMeans")
    val sc = new SparkContext(sparkConf)
    val lines = sc.textFile(args(0))
    val data = lines.map(parseVector _).cache()
    val K = args(1).toInt
    val convergeDist = args(2).toDouble

    val kPoints = data.takeSample(withReplacement = false, K, 42).toArray
    var tempDist = 1.0

    while(tempDist > convergeDist) {
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val pointStats = closest.reduceByKey{case ((x1, y1), (x2, y2)) => (x1 + x2, y1 + y2)}

      val newPoints = pointStats.map {pair =>
        (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()

      tempDist = 0.0
      for (i <- 0 until K) {
        tempDist += squaredDistance(kPoints(i), newPoints(i))
      }

      for (newP <- newPoints) {
        kPoints(newP._1) = newP._2
      }
      println("Finished iteration (delta = " + tempDist + ")")
    }

    println("Final centers:")
    kPoints.foreach(println)
    sc.stop()
  }
} 
Example 12
Source File: LocalLR.scala    From learning-spark   with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{Vector, DenseVector}


object LocalLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData = {
    def generatePoint(i: Int) = {
      val y = if(i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- data) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient +=  p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
} 
Example 13
Source File: MetropolisHastingsTests.scala    From scalismo   with Apache License 2.0
package scalismo.sampling

import breeze.linalg.{DenseMatrix, DenseVector}
import scalismo.ScalismoTestSuite
import scalismo.sampling.algorithms.{MetropolisHastings, MetropolisHastingsWithPrefetching}
import scalismo.sampling.evaluators.GaussianEvaluator
import scalismo.statisticalmodel.MultivariateNormalDistribution

class MetropolisHastingsTests extends ScalismoTestSuite {

  implicit val rng = scalismo.utils.Random(42)

  val gaussianProposal =
    new ProposalGenerator[Double] with TransitionProbability[Double] with SymmetricTransitionRatio[Double] {
      val sdev = 1.0
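
      // The proposal method was elided from this listing; a minimal Gaussian
      // random-walk reconstruction, assuming scalismo's ProposalGenerator
      // signature and that rng.scalaRandom exposes a scala.util.Random:
      override def propose(from: Double): Double =
        from + rng.scalaRandom.nextGaussian() * sdev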

      
      override def logTransitionProbability(from: Double, to: Double): Double =
        GaussianEvaluator.logDensity(to, from, sdev)
    }

  describe("The metropolis-hastings algorithm") {

    it("approximates the mean and covariance of a normal distribution") {
      val mean = 1.0
      val sdev = 3.5
      val evaluator = GaussianEvaluator(mean, sdev)

      val mh = MetropolisHastings(gaussianProposal, evaluator)
      val samples = mh.iterator(0.0).drop(100000).take(100000).toIndexedSeq
      val approximatedMean = samples.sum / samples.size
      val approximatedVariance =
        samples.map(sample => (sample - mean) * (sample - mean)).sum / samples.size

      approximatedMean should be(mean +- 1e-1)
      Math.sqrt(approximatedVariance) should be(sdev +- 5e-1)
    }
  }

  describe("The metropolis-hastings algorithm with prefetching") {

    it("approximates the mean and covariance of a normal distribution") {
      val mean = 1.0
      val sdev = 3.5
      val evaluator = GaussianEvaluator(mean, sdev)

      val mh = MetropolisHastingsWithPrefetching(gaussianProposal, evaluator)
      val samples = mh.iterator(0.0).drop(100000).take(100000).toIndexedSeq
      val approximatedMean = samples.sum / samples.size
      val approximatedVariance =
        samples.map(sample => (sample - mean) * (sample - mean)).sum / samples.size

      approximatedMean should be(mean +- 1e-1)
      Math.sqrt(approximatedVariance) should be(sdev +- 5e-1)
    }
  }
} 
Example 14
Source File: SparkTachyonHdfsLR.scala    From learning-spark   with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo
import org.apache.spark.storage.StorageLevel



object SparkTachyonHdfsLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def main(args: Array[String]) {

    showWarning()

    val inputPath = args(0)
    val sparkConf = new SparkConf().setAppName("SparkTachyonHdfsLR")
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).persist(StorageLevel.OFF_HEAP)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
} 
Example 15
Source File: LocalKMeans.scala    From BigDatalog   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.SparkContext._


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D){rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println 
Example 16
Source File: SparkLR.scala    From BigDatalog   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}

import org.apache.spark._


object SparkLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkLR")
    val sc = new SparkContext(sparkConf)
    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = sc.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    sc.stop()
  }
}
// scalastyle:on println 
Example 17
Source File: LocalFileLR.scala    From BigDatalog   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{Vector, DenseVector}


object LocalFileLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val nums = line.split(' ').map(_.toDouble)
    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
    val points = lines.map(parsePoint _)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- points) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
}
// scalastyle:on println 
Example 18
Source File: SparkKMeans.scala    From BigDatalog   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.SparkContext._


object SparkKMeans {

  def parseVector(line: String): Vector[Double] = {
    DenseVector(line.split(' ').map(_.toDouble))
  }

  def closestPoint(p: Vector[Double], centers: Array[Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 0 until centers.length) {
      val tempDist = squaredDistance(p, centers(i))
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 3) {
      System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkKMeans")
    val sc = new SparkContext(sparkConf)
    val lines = sc.textFile(args(0))
    val data = lines.map(parseVector _).cache()
    val K = args(1).toInt
    val convergeDist = args(2).toDouble

    val kPoints = data.takeSample(withReplacement = false, K, 42).toArray
    var tempDist = 1.0

    while(tempDist > convergeDist) {
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val pointStats = closest.reduceByKey{case ((p1, c1), (p2, c2)) => (p1 + p2, c1 + c2)}

      val newPoints = pointStats.map {pair =>
        (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()

      tempDist = 0.0
      for (i <- 0 until K) {
        tempDist += squaredDistance(kPoints(i), newPoints(i))
      }

      for (newP <- newPoints) {
        kPoints(newP._1) = newP._2
      }
      println("Finished iteration (delta = " + tempDist + ")")
    }

    println("Final centers:")
    kPoints.foreach(println)
    sc.stop()
  }
}
// scalastyle:on println 
Example 19
Source File: LocalLR.scala    From BigDatalog   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{Vector, DenseVector}


object LocalLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- data) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient +=  p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
}
// scalastyle:on println 
Example 20
Source File: SparkHdfsLR.scala    From BigDatalog   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo



object SparkHdfsLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
    val inputPath = args(0)
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
// scalastyle:on println 
Example 21
Source File: SparkTachyonHdfsLR.scala    From BigDatalog   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo
import org.apache.spark.storage.StorageLevel



object SparkTachyonHdfsLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def main(args: Array[String]) {

    showWarning()

    val inputPath = args(0)
    val sparkConf = new SparkConf().setAppName("SparkTachyonHdfsLR")
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).persist(StorageLevel.OFF_HEAP)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
// scalastyle:on println 
Example 22
Source File: ExactSimilarityReference.scala    From elastiknn   with Apache License 2.0
package com.klibisz.elastiknn.testing

import breeze.linalg.DenseVector
import com.klibisz.elastiknn.api.Vec
import breeze.linalg.functions._


object ExactSimilarityReference {

  val L2: (Vec.DenseFloat, Vec.DenseFloat) => Double = (v1: Vec.DenseFloat, v2: Vec.DenseFloat) => {
    1 / (1 + euclideanDistance(new DenseVector(v1.values), new DenseVector(v2.values)))
  }

  val L1: (Vec.DenseFloat, Vec.DenseFloat) => Double = (v1: Vec.DenseFloat, v2: Vec.DenseFloat) => {
    1 / (1 + manhattanDistance(new DenseVector(v1.values), new DenseVector(v2.values)))
  }

  val Angular: (Vec.DenseFloat, Vec.DenseFloat) => Double = (v1: Vec.DenseFloat, v2: Vec.DenseFloat) => {
    1 + (1 - cosineDistance(new DenseVector(v1.values.map(_.toDouble)), new DenseVector(v2.values.map(_.toDouble))))
  }

  val Hamming: (Vec.SparseBool, Vec.SparseBool) => Double = (v1: Vec.SparseBool, v2: Vec.SparseBool) => {
    val d1 = new Array[Boolean](v1.totalIndices)
    val d2 = new Array[Boolean](v2.totalIndices)
    v1.trueIndices.foreach(i => d1.update(i, true))
    v2.trueIndices.foreach(i => d2.update(i, true))
    d1.zip(d2).count { case (a, b) => a == b } * 1d / d1.length
  }

  val Jaccard = (v1: Vec.SparseBool, v2: Vec.SparseBool) => {
    val isec = v1.trueIndices.intersect(v2.trueIndices).length
    val denom = v1.trueIndices.length + v2.trueIndices.length - isec
    if (isec == 0 && denom == 0) 1d
    else if (denom > 0) isec * 1d / denom
    else 0d
  }

} 
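The dense similarities above are simple transformations of Breeze's distance functions. A quick standalone check of the underlying distances (plain Breeze, no elastiknn types):

import breeze.linalg.DenseVector
import breeze.linalg.functions.{cosineDistance, euclideanDistance, manhattanDistance}

object DistanceCheck {
  def main(args: Array[String]): Unit = {
    val a = DenseVector(1.0, 0.0)
    val b = DenseVector(0.0, 1.0)
    println(1 / (1 + euclideanDistance(a, b))) // L2 similarity, about 0.414
    println(1 / (1 + manhattanDistance(a, b))) // L1 similarity, about 0.333
    println(1 + (1 - cosineDistance(a, b)))    // angular similarity, 1.0
  }
}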
Example 23
Source File: min-ppl.scala    From blog   with Apache License 2.0
object MinPpl {

  import breeze.stats.{distributions => bdist}
  import breeze.linalg.DenseVector

  implicit val numParticles = 300

  case class Particle[T](v: T, lw: Double) { // value and log-weight
    def map[S](f: T => S): Particle[S] = Particle(f(v), lw)
  }

  trait Prob[T] {
    val particles: Vector[Particle[T]]
    def map[S](f: T => S): Prob[S] = Empirical(particles map (_ map f))
    def flatMap[S](f: T => Prob[S]): Prob[S] = {
      Empirical((particles map (p => {
        f(p.v).particles.map(psi => Particle(psi.v, p.lw + psi.lw))
      })).flatten).resample
    }
    def resample(implicit N: Int): Prob[T] = {
      val lw = particles map (_.lw)
      val mx = lw reduce (math.max(_,_))
      val rw = lw map (lwi => math.exp(lwi - mx))
      val law = mx + math.log(rw.sum/(rw.length))
      val ind = bdist.Multinomial(DenseVector(rw.toArray)).sample(N)
      val newParticles = ind map (i => particles(i))
      Empirical(newParticles.toVector map (pi => Particle(pi.v, law)))
    }
    def cond(ll: T => Double): Prob[T] =
      Empirical(particles map (p => Particle(p.v, p.lw + ll(p.v))))
    def empirical: Vector[T] = resample.particles.map(_.v)
  }

  case class Empirical[T](particles: Vector[Particle[T]]) extends Prob[T]

  def unweighted[T](ts: Vector[T], lw: Double = 0.0): Prob[T] =
    Empirical(ts map (Particle(_, lw)))

  trait Dist[T] extends Prob[T] {
    def ll(obs: T): Double
    def ll(obs: Seq[T]): Double = obs map (ll) reduce (_+_)
    def fit(obs: Seq[T]): Prob[T] =
      Empirical(particles map (p => Particle(p.v, p.lw + ll(obs))))
    def fitQ(obs: Seq[T]): Prob[T] = Empirical(Vector(Particle(obs.head, ll(obs))))
    def fit(obs: T): Prob[T] = fit(List(obs))
    def fitQ(obs: T): Prob[T] = fitQ(List(obs))
  }

  case class Normal(mu: Double, v: Double)(implicit N: Int) extends Dist[Double] {
    lazy val particles = unweighted(bdist.Gaussian(mu, math.sqrt(v)).sample(N).toVector).particles
    def ll(obs: Double) = bdist.Gaussian(mu, math.sqrt(v)).logPdf(obs)
  }

  case class Gamma(a: Double, b: Double)(implicit N: Int) extends Dist[Double] {
    lazy val particles = unweighted(bdist.Gamma(a, 1.0/b).sample(N).toVector).particles
    def ll(obs: Double) = bdist.Gamma(a, 1.0/b).logPdf(obs)
  }

  case class Poisson(mu: Double)(implicit N: Int) extends Dist[Int] {
    lazy val particles = unweighted(bdist.Poisson(mu).sample(N).toVector).particles
    def ll(obs: Int) = bdist.Poisson(mu).logProbabilityOf(obs)
  }

} 
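A hypothetical usage sketch for the mini-PPL above (not part of the listing): infer the mean of a unit-variance Gaussian from a few observations by conditioning a vague Normal prior with fitQ and drawing from the result via empirical:

object MinPplDemo {
  import MinPpl._

  def main(args: Array[String]): Unit = {
    val data = List(4.2, 5.1, 4.8)
    val posterior = for {
      mu <- Normal(0.0, 100.0)         // vague prior on the mean
      _ <- Normal(mu, 1.0).fitQ(data)  // condition on the observations
    } yield mu
    val draws = posterior.empirical
    println(draws.sum / draws.size)    // posterior mean, roughly 4.7
  }
}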
Example 24
Source File: min-ppl.scala    From blog   with Apache License 2.0
object MinPpl2 {

  import breeze.stats.{distributions => bdist}
  import breeze.linalg.DenseVector
  import cats._
  import cats.implicits._

  implicit val numParticles = 2000

  case class Particle[T](v: T, lw: Double) { // value and log-weight
    def map[S](f: T => S): Particle[S] = Particle(f(v), lw)
    def flatMap[S](f: T => Particle[S]): Particle[S] = {
      val ps = f(v)
      Particle(ps.v, lw + ps.lw)
    }
  }

  implicit val particleMonad = new Monad[Particle] {
    def pure[T](t: T): Particle[T] = Particle(t, 0.0)
    def flatMap[T,S](pt: Particle[T])(f: T => Particle[S]): Particle[S] = pt.flatMap(f)
    def tailRecM[T,S](t: T)(f: T => Particle[Either[T,S]]): Particle[S] = ???
  }

  trait Prob[T] {
    val particles: Vector[Particle[T]]
    def draw: Particle[T]
    def mapP[S](f: T => Particle[S]): Prob[S] = Empirical(particles map (_ flatMap f))
    def map[S](f: T => S): Prob[S] = mapP(v => Particle(f(v), 0.0))
    def flatMap[S](f: T => Prob[S]): Prob[S] = mapP(f(_).draw)
    def resample(implicit N: Int): Prob[T] = {
      val lw = particles map (_.lw)
      val mx = lw reduce (math.max(_,_))
      val rw = lw map (lwi => math.exp(lwi - mx))
      val law = mx + math.log(rw.sum/(rw.length))
      val ind = bdist.Multinomial(DenseVector(rw.toArray)).sample(N)
      val newParticles = ind map (i => particles(i))
      Empirical(newParticles.toVector map (pi => Particle(pi.v, law)))
    }
    def cond(ll: T => Double): Prob[T] = mapP(v => Particle(v, ll(v)))
    def empirical: Vector[T] = resample.particles.map(_.v)
  }

  implicit val probMonad = new Monad[Prob] {
    def pure[T](t: T): Prob[T] = Empirical(Vector(Particle(t, 0.0)))
    def flatMap[T,S](pt: Prob[T])(f: T => Prob[S]): Prob[S] = pt.flatMap(f)
    def tailRecM[T,S](t: T)(f: T => Prob[Either[T,S]]): Prob[S] = ???
  }

  case class Empirical[T](particles: Vector[Particle[T]]) extends Prob[T] {
    def draw: Particle[T] = {
      val lw = particles map (_.lw)
      val mx = lw reduce (math.max(_,_))
      val rw = lw map (lwi => math.exp(lwi - mx))
      val law = mx + math.log(rw.sum/(rw.length))
      val idx = bdist.Multinomial(DenseVector(rw.toArray)).draw
      Particle(particles(idx).v, law)
    }
  }

  def unweighted[T](ts: Vector[T], lw: Double = 0.0): Prob[T] =
    Empirical(ts map (Particle(_, lw)))

  trait Dist[T] extends Prob[T] {
    def ll(obs: T): Double
    def ll(obs: Seq[T]): Double = obs map (ll) reduce (_+_)
    def fit(obs: Seq[T]): Prob[T] = mapP(v => Particle(v, ll(obs)))
    def fitQ(obs: Seq[T]): Prob[T] = Empirical(Vector(Particle(obs.head, ll(obs))))
    def fit(obs: T): Prob[T] = fit(List(obs))
    def fitQ(obs: T): Prob[T] = fitQ(List(obs))
  }

  case class Normal(mu: Double, v: Double)(implicit N: Int) extends Dist[Double] {
    lazy val particles = unweighted(bdist.Gaussian(mu, math.sqrt(v)).
      sample(N).toVector).particles
    def draw = Particle(bdist.Gaussian(mu, math.sqrt(v)).draw, 0.0)
    def ll(obs: Double) = bdist.Gaussian(mu, math.sqrt(v)).logPdf(obs)
  }

  case class Gamma(a: Double, b: Double)(implicit N: Int) extends Dist[Double] {
    lazy val particles = unweighted(bdist.Gamma(a, 1.0/b).
      sample(N).toVector).particles
    def draw = Particle(bdist.Gamma(a, 1.0/b).draw, 0.0)
    def ll(obs: Double) = bdist.Gamma(a, 1.0/b).logPdf(obs)
  }

  case class Poisson(mu: Double)(implicit N: Int) extends Dist[Int] {
    lazy val particles = unweighted(bdist.Poisson(mu).
      sample(N).toVector).particles
    def draw = Particle(bdist.Poisson(mu).draw, 0.0)
    def ll(obs: Int) = bdist.Poisson(mu).logProbabilityOf(obs)
  }

}


// eof 
Example 25
Source File: Ledger.scala    From deepspark   with GNU General Public License v2.0
package com.github.nearbydelta.deepspark.word.layer

import breeze.linalg.DenseVector
import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.github.nearbydelta.deepspark.data._
import com.github.nearbydelta.deepspark.layer.InputLayer
import com.github.nearbydelta.deepspark.word._
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast

import scala.reflect.{ClassTag, classTag}


trait Ledger[OutInfo] extends InputLayer[Array[Int], OutInfo] {
  @transient implicit override protected val evidenceI: ClassTag[Array[Int]] = classTag[Array[Int]]
  @transient var algorithm: LedgerAlgorithm = _
  var bcModel: Broadcast[LedgerModel] = _
  @transient var builder: LedgerBuilder = _
  var dimension: Int = 0
  @transient var model: LedgerModel = _
  protected var padID = -1

  def withModel(model: LedgerModel, builder: LedgerBuilder): this.type = {
    this.model = model
    this.builder = builder
    this.padID = model.padID
    this.dimension = model.dimension
    this.algorithm = builder.getUpdater(this.model.vectors)
    this
  }

  protected def pad =
    if (padID == -1) null
    else if (bcModel != null) vectorOf(bcModel.value.padID)
    else vectorOf(padID)

  protected def updateWord(word: Int, dx: DataVec): Unit =
    if (word != -1 && algorithm != null) {
      val vec = algorithm.delta.getOrElseUpdate(word, DenseVector.zeros[Double](dimension))
      vec += dx
    }

  protected def vectorOf(str: Int) =
    if (bcModel != null) bcModel.value.vectorAt(str)
    else model.vectorAt(str)

  override def broadcast(sc: SparkContext): Unit = {
    bcModel = sc.broadcast(model)
  }

  override def loss: Double = algorithm.loss

  override def read(kryo: Kryo, input: Input): Unit = {
    builder = kryo.readClassAndObject(input).asInstanceOf[LedgerBuilder]
    val model = new LedgerModel
    model.read(kryo, input)

    require(model.size > 0, "Model is empty!")
    withModel(model, builder)
    super.read(kryo, input)
  }

  override def unbroadcast(): Unit = {
    bcModel.unpersist(blocking = false)
  }

  @deprecated
  override def withInput(in: Int): this.type = this

  @deprecated
  override def withOutput(out: Int): this.type = this

  override def write(kryo: Kryo, output: Output): Unit = {
    kryo.writeClassAndObject(output, builder)
    model.write(kryo, output)
    super.write(kryo, output)
  }
} 
Example 26
Source File: BreezeSpec.scala    From scio   with Apache License 2.0
package com.spotify.scio.extra

import breeze.linalg.{DenseMatrix, DenseVector, SparseVector}
import breeze.stats.distributions.Rand
import com.spotify.scio.extra.Breeze._
import com.twitter.algebird.Semigroup
import org.scalacheck._

trait BreezeSpec[M[_], T] extends PropertySpec {
  val dimension = 10
  val rows = 20
  val cols = 10
  val fRand = Rand.uniform.map(_.toFloat)
  val m: Gen[M[T]]
  def ms: Gen[List[M[T]]] = Gen.listOf[M[T]](m)
  def plus(x: M[T], y: M[T])(implicit sg: Semigroup[M[T]]): M[T] = sg.plus(x, y)
  def sumOption(xs: Iterable[M[T]])(implicit sg: Semigroup[M[T]]): Option[M[T]] = sg.sumOption(xs)
}

class FloatDenseVectorSpec extends BreezeSpec[DenseVector, Float] {
  val m = Gen.const(dimension).map(DenseVector.rand[Float](_, fRand))

  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleDenseVectorSpec extends BreezeSpec[DenseVector, Double] {
  val m = Gen.const(dimension).map(DenseVector.rand[Double](_))
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class FloatDenseMatrixSpec extends BreezeSpec[DenseMatrix, Float] {
  val m = Gen.const((rows, cols)).map {
    case (r, c) => DenseMatrix.rand[Float](r, c, fRand)
  }
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleDenseMatrixSpec extends BreezeSpec[DenseMatrix, Double] {
  val m = Gen.const((rows, cols)).map {
    case (r, c) => DenseMatrix.rand[Double](r, c)
  }
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class FloatSparseVectorSpec extends BreezeSpec[SparseVector, Float] {
  val m = Gen
    .const(dimension)
    .map(d => SparseVector(DenseVector.rand[Float](d, fRand).data))

  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleSparseVectorSpec extends BreezeSpec[SparseVector, Double] {
  val m = Gen
    .const(dimension)
    .map(d => SparseVector(DenseVector.rand[Double](d).data))

  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
} 
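The properties above check that the Semigroup instances derived in com.spotify.scio.extra.Breeze agree with Breeze's own operators. A minimal direct check (a sketch, assuming scio-extra and algebird are on the classpath):

import breeze.linalg.DenseVector
import com.spotify.scio.extra.Breeze._
import com.twitter.algebird.Semigroup

object SemigroupCheck {
  def main(args: Array[String]): Unit = {
    val sg = implicitly[Semigroup[DenseVector[Double]]]
    val x = DenseVector(1.0, 2.0)
    val y = DenseVector(3.0, 4.0)
    println(sg.plus(x, y) == x + y) // true
  }
}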
Example 27
Source File: PassiveAggressiveBinaryModelEvaluation.scala    From flink-parameter-server   with Apache License 2.0
package hu.sztaki.ilab.ps.test.utils

import breeze.linalg.{DenseVector, SparseVector}
import hu.sztaki.ilab.ps.passive.aggressive.algorithm.PassiveAggressiveBinaryAlgorithm
import org.slf4j.LoggerFactory

class PassiveAggressiveBinaryModelEvaluation

object PassiveAggressiveBinaryModelEvaluation {

  private val log = LoggerFactory.getLogger(classOf[PassiveAggressiveBinaryModelEvaluation])


  def accuracy(model: DenseVector[Double],
               testLines: Traversable[(SparseVector[Double], Option[Boolean])],
               featureCount: Int,
               pac: PassiveAggressiveBinaryAlgorithm): Double = {

    var tt = 0  // label true,  predicted true
    var ff = 0  // label false, predicted false
    var tf = 0  // label true,  predicted false
    var ft = 0  // label false, predicted true
    var cnt = 0
    testLines.foreach { case (vector, label) =>
      label match {
        case Some(lab) =>
          val predicted = pac.predict(vector, model)
          (lab, predicted) match {
            case (true, true) => tt += 1
            case (false, false) => ff += 1
            case (true, false) => tf += 1
            case (false, true) => ft += 1
          }
          cnt += 1
        case _ => throw new IllegalStateException("Labels should not be missing.")
      }
    }
    // Accuracy in percent: correctly classified examples over all labelled examples.
    ((tt + ff).toDouble / cnt) * 100
  }


} 
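For intuition, the bookkeeping above is a plain confusion-matrix count, and accuracy is the diagonal mass (tt + ff) over all labelled examples. A REPL-style sketch with a toy stand-in predictor (sign of the dot product, an assumption for illustration only, not the passive-aggressive algorithm itself):

import breeze.linalg.{DenseVector, SparseVector}

// Hypothetical stand-in for pac.predict.
def predict(x: SparseVector[Double], w: DenseVector[Double]): Boolean =
  (x.toDenseVector dot w) > 0.0

val model = DenseVector(1.0, -1.0)
val testLines = Seq(
  (SparseVector(Array(1.0, 0.0)), Some(true)),   // w.x =  1.0 -> predicted true
  (SparseVector(Array(0.0, 1.0)), Some(false))   // w.x = -1.0 -> predicted false
)
val correct = testLines.count { case (v, lab) => lab.contains(predict(v, model)) }
println(s"accuracy = ${correct.toDouble / testLines.size * 100}%")  // 100.0%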
Example 28
Source File: LinearRegressionExpr.scala    From glow   with Apache License 2.0 5 votes vote down vote up
package io.projectglow.sql.expressions

import breeze.linalg.DenseVector
import org.apache.spark.TaskContext
import org.apache.spark.sql.SQLUtils
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes, TernaryExpression}
import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.types._

object LinearRegressionExpr {
  private val matrixUDT = SQLUtils.newMatrixUDT()
  private val state = new ThreadLocal[CovariateQRContext]

  def doLinearRegression(genotypes: Any, phenotypes: Any, covariates: Any): InternalRow = {

    if (state.get() == null) {
      // Save the QR factorization of the covariate matrix since it's the same for every row
      state.set(CovariateQRContext.computeQR(matrixUDT.deserialize(covariates).toDense))
      TaskContext.get().addTaskCompletionListener[Unit](_ => state.remove())
    }

    LinearRegressionGwas.linearRegressionGwas(
      new DenseVector[Double](genotypes.asInstanceOf[ArrayData].toDoubleArray()),
      new DenseVector[Double](phenotypes.asInstanceOf[ArrayData].toDoubleArray()),
      state.get()
    )
  }
}

case class LinearRegressionExpr(
    genotypes: Expression,
    phenotypes: Expression,
    covariates: Expression)
    extends TernaryExpression
    with ImplicitCastInputTypes {

  private val matrixUDT = SQLUtils.newMatrixUDT()

  override def dataType: DataType =
    StructType(
      Seq(
        StructField("beta", DoubleType),
        StructField("standardError", DoubleType),
        StructField("pValue", DoubleType)))

  override def inputTypes: Seq[DataType] =
    Seq(ArrayType(DoubleType), ArrayType(DoubleType), matrixUDT)

  override def children: Seq[Expression] = Seq(genotypes, phenotypes, covariates)

  override protected def nullSafeEval(genotypes: Any, phenotypes: Any, covariates: Any): Any = {
    LinearRegressionExpr.doLinearRegression(genotypes, phenotypes, covariates)
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    nullSafeCodeGen(
      ctx,
      ev,
      (genotypes, phenotypes, covariates) => {
        s"""
         |${ev.value} = io.projectglow.sql.expressions.LinearRegressionExpr.doLinearRegression($genotypes, $phenotypes, $covariates);
       """.stripMargin
      }
    )
  }
} 
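The ThreadLocal here implements a common per-executor-thread cache: the covariate QR factorization is row-invariant, so it is computed once per thread and reused for every row, and the task-completion listener clears it when the task ends. A minimal sketch of the pattern in isolation (names are illustrative, not part of any library):

object PerThreadCache {
  private val state = new ThreadLocal[Array[Double]]

  // Computes the value at most once per thread; later calls on the same thread reuse it.
  def getOrCompute(compute: () => Array[Double]): Array[Double] = {
    if (state.get() == null) {
      state.set(compute())
    }
    state.get()
  }

  // Call when the unit of work finishes, as the task-completion listener above does.
  def clear(): Unit = state.remove()
}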
Example 29
Source File: LikelihoodRatioTest.scala    From glow   with Apache License 2.0 5 votes vote down vote up
package io.projectglow.sql.expressions

import breeze.linalg.{DenseMatrix, DenseVector}
import org.apache.spark.ml.linalg.{DenseMatrix => SparkDenseMatrix}
import org.apache.spark.sql.Encoders
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.StructType

object LikelihoodRatioTest extends LogitTest {
  override type FitState = LRTFitState
  override def fitStatePerPhenotype: Boolean = true
  override val resultSchema: StructType = Encoders.product[LogitTestResults].schema

  override def init(phenotypes: Array[Double], covariates: SparkDenseMatrix): LRTFitState = {
    val nullX = new DenseMatrix(covariates.numRows, covariates.numCols, covariates.values)
    val y = new DenseVector(phenotypes)
    val nullFitState = new NewtonIterationsState(covariates.numRows, covariates.numCols)
    nullFitState.initFromMatrix(nullX, y)
    val nullFit = LogisticRegressionGwas.newtonIterations(nullX, y, nullX.copy, nullFitState)
    val fullFitState = new NewtonIterationsState(covariates.numRows, covariates.numCols + 1)
    val x = DenseMatrix.horzcat(nullX, DenseMatrix.zeros[Double](covariates.numRows, 1))
    LRTFitState(x, x.copy, nullFit, fullFitState)
  }

  override def runTest(
      genotypes: DenseVector[Double],
      phenotypes: DenseVector[Double],
      fitState: LRTFitState): InternalRow = {
    fitState.x(::, -1) := genotypes
    fitState.newtonState.initFromMatrixAndNullFit(fitState.x, phenotypes, fitState.nullFit.args)

    if (!fitState.nullFit.converged) {
      return LogitTestResults.nanRow
    }

    val fullFit =
      LogisticRegressionGwas.newtonIterations(
        fitState.x,
        phenotypes,
        fitState.hessian,
        fitState.newtonState)

    if (!fullFit.converged) {
      return LogitTestResults.nanRow
    }

    val beta = fullFit.args.b(-1)
    LogisticRegressionGwas.makeStats(
      beta,
      fullFit.args.fisher,
      fullFit.logLkhd,
      fitState.nullFit.logLkhd)
  }
}

case class LRTFitState(
    x: DenseMatrix[Double],
    hessian: DenseMatrix[Double],
    nullFit: NewtonResult,
    newtonState: NewtonIterationsState
) 
Example 30
Source File: LinearRegressionGwas.scala    From glow   with Apache License 2.0 5 votes vote down vote up
package io.projectglow.sql.expressions

import breeze.linalg.DenseVector
import org.apache.commons.math3.distribution.TDistribution
import org.apache.commons.math3.util.FastMath
import org.apache.spark.sql.catalyst.InternalRow

import io.projectglow.common.GlowLogging

case class RegressionStats(beta: Double, standardError: Double, pValue: Double)

object LinearRegressionGwas extends GlowLogging {

  
  def runRegression(
      genotypes: DenseVector[Double],
      phenotypes: DenseVector[Double],
      covariateQRContext: CovariateQRContext): RegressionStats = {
    require(
      genotypes.length == phenotypes.length,
      "Number of samples differs between genotype and phenotype arrays")
    require(
      covariateQRContext.covQt.cols == genotypes.length,
      "Number of samples differs between genotype array and covariate matrix")

    val qtx = covariateQRContext.covQt * genotypes
    val qty = covariateQRContext.covQt * phenotypes

    val xdoty = (phenotypes dot genotypes) - (qty dot qtx)
    val xdotx = (genotypes dot genotypes) - (qtx dot qtx)
    val ydoty = (phenotypes dot phenotypes) - (qty dot qty)
    val beta = xdoty / xdotx
    val standardError =
      FastMath.sqrt((ydoty / xdotx - beta * beta) / covariateQRContext.degreesOfFreedom)

    // t-statistic
    val t = beta / standardError
    val tDist = new TDistribution(covariateQRContext.degreesOfFreedom)
    val pvalue = 2 * tDist.cumulativeProbability(-Math.abs(t))
    RegressionStats(beta, standardError, pvalue)
  }

  def linearRegressionGwas(
      genotypes: DenseVector[Double],
      phenotypes: DenseVector[Double],
      covariateQR: CovariateQRContext): InternalRow = {

    val regressionStats = runRegression(genotypes, phenotypes, covariateQR)

    InternalRow(regressionStats.beta, regressionStats.standardError, regressionStats.pValue)
  }
} 
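runRegression works on residuals: multiplying by covQt (Q transposed, from a QR factorization of the covariate matrix) projects genotypes and phenotypes onto the covariate column space, and subtracting those projections leaves a one-variable regression on what remains. A self-contained sketch on toy data, assuming covQt comes from breeze's qr.reduced and degreesOfFreedom means n - #covariates - 1 (both are assumptions about CovariateQRContext, which is not shown in this listing):

import breeze.linalg.{DenseMatrix, DenseVector, qr}
import org.apache.commons.math3.distribution.TDistribution

object ResidualRegressionSketch extends App {
  // Covariate matrix: an intercept column plus one covariate, five samples.
  val c = DenseMatrix((1.0, 0.1), (1.0, 0.4), (1.0, 0.2), (1.0, 0.9), (1.0, 0.5))
  val covQt = qr.reduced(c).q.t  // plays the role of covariateQRContext.covQt
  val dof = c.rows - c.cols - 1  // assumed meaning of degreesOfFreedom

  val genotypes = DenseVector(0.0, 1.0, 2.0, 1.0, 0.0)
  val phenotypes = DenseVector(1.1, 2.0, 3.2, 1.9, 1.0)

  // Same arithmetic as runRegression above.
  val qtx = covQt * genotypes
  val qty = covQt * phenotypes
  val xdoty = (phenotypes dot genotypes) - (qty dot qtx)
  val xdotx = (genotypes dot genotypes) - (qtx dot qtx)
  val ydoty = (phenotypes dot phenotypes) - (qty dot qty)
  val beta = xdoty / xdotx
  val se = math.sqrt((ydoty / xdotx - beta * beta) / dof)
  val t = beta / se
  val p = 2 * new TDistribution(dof.toDouble).cumulativeProbability(-math.abs(t))
  println(s"beta=$beta standardError=$se pValue=$p")
}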
Example 31
Source File: SparkMLTestUtils.scala    From aardpfark   with Apache License 2.0 5 votes vote down vote up
package com.ibm.aardpfark.spark.ml

import scala.util.Random

import breeze.linalg.DenseVector

import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.mllib.random.{GammaGenerator, PoissonGenerator, StandardNormalGenerator}

object SparkMLTestUtils {

  def generateGeneralizedLinearRegressionInput(
    intercept: Double,
    coefficients: Array[Double],
    xMean: Array[Double],
    xVariance: Array[Double],
    nPoints: Int,
    seed: Int,
    noiseLevel: Double,
    family: String,
    link: String): Seq[LabeledPoint] = {

    val rnd = new Random(seed)
    def rndElement(i: Int) = {
      (rnd.nextDouble() - 0.5) * math.sqrt(12.0 * xVariance(i)) + xMean(i)
    }
    val (generator, mean) = family match {
      case "gaussian" => (new StandardNormalGenerator, 0.0)
      case "poisson" => (new PoissonGenerator(1.0), 1.0)
      case "gamma" => (new GammaGenerator(1.0, 1.0), 1.0)
    }
    generator.setSeed(seed)

    (0 until nPoints).map { _ =>
      val x = DenseVector(coefficients.indices.map(rndElement).toArray)
      val w = DenseVector(coefficients)
      val eta = w.dot(x) + intercept
      val mu = link match {
        case "identity" => eta
        case "log" => math.exp(eta)
        case "sqrt" => math.pow(eta, 2.0)
        case "inverse" => 1.0 / eta
      }
      val label = mu + noiseLevel * (generator.nextValue() - mean)
      // Return LabeledPoints with DenseVector
      LabeledPoint(label, Vectors.dense(x.data))
    }
  }

} 
Example 32
Source File: MLPClassifier.scala    From aardpfark   with Apache License 2.0 5 votes vote down vote up
package com.ibm.aardpfark.spark.ml.classification

import scala.collection.mutable.ArrayBuffer

import com.ibm.aardpfark.pfa.document.{Cell, PFABuilder, PFADocument}
import com.ibm.aardpfark.pfa.dsl._
import com.ibm.aardpfark.pfa.expression._
import com.ibm.aardpfark.pfa.types.WithSchema
import com.ibm.aardpfark.spark.ml.PFAPredictionModel
import breeze.linalg.{DenseMatrix, DenseVector}
import com.sksamuel.avro4s.{AvroNamespace, AvroSchema}
import org.apache.avro.Schema

import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel

@AvroNamespace("com.ibm.aardpfark.exec.spark.ml.classification")
case class Layer(weights: Array[Array[Double]], bias: Array[Double])
@AvroNamespace("com.ibm.aardpfark.exec.spark.ml.classification")
case class Layers(layers: Seq[Layer]) extends WithSchema {
  override def schema: Schema = AvroSchema[this.type]
}

class PFAMultilayerPerceptronClassificationModel(
  override val sparkTransformer: MultilayerPerceptronClassificationModel)
  extends PFAPredictionModel[Layers] {

  private def getLayers = {
    val weights = sparkTransformer.weights.toArray
    val inputLayers = sparkTransformer.layers
    val layers = ArrayBuffer[Layer]()
    var offset = 0
    for (i <- 0 to inputLayers.size - 2) {
      val in = inputLayers(i)
      val out = inputLayers(i + 1)
      val wOffset = out * in
      val wData = weights.slice(offset, offset + wOffset)
      val bData = weights.slice(offset + wOffset, offset + wOffset + out)
      val w = Array.ofDim[Double](out, in)
      new DenseMatrix[Double](out, in, wData).foreachPair { case ((ii, jj), v) => w(ii)(jj) = v }
      val b = new DenseVector[Double](bData).toArray
      layers += Layer(w, b)
      offset += wOffset + out
    }
    layers.toArray
  }

  override protected def cell = Cell(Layers(getLayers))

  private val doubleSigmoid = NamedFunctionDef("doubleSigmoid", FunctionDef[Double, Double](
    "x", m.link.logit("x")
  ))

  override def action: PFAExpression = {
    val forward = model.neural.simpleLayers(inputExpr, modelCell.ref("layers"), doubleSigmoid.ref)
    val softmax = m.link.softmax(forward)
    NewRecord(outputSchema, Map(predictionCol -> a.argmax(softmax)))
  }

  override def pfa: PFADocument = {
    PFABuilder()
      .withName(sparkTransformer.uid)
      .withMetadata(getMetadata)
      .withInput(inputSchema)
      .withOutput(outputSchema)
      .withCell(modelCell)
      .withFunction(doubleSigmoid)
      .withAction(action)
      .pfa
  }

} 
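getLayers depends on the flat weight layout used by Spark's MLP model: for each consecutive pair of layer sizes (in, out), an out x in weight block is stored first (read column-major by the breeze DenseMatrix constructor above), followed by out bias values. A REPL-style sketch of the offset arithmetic for a hypothetical layer spec Array(2, 3, 1):

val inputLayers = Array(2, 3, 1)  // hypothetical MLP layer sizes
var offset = 0
for (i <- 0 to inputLayers.length - 2) {
  val in = inputLayers(i)
  val out = inputLayers(i + 1)
  println(s"layer $i: weights in [$offset, ${offset + out * in}), bias in [${offset + out * in}, ${offset + out * in + out})")
  offset += out * in + out
}
// layer 0: weights in [0, 6),  bias in [6, 9)
// layer 1: weights in [9, 12), bias in [12, 13)  -> a 13-element flat weight vector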
Example 33
Source File: TensorCommons.scala    From Clustering4Ever   with Apache License 2.0 5 votes vote down vote up
package org.clustering4ever.scala.clustering.tensor

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

import breeze.linalg.{DenseMatrix, DenseVector}

object TensorCommons {

  // Returns the indices of the k largest elements of the vector, in descending
  // order of value. (SNumeric is the project's numeric type class; its import is
  // elided in this listing.)
  def obtainTopkIndices[@specialized(Int, Double) N](vector: DenseVector[N], k: Int)(implicit num: SNumeric[N], ev: ClassTag[N]): Array[Int] =
    vector.toArray.zipWithIndex.sortWith((x, y) => num.gt(x._1, y._1)).take(k).map(_._2)

  // Represents the data as a tensor, i.e. a buffer of n3 frontal slices of size n1 x n2.
  def dataToTensor(data: Array[Array[Double]], n1: Int, n2: Int, n3: Int): ArrayBuffer[DenseMatrix[Double]] = {
    val r = data.length
    val c = data.head.length

    def dataToMatrix(buf: Array[Array[Double]], m: DenseMatrix[Double]): DenseMatrix[Double] = {
      (0 until r).foreach { i =>
        (0 until c).foreach { j =>
          m(i, j) = buf(i)(j)
        }
      }
      m
    }

    val dm = dataToMatrix(data, DenseMatrix.zeros[Double](r, c))

    // Cuts the r x c matrix into `slices` frontal slices of size a x b.
    // (TODO: rewrite tail-recursively.)
    def toSlices(ds: DenseMatrix[Double], t: ArrayBuffer[DenseMatrix[Double]], a: Int, b: Int, slices: Int): ArrayBuffer[DenseMatrix[Double]] = {
      (0 until slices).foreach { k =>
        val m = DenseMatrix.zeros[Double](a, b)
        (0 until a).foreach { i =>
          (0 until b).foreach { j =>
            m(i, j) = ds(i, j + (k * b))
          }
        }
        t += m
      }
      t
    }

    toSlices(dm, ArrayBuffer.empty[DenseMatrix[Double]], n1, n2, n3)
  }

}
Example 34
Source File: TestXOR.scala    From deepspark   with GNU General Public License v2.0 5 votes vote down vote up
import breeze.linalg.DenseVector
import com.github.nearbydelta.deepspark.data._
import com.github.nearbydelta.deepspark.layer.{BasicLayer, VectorRBFLayer}
import com.github.nearbydelta.deepspark.network.SimpleNetwork
import com.github.nearbydelta.deepspark.train.{TrainerBuilder, TrainingParam}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.{SparkConf, SparkContext}


object TestXOR {
  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local[5]").setAppName("TestXOR")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.broadcast.blockSize", "40960")
      .set("spark.akka.frameSize", "50")
    val sc = new SparkContext(conf)

    val data = (0 to 100).collect {
      case i if i > 75 || i < 25 ⇒
        (0 to 100).collect {
          case j if j > 75 || j < 25 ⇒
            val xor =
              if (i > 75 && j < 25) true
              else if (i < 25 && j > 75) true
              else false
            (DenseVector[Double](i / 100.0, j / 100.0), xor)
        }
    }.flatMap(x ⇒ x)

    val train = sc.makeRDD(data)
    val test = train

    try {
      Weight.scalingDownBy(10.0)
      val builder = new AdaGrad(l2decay = 0.001, rate = 0.01)
      val rbf = new VectorRBFLayer withActivation GaussianRBF withCenters Seq(DenseVector(1.0, 1.0), DenseVector(0.0, 0.0), DenseVector(1.0, 0.0), DenseVector(0.0, 1.0))
      val network = new SimpleNetwork[Boolean]()
        //        .add(new BasicLayer withInput 2 withOutput 4)
        .add(rbf)
        //        .add(new BasicLayer withActivation LeakyReLU withOutput 4)
        .add(new BasicLayer withActivation SoftmaxCEE withOutput 2)
        .initiateBy(builder)

      println(rbf.epsilon.value)
      require(network.NOut == 2)
      //      require(network.layers.head.asInstanceOf[BasicLayer].bias != null)
      //      require(network.layers.head.asInstanceOf[BasicLayer].weight.value != null)
      //      require(network.layers.head.asInstanceOf[BasicLayer].bias.value.length > 0)

      val trained = new TrainerBuilder(TrainingParam(miniBatch = 10, maxIter = 100, dataOnLocal = true,
        reuseSaveData = true, storageLevel = StorageLevel.MEMORY_ONLY))
        .build(network, train, test, CrossEntropyErr,
          (x: Boolean) ⇒ if (x) DenseVector(1.0, 0.0) else DenseVector(0.0, 1.0), "XORTest")
        .getTrainedNetwork
      println(rbf.epsilon.value)

      (0 until 10).foreach { _ ⇒
        val (in, exp) = data(Math.floor(Math.random() * data.length).toInt)
        val out = trained.predictSoft(in)
        println(s"IN : $in, EXPECTED: $exp, OUTPUT ${out(0) > out(1)} $out")
      }
    } finally {
      sc.stop()
    }
  }
} 
Example 35
Source File: TestConcat.scala    From deepspark   with GNU General Public License v2.0 5 votes vote down vote up
import breeze.linalg.DenseVector
import com.github.nearbydelta.deepspark.data._
import com.github.nearbydelta.deepspark.layer.{BasicLayer, NetworkConcatLayer}
import com.github.nearbydelta.deepspark.network.{GeneralNetwork, SimpleNetwork}
import com.github.nearbydelta.deepspark.train.{TrainerBuilder, TrainingParam}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.{SparkConf, SparkContext}

import scala.reflect.{ClassTag, classTag}


object TestConcat {
  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local[5]").setAppName("TestXOR")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.broadcast.blockSize", "40960")
      .set("spark.akka.frameSize", "50")
    val sc = new SparkContext(conf)

    val data = (0 to 10).collect {
      case i if i > 7 || i < 3 ⇒
        (0 to 10).collect {
          case j if j > 7 || j < 3 ⇒
            val xor =
              if (i > 7 && j > 7) true
              else if (i < 3 && j < 3) true
              else false
            (0 to 10).collect {
              case k if k > 7 || k < 3 ⇒
                (0 to 10).collect {
                  case l if l > 7 || l < 3 ⇒
                    val xor2 =
                      if (k > 7 && l > 7) true
                      else if (k < 3 && l < 3) true
                      else false
                    (Array(DenseVector(i / 10.0, j / 10.0), DenseVector(k / 10.0, l / 10.0)),
                      xor && xor2)
                }
            }.flatMap(x ⇒ x)
        }.flatMap(x ⇒ x)
    }.flatMap(x ⇒ x)

    val train = sc.makeRDD(data)
    val test = train

    try {
      val builder = new AdaGrad(l2decay = 0.00001, rate = 0.01)
      val input1 = new SimpleNetwork[DataVec]()
        .add(new BasicLayer withInput 2 withOutput 4)
        .add(new BasicLayer withInput 4 withOutput 1)
      val input2 = new SimpleNetwork[DataVec]()
        .add(new BasicLayer withInput 2 withOutput 4)
        .add(new BasicLayer withInput 4 withOutput 1)
      val concat = new ConcatLayer().addNetwork(input1).addNetwork(input2)
      val network = new GeneralNetwork[Array[DataVec], Boolean](concat)
        .add(new BasicLayer withInput 2 withOutput 4)
        .add(new BasicLayer withInput 4 withOutput 1)
        .initiateBy(builder)

      require(network.NOut == 1)

      val trained = new TrainerBuilder(TrainingParam(miniBatch = 10, maxIter = 1000, storageLevel = StorageLevel.MEMORY_ONLY))
        .build(network, train, test, SquaredErr, (x: Boolean) ⇒ if (x) DenseVector(1.0) else DenseVector(0.0), "XORTest")
        .getTrainedNetwork

      (0 until 10).foreach { _ ⇒
        val (in, exp) = data(Math.floor(Math.random() * data.length).toInt)
        val out = trained.predictSoft(in)
        println(s"IN : $in, EXPECTED: $exp, OUTPUT $out")
      }
    } finally {
      sc.stop()
    }
  }

  class ConcatLayer extends NetworkConcatLayer[DataVec] {
    override implicit protected val evidenceI: ClassTag[Array[DataVec]] = classTag[Array[DataVec]]
  }

} 
Example 36
Source File: TestSpeed.scala    From deepspark   with GNU General Public License v2.0 5 votes vote down vote up
import breeze.linalg.DenseVector
import com.github.nearbydelta.deepspark.data._
import com.github.nearbydelta.deepspark.layer.{BasicLayer, VectorRBFLayer}
import com.github.nearbydelta.deepspark.network.SimpleNetwork
import com.github.nearbydelta.deepspark.train.{TrainerBuilder, TrainingParam}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.{SparkConf, SparkContext}


object TestSpeed {
  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local[5]").setAppName("TestXOR")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.broadcast.blockSize", "40960")
      .set("spark.akka.frameSize", "50")
    val sc = new SparkContext(conf)

    val data = (0 to 100).collect {
      case i if i > 75 || i < 25 ⇒
        (0 to 100).collect {
          case j if j > 75 || j < 25 ⇒
            val xor =
              if (i > 75 && j < 25) true
              else if (i < 25 && j > 75) true
              else false
            (DenseVector[Double](i / 100.0, j / 100.0), xor)
        }
    }.flatMap(x ⇒ x)

    val train = sc.makeRDD(data)
    val test = train

    try {
      Weight.scalingDownBy(10.0)
      val builder = new AdaGrad(l2decay = 0.001, rate = 0.01)
      val rbf = new VectorRBFLayer withActivation GaussianRBF withCenters Seq(DenseVector(1.0, 1.0), DenseVector(0.0, 0.0), DenseVector(1.0, 0.0), DenseVector(0.0, 1.0))
      val network = new SimpleNetwork[Boolean]()
        //        .add(new BasicLayer withInput 2 withOutput 4)
        .add(rbf)
        //        .add(new BasicLayer withActivation LeakyReLU withOutput 4)
        .add(new BasicLayer withActivation SoftmaxCEE withOutput 2)
        .initiateBy(builder)

      println(rbf.epsilon.value)
      require(network.NOut == 2)
      //      require(network.layers.head.asInstanceOf[BasicLayer].bias != null)
      //      require(network.layers.head.asInstanceOf[BasicLayer].weight.value != null)
      //      require(network.layers.head.asInstanceOf[BasicLayer].bias.value.length > 0)

      val trained = new TrainerBuilder(TrainingParam(miniBatch = 10, maxIter = 100, dataOnLocal = true,
        reuseSaveData = true, storageLevel = StorageLevel.MEMORY_ONLY))
        .build(network, train, test, CrossEntropyErr,
          (x: Boolean) ⇒ if (x) DenseVector(1.0, 0.0) else DenseVector(0.0, 1.0), "XORTest")
        .getTrainedNetwork
      println(rbf.epsilon.value)

      (0 until 10).foreach { _ ⇒
        val (in, exp) = data(Math.floor(Math.random() * data.length).toInt)
        val out = trained.predictSoft(in)
        println(s"IN : $in, EXPECTED: $exp, OUTPUT ${out(0) > out(1)} $out")
      }
    } finally {
      sc.stop()
    }
  }
} 
Example 37
Source File: min-ppl-examples.scala    From blog   with Apache License 2.0 5 votes vote down vote up
object MinPplExamples2 {

  import MinPpl2._
  import breeze.stats.{meanAndVariance => meanVar}
  import breeze.linalg.DenseVector
  import cats._
  import cats.implicits._
  import cats.syntax._

  // Zip vs flatMap
  def example1 = {
    println("binding with for")
    val prior1 = for {
      x <- Normal(0,1)
      y <- Gamma(1,1)
      z <- Poisson(10)
    } yield (x,y,z)
    println(meanVar(prior1.empirical.map(_._2)))
    println("binding with flatMap")
    val prior2 =
      Normal(0,1) flatMap {x =>
        Gamma(1,1) flatMap {y =>
          Poisson(10) map {z =>
            (x,y,z)}}}
    println(meanVar(prior2.empirical.map(_._2)))
    println("tupling")
    val prior3 = Applicative[Prob].tuple3(Normal(0,1), Gamma(1,1), Poisson(10))
    println(meanVar(prior3.empirical.map(_._2)))
    print("done")
  }

  // Poisson DGLM
  def example2 = {

    val data = List(2,1,0,2,3,4,5,4,3,2,1)

    val prior = for {
      w <- Gamma(1, 1)
      state0 <- Normal(0.0, 2.0)
    } yield (w, List(state0))
    
    def addTimePointSimple(current: Prob[(Double, List[Double])],
      obs: Int): Prob[(Double, List[Double])] = {
      println(s"Conditioning on observation: $obs")
      val updated = for {
        tup <- current
        (w, states) = tup
        os = states.head
        ns <- Normal(os, w)
        _ <- Poisson(math.exp(ns)).fitQ(obs)
      } yield (w, ns :: states)
      updated.resample
    }

    def addTimePoint(current: Prob[(Double, List[Double])],
      obs: Int): Prob[(Double, List[Double])] = {
      println(s"Conditioning on observation: $obs")
      val predict = for {
        tup <- current
        (w, states) = tup
        os = states.head
        ns <- Normal(os, w)
      }
      yield (w, ns :: states)
      val updated = for {
        tup <- predict
        (w, states) = tup
        st = states.head
        _ <- Poisson(math.exp(st)).fitQ(obs)
      } yield (w, states)
      updated.resample
    }

    val mod = data.foldLeft(prior)(addTimePoint(_,_)).empirical
    print("w  : ")
    println(meanVar(mod map (_._1)))
    print("s0 : ")
    println(meanVar(mod map (_._2.reverse.head)))
    print("sN : ")
    println(meanVar(mod map (_._2.head)))

  }



  // Main entry point

  def main(args: Array[String]): Unit = {
    println("Hi")
    //example1
    example2
    println("Bye")
  }

}

// eof 
Example 38
Source File: AverageLedger.scala    From deepspark   with GNU General Public License v2.0 5 votes vote down vote up
package com.github.nearbydelta.deepspark.word.layer

import breeze.linalg.{DenseVector, axpy}
import com.github.nearbydelta.deepspark.data._
import com.github.nearbydelta.deepspark.word.{LedgerBuilder, LedgerModel}

import scala.collection.parallel.ParSeq


class AverageLedger extends Ledger[DataVec] {
  override val outVecOf: (DataVec) ⇒ DataVec = x ⇒ x

  override def apply(x: Array[Int]): DataVec = {
    if (x.nonEmpty) {
      val matrix = DenseVector.zeros[Double](NOut)
      val it = x.toIterator
      val factor = 1.0 / x.length
      while (it.hasNext) {
        axpy(factor, vectorOf(it.next()), matrix)
      }
      matrix
    } else
      pad
  }

  override def backprop(seq: ParSeq[((Array[Int], DataVec), DataVec)]): (ParSeq[DataVec], ParSeq[() ⇒ Unit]) = {
    seq.foreach { case ((in, _), err) ⇒
      if (in.nonEmpty) {
        err :/= in.length.toDouble
        val it = in.iterator
        while (it.hasNext) {
          updateWord(it.next(), err)
        }
      } else
        updateWord(padID, err)
    }

    (null, ParSeq(algorithm.update))
  }

  override def withModel(model: LedgerModel, builder: LedgerBuilder): this.type = {
    NOut = model.dimension
    super.withModel(model, builder)
  }
} 
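The loop in apply is a running mean built from BLAS-style axpy updates (acc += factor * v), which avoids allocating an intermediate sum vector. A REPL-style equivalence check on two toy vectors:

import breeze.linalg.{DenseVector, axpy}

val vs = Seq(DenseVector(1.0, 2.0), DenseVector(3.0, 4.0))
val acc = DenseVector.zeros[Double](2)
vs.foreach(v => axpy(1.0 / vs.length, v, acc))  // acc += (1/n) * v, as in apply above
// acc == DenseVector(2.0, 3.0), the element-wise mean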
Example 39
Source File: BatchModeTest.scala    From neuroflow   with Apache License 2.0 5 votes vote down vote up
import org.scalatest.FunSuite
import breeze.linalg.DenseVector
import neuroflow.core.Activators.Double._
import neuroflow.core._
import neuroflow.dsl._


class BatchModeTest extends FunSuite {

  test("Batch Mode for Dense Net CPU") {

    import neuroflow.nets.cpu.DenseNetwork._
    implicit val weights = WeightBreeder[Double].random(-1, 1)
    val f = Sigmoid
    val net = Network(layout = Vector(2) :: Dense(3, f) :: Dense(10, f) :: SquaredError())
    val batch = (1 to 100).map { _ => DenseVector.rand[Double](size = 2) }
    val res = net.batchApply(batch)

    assert(res.size == batch.size)

  }

  test("Batch Mode for Conv Net CPU") {

    import neuroflow.nets.cpu.ConvNetwork._
    implicit val weights = WeightBreeder[Double].random(-1, 1)
    val f = Sigmoid
    val net = Network(layout =
      Convolution((1, 2, 1), (0, 0), (1, 2), (1, 1), 3, f) :: Dense(10, f) :: SquaredError()
    )
    val batch = (1 to 100).map { _ => Tensor3D.fromVector(DenseVector.rand[Double](size = 2)) }
    val res = net.batchApply(batch)

    assert(res.size == batch.size)

  }

  test("Batch Mode for Dense Net GPU") {

    import neuroflow.nets.gpu.DenseNetwork._
    implicit val weights = WeightBreeder[Double].random(-1, 1)
    val f = Sigmoid
    val net = Network(layout = Vector(2) :: Dense(3, f) :: Dense(10, f) :: SquaredError())
    val batch = (1 to 100).map { _ => DenseVector.rand[Double](size = 2) }
    val res = net.batchApply(batch)

    assert(res.size == batch.size)

  }

  test("Batch Mode for Conv Net GPU") {

    import neuroflow.nets.gpu.ConvNetwork._
    implicit val weights = WeightBreeder[Double].random(-1, 1)
    val f = Sigmoid
    val net = Network(layout =
      Convolution((1, 2, 1), (0, 0), (1, 2), (1, 1), 3, f) :: Dense(10, f) :: SquaredError()
    )
    val batch = (1 to 100).map { _ => Tensor3D.fromVector(DenseVector.rand[Double](size = 2)) }
    val res = net.batchApply(batch)

    assert(res.size == batch.size)

  }

} 
Example 40
Source File: BatchBreeder.scala    From neuroflow   with Apache License 2.0 5 votes vote down vote up
package neuroflow.core

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.storage.Zero
import neuroflow.common.Logs

import scala.reflect.ClassTag

// The enclosing object declaration was elided in this listing; restored (as an
// assumption, named after the file) so the snippet compiles.
object BatchBreeder extends Logs {
  def breedCNN[V: ClassTag : Zero](xs: Seq[Tensor3D[V]], ys: Seq[DenseVector[V]], batchSize: Int): (Seq[(DenseMatrix[V], DenseMatrix[V])], Map[Int, Int]) = {

    val xsys = xs.zip(ys).grouped(batchSize).zipWithIndex.toSeq.par.map { case (xy, batchNo) =>

      val x = horzCatTensorBatch(xy.map(_._1))
      val y = DenseMatrix.zeros[V](xy.size, xy.head._2.length)

      (0 until y.rows).foreach { row =>
        (0 until y.cols).foreach { col =>
          y.update(row, col, xy(row)._2(col))
        }
      }

      debug(s"Bred Batch $batchNo.")

      (x -> y) -> xy.size

    }.seq

    xsys.map(_._1) -> xsys.zipWithIndex.map(b => b._2 -> b._1._2).toMap

  }

  def vertCatVectorBatch[V: ClassTag : Zero](xs: Seq[DenseVector[V]]): DenseMatrix[V] = {
    val x = DenseMatrix.zeros[V](xs.size, xs.head.length)
    (0 until x.rows).foreach { row =>
      (0 until x.cols).foreach { col =>
        x.update(row, col, xs(row)(col))
      }
    }
    x
  }

  def horzCatTensorBatch[V: ClassTag : Zero](ts: Seq[Tensor3D[V]]): DenseMatrix[V] = {
    val x = DenseMatrix.zeros[V](ts.head.matrix.rows, ts.head.matrix.cols * ts.size)
    (0 until x.rows).foreach { row =>
      (0 until x.cols).foreach { col =>
        val b = col / ts.head.matrix.cols
        val c = col % ts.head.matrix.cols
        x.update(row, col, ts(b).matrix(row, c))
      }
    }
    x
  }

  def unsliceMatrixByRow[V: ClassTag : Zero](m: DenseMatrix[V]): Seq[DenseVector[V]] = {
    (0 until m.rows).map { r =>
      val v = m(r, ::).t
      v
    }
  }

} 
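vertCatVectorBatch and unsliceMatrixByRow are inverses: the first stacks target vectors as the rows of a batch matrix, the second slices the rows back out (m(r, ::).t turns a row slice back into a column vector). A REPL-style round trip, using the object name restored above:

import breeze.linalg.DenseVector
import neuroflow.core.BatchBreeder

val ys = Seq(DenseVector(1.0, 2.0), DenseVector(3.0, 4.0))
val m = BatchBreeder.vertCatVectorBatch(ys)    // 2 x 2 matrix, one vector per row
val back = BatchBreeder.unsliceMatrixByRow(m)  // recovers the original vectors
assert(back == ys)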
Example 41
Source File: Tensor.scala    From neuroflow   with Apache License 2.0 5 votes vote down vote up
package neuroflow.core

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.math.Semiring
import breeze.storage.Zero

import scala.reflect.ClassTag

// The Tensor3D trait itself and the opening of its companion object were elided
// in this listing; the companion declaration is restored (as an assumption) so
// deepCat has an enclosing scope.
object Tensor3D {
  def deepCat[V: ClassTag : Zero](ts: Seq[Tensor3D[V]]): Tensor3D[V] = {
    val x = ts.head.X
    val y = ts.head.Y
    val z = ts.map(_.Z).sum
    require(ts.forall(t => t.X == x && t.Y == y), "All tensors must share same dimension X, Y!")
    val mergedMat = ts.map(_.matrix).reduce((a, b) => DenseMatrix.vertcat(a, b))
    new Tensor3DImpl[V](mergedMat, X = x, Y = y, Z = z)
  }

}


class Tensor3DImpl[V](val matrix: DenseMatrix[V], val X: Int, val Y: Int, val Z: Int) extends Tensor3D[V] {

  def mapAll[T: ClassTag : Zero](f: V => T): Tensor3D[T] = {
    new Tensor3DImpl(matrix.map(f), X, Y, Z)
  }

  def mapAt(x: (Int, Int, Int))(f: V => V): Tensor3D[V] = {
    val newMat = matrix.copy
    val (row, col) = projection(x._1, x._2, x._3)
    newMat.update(row, col, f(apply(x)))
    new Tensor3DImpl(newMat, X, Y, Z)
  }

  def updateAt(x: (Int, Int, Int))(v: V): Unit = {
    val (row, col) = projection(x._1, x._2, x._3)
    matrix.update(row, col, v)
  }

} 
Example 42
Source File: ActiveShapeModelIOTests.scala    From scalismo   with Apache License 2.0 5 votes vote down vote up
package scalismo.io

import java.io.File
import java.net.URLDecoder

import breeze.linalg.{DenseMatrix, DenseVector}
import scalismo.ScalismoTestSuite
import scalismo.numerics.FixedPointsUniformMeshSampler3D
import scalismo.statisticalmodel.MultivariateNormalDistribution
import scalismo.statisticalmodel.asm._
import scalismo.utils.Random

import scala.collection.immutable

class ActiveShapeModelIOTests extends ScalismoTestSuite {
  implicit val rng = Random(42L)

  private def createTmpH5File(): File = {
    val f = File.createTempFile("hdf5file", ".h5")
    f.deleteOnExit()
    f
  }

  private def createAsm(): ActiveShapeModel = {
    val statismoFile = new File(URLDecoder.decode(getClass.getResource("/facemodel.h5").getPath, "UTF-8"))
    val shapeModel = StatismoIO.readStatismoMeshModel(statismoFile).get

    val (sprofilePoints, _) = new FixedPointsUniformMeshSampler3D(shapeModel.referenceMesh, 100).sample.unzip
    val pointIds = sprofilePoints.map { point =>
      shapeModel.referenceMesh.pointSet.findClosestPoint(point).id
    }
    val dists =
      for (i <- pointIds.indices)
        yield new MultivariateNormalDistribution(DenseVector.ones[Double](3) * i.toDouble,
                                                 DenseMatrix.eye[Double](3) * i.toDouble)
    val profiles = new Profiles(pointIds.to[immutable.IndexedSeq].zip(dists).map { case (i, d) => Profile(i, d) })
    new ActiveShapeModel(shapeModel,
                         profiles,
                         GaussianGradientImagePreprocessor(1),
                         NormalDirectionFeatureExtractor(1, 1))
  }

  describe("An active shape model") {

    it("can be written to disk and read again") {
      val originalAsm = createAsm()
      val h5file = createTmpH5File()

      ActiveShapeModelIO.writeActiveShapeModel(originalAsm, h5file).get
      val newAsm = ActiveShapeModelIO.readActiveShapeModel(h5file).get

      newAsm should equal(originalAsm)
      h5file.delete()
    }
  }

} 
Example 43
Source File: LandmarkIOTests.scala    From scalismo   with Apache License 2.0 5 votes vote down vote up
package scalismo.io

import java.io.{ByteArrayOutputStream, File, InputStream}
import java.net.URLDecoder

import breeze.linalg.DenseVector
import scalismo.ScalismoTestSuite
import scalismo.geometry._
import scalismo.statisticalmodel.MultivariateNormalDistribution

import scala.io.Source
import scala.language.implicitConversions
import scala.collection.immutable.Seq

class LandmarkIOTests extends ScalismoTestSuite {

  implicit def doubleToFloat(d: Double): Float = d.toFloat

  implicit def inputStreamToSource(s: InputStream): Source = Source.fromInputStream(s)

  describe("Spray LandmarkIO") {

    val csvName = "/landmarks.csv"
    def csvStream() = getClass.getResourceAsStream(csvName)

    val jsonName = "/landmarks.json"
    def jsonStream() = getClass.getResourceAsStream(jsonName)

    

    def distWithDefaultVectors(d1: Double, d2: Double, d3: Double): MultivariateNormalDistribution = {
      val axes = List(DenseVector[Double](1, 0, 0), DenseVector[Double](0, 1, 0), DenseVector[Double](0, 0, 1))
      val devs = List(d1, d2, d3)
      val data = axes zip devs
      MultivariateNormalDistribution(DenseVector[Double](0, 0, 0), data)
    }

    val jsonLm1 = Landmark("one", Point(1, 2, 3))
    val jsonLm2 = Landmark("two", Point(2, 3, 4), Some("Landmark two"), Some(distWithDefaultVectors(1, 4, 9)))
    val jsonLms = List(jsonLm1, jsonLm2)

    it("can serialize and deserialize simple landmarks using JSON") {
      val out = new ByteArrayOutputStream()
      LandmarkIO.writeLandmarksJsonToStream(jsonLms, out)
      val written = new String(out.toByteArray)
      val read = LandmarkIO.readLandmarksJsonFromSource[_3D](Source.fromString(written)).get
      read should equal(jsonLms)
    }

    it("can read simple landmarks from a JSON Stream") {
      val read = LandmarkIO.readLandmarksJsonFromSource[_3D](jsonStream()).get
      read should equal(jsonLms)
    }

  }
} 
Example 44
Source File: ImageTests.scala    From scalismo   with Apache License 2.0 5 votes vote down vote up
package scalismo.image

import breeze.linalg.DenseVector
import scalismo.ScalismoTestSuite
import scalismo.common.{BoxDomain, PointId, Scalar, ScalarArray}
import scalismo.geometry.IntVector.implicits._
import scalismo.geometry.Point.implicits._
import scalismo.geometry.EuclideanVector.implicits._
import scalismo.geometry._
import scalismo.registration.TranslationSpace

import scala.language.implicitConversions
import scala.reflect.ClassTag

class ImageTests extends ScalismoTestSuite {

  implicit def arrayToScalarArray[A: Scalar: ClassTag](a: Array[A]): ScalarArray[A] = ScalarArray(a)

  describe("A discrete 1D image") {
    it("returns the same points for a 1d index and a coordinate index") {
      val domain = DiscreteImageDomain[_1D](0.0, 1.0, 5)
      val discreteImage = DiscreteScalarImage(domain, Seq(3.0, 2.0, 1.5, 1, 0))

      for (i <- 0 until domain.size(0)) {
        // Compare linear (PointId) indexing against coordinate indexing, as the test title says.
        assert(discreteImage(PointId(i)) === discreteImage(i))
      }
    }
  }

  describe("A discrete 2D image") {
    it("returns the same points for a 1d index and a (2d) coordinate index") {
      val domain = DiscreteImageDomain[_2D]((0.0, 0.0), (1.0, 2.0), (3, 2))
      val discreteImage = DiscreteScalarImage(domain, Seq(3.0, 2.0, 1.5, 1.0, 0.0, 4.0))

      for (y <- 0 until domain.size(1);
           x <- 0 until domain.size(0)) {
        assert(discreteImage(PointId(y * domain.size(0) + x)) === discreteImage((x, y)))
      }
    }
  }

  describe("A continuous 1D image") {
    it("yields the right values after composing with a translation") {

      val image = DifferentiableScalarImage(BoxDomain(-4.0, 6.0),
                                            (x: Point[_1D]) => Math.sin(x(0).toDouble).toFloat,
                                            (x: Point[_1D]) => EuclideanVector(Math.cos(x(0).toDouble).toFloat))
      val translationTransform = TranslationSpace[_1D].transformForParameters(DenseVector(1.0))
      val composedImage = image.compose(translationTransform)
      assert(composedImage.isDefinedAt(-4.0) === true)
      assert(composedImage.isDefinedAt(5.0) === true)
      assert(composedImage.isDefinedAt(-4.5) === true)
      assert(composedImage.isDefinedAt(5.5) === false)
      composedImage(0.0) should be(image(1.0) +- 1e-5f)
    }

    it("yields the right values after warping with a translation") {

      val image = DifferentiableScalarImage(BoxDomain(-4.0, 6.0),
                                            (x: Point[_1D]) => Math.sin(x(0).toDouble).toFloat,
                                            (x: Point[_1D]) => EuclideanVector(Math.cos(x(0).toDouble).toFloat))

      val translationTransform = TranslationSpace[_1D].transformForParameters(DenseVector(-1.0))

      val warpedImage = image.compose(translationTransform)

      warpedImage.isDefinedAt(-4.0) should equal(false)
      warpedImage.isDefinedAt(-3.0) should equal(true)
      warpedImage.isDefinedAt(5.0) should equal(true)
      warpedImage.isDefinedAt(-3.5) should equal(false)
      warpedImage.isDefinedAt(5.5) should equal(true)
      warpedImage.isDefinedAt(6.5) should equal(true)
      warpedImage.isDefinedAt(7.0) should equal(true)

      warpedImage(0.0) should be(image(-1.0) +- 1e-5f)
    }
  }

  describe("A continuous 2D image") {
    it("can be translated to a new place") {

      val cImg = ScalarImage(BoxDomain((0.0, 0.0), (1.0, 1.0)), (_: Point[_2D]) => 1f)

      def t = TranslationSpace[_2D].transformForParameters(DenseVector(2.0, 2.0))
      val warpedImg = cImg.compose(t)

      warpedImg.isDefinedAt((-0.5, -0.5)) should equal(false)
      warpedImg.isDefinedAt((-2.5, -2.5)) should equal(false)
      warpedImg.isDefinedAt((-1.5, -1.5)) should equal(true)
      warpedImg((-1.5, -1.5)) should be(1.0)
    }
  }
} 
Example 45
Source File: MeshTests.scala    From scalismo   with Apache License 2.0 5 votes vote down vote up
package scalismo.mesh

import java.io.File
import java.net.URLDecoder

import breeze.linalg.DenseVector
import scalismo.ScalismoTestSuite
import scalismo.common.{PointId, UnstructuredPointsDomain}
import scalismo.geometry.Point.implicits._
import scalismo.geometry.{_3D, Point}
import scalismo.io.MeshIO
import scalismo.registration.{RotationSpace, ScalingSpace}

import scala.language.implicitConversions

class MeshTests extends ScalismoTestSuite {

  implicit def doubleToFloat(d: Double): Float = d.toFloat
  implicit def intToPointId(i: Int): PointId = PointId(i)

  describe("a mesh") {
    val path = getClass.getResource("/facemesh.stl").getPath
    val facemesh = MeshIO.readMesh(new File(URLDecoder.decode(path, "UTF-8"))).get

    it("finds the right closest points for all the points that define the mesh") {

      for ((pt, id) <- facemesh.pointSet.points.zipWithIndex) {
        val ptWithID = facemesh.pointSet.findClosestPoint(pt)
        val closestPt = ptWithID.point
        val closestId = ptWithID.id
        assert(closestPt === pt)
        assert(closestId.id === id)
      }
    }
    it("finds the right closest point for a point that is not defined on the mesh") {
      val pts = IndexedSeq(Point(0.0, 0.0, 0.0), Point(1.0, 1.0, 1.0), Point(1.0, 1.0, 5.0))
      val cells = IndexedSeq(TriangleCell(0, 1, 2))
      val mesh = TriangleMesh3D(UnstructuredPointsDomain(pts), TriangleList(cells))

      val newPt = Point(1.1, 1.1, 4)
      val ptWithID = mesh.pointSet.findClosestPoint(newPt)
      val closestPt = ptWithID.point
      val closestPtId = ptWithID.id
      assert(closestPtId.id === 2)
      assert(closestPt === pts(2))
    }
    it("computes its area correctly for a triangle") {
      val pts: IndexedSeq[Point[_3D]] = IndexedSeq((0.0, 0.0, 0.0), (0.0, 1.0, 0.0), (1.0, 0.0, 0.0))
      val cells = IndexedSeq(TriangleCell(0, 1, 2))
      val mesh = TriangleMesh3D(UnstructuredPointsDomain(pts), TriangleList(cells))

      val R = RotationSpace[_3D]((0.0, 0.0, 0.0)).transformForParameters(DenseVector(0.3, 0.4, 0.1))
      val s = ScalingSpace[_3D].transformForParameters(DenseVector(2.0))
      val transformedMesh = mesh.transform(R).transform(s)
      mesh.area should be(0.5 +- 1e-8)
      transformedMesh.area should be(4.0f * mesh.area +- 1e-5) // scaling by two gives 4 times the area
    }

    it("computes the right binary image for the unit sphere") {
      val path = getClass.getResource("/unit-sphere.stl").getPath
      val spheremesh = MeshIO.readMesh(new File(URLDecoder.decode(path, "UTF-8"))).get
      val binaryImg = spheremesh.operations.toBinaryImage
      binaryImg(Point(0, 0, 0)) should be(1)
      binaryImg(Point(2, 0, 0)) should be(0)
    }

    it("can have an empty cell list") {
      val pts = IndexedSeq(Point(0.0, 0.0, 0.0), Point(1.0, 1.0, 1.0), Point(1.0, 1.0, 5.0))
      val cells = IndexedSeq[TriangleCell]()
      try {
        TriangleMesh3D(UnstructuredPointsDomain(pts), TriangleList(cells)) // would throw exception on fail
      } catch {
        case e: Exception => fail("It should be possible to create triangleMesh with an empty cell list")
      }
    }
  }
} 
Example 46
Source File: PivotedCholeskyTest.scala    From scalismo   with Apache License 2.0 5 votes vote down vote up
package scalismo.numerics

import breeze.linalg.DenseVector
import scalismo.ScalismoTestSuite
import scalismo.common.BoxDomain3D
import scalismo.geometry.{_1D, _3D, Point}
import scalismo.kernels.{DiagonalKernel, GaussianKernel, Kernel}
import scalismo.utils.Random

class PivotedCholeskyTest extends ScalismoTestSuite {

  implicit val rng = Random(42L)

  describe("The Pivoted Cholesky ") {

    it("accurately approximates a covariance matrix from a random set of points and a kernel k in 1D") {

      val pts = DenseVector.rand[Double](60).toArray.map(v => Point(v.toFloat))
      val k = GaussianKernel[_1D](1.0)
      val matrixValuedK = DiagonalKernel[_1D](k, 1)
      val m = Kernel.computeKernelMatrix[_1D](pts, matrixValuedK)
      val eigCholesky =
        PivotedCholesky.computeApproximateEig(matrixValuedK, pts, PivotedCholesky.RelativeTolerance(1e-15))
      val (u, d) = eigCholesky
      val D = (u * breeze.linalg.diag(d) * u.t) - m
      Math.sqrt(breeze.linalg.trace(D * D.t)) should be <= 1e-5
    }

    it("accurately approximates a covariance matrix from a random set of points and a kernel k in 3D") {

      val boxDomain = BoxDomain3D(Point(0.0, 0.0, 0.0), Point(1.0, 1.0, 1.0))
      val uniformSampler = UniformSampler[_3D](boxDomain, 20)
      val pts = uniformSampler.sample.map(_._1)
      val k = GaussianKernel[_3D](1.0)
      val matrixValuedK = DiagonalKernel[_3D](k, 3)
      val m = Kernel.computeKernelMatrix[_3D](pts, matrixValuedK)
      val eigCholesky =
        PivotedCholesky.computeApproximateEig(matrixValuedK, pts, PivotedCholesky.RelativeTolerance(1e-15))
      val (u, d) = eigCholesky
      val D = (u * breeze.linalg.diag(d) * u.t) - m
      Math.sqrt(breeze.linalg.trace(D * D.t)) should be <= 1e-5
    }

  }

} 
Example 47
Source File: ActiveShapeModelTests.scala    From scalismo   with Apache License 2.0 5 votes vote down vote up
package scalismo.statisticalmodel

import java.io.File
import java.net.URLDecoder

import breeze.linalg.DenseVector
import scalismo.ScalismoTestSuite
import scalismo.geometry.{_3D, Point}
import scalismo.io.{ImageIO, MeshIO, StatismoIO}
import scalismo.mesh.{MeshMetrics, TriangleMesh}
import scalismo.numerics.{Sampler, UniformMeshSampler3D}
import scalismo.registration.LandmarkRegistration
import scalismo.statisticalmodel.asm._
import scalismo.statisticalmodel.dataset.DataCollection
import scalismo.utils.Random

class ActiveShapeModelTests extends ScalismoTestSuite {

  describe("An active shape model") {

    implicit val random = Random(42)

    object Fixture {
      val imagePreprocessor = GaussianGradientImagePreprocessor(0.1f)
      // number of points should usually be an odd number, so that the profiles are centered on the profiled points
      val featureExtractor = NormalDirectionFeatureExtractor(numberOfPoints = 5, spacing = 1.0)
      def samplerPerMesh(mesh: TriangleMesh[_3D]): Sampler[_3D] = UniformMeshSampler3D(mesh, numberOfPoints = 1000)
      val searchMethod = NormalDirectionSearchPointSampler(numberOfPoints = 31, searchDistance = 6)
      val fittingConfig =
        FittingConfiguration(featureDistanceThreshold = 2.0, pointDistanceThreshold = 3.0, modelCoefficientBounds = 3.0)

      val path: String = URLDecoder.decode(getClass.getResource(s"/asmData/model.h5").getPath, "UTF-8")
      val shapeModel = StatismoIO.readStatismoMeshModel(new File(path)).get
      val nbFiles = 7
      // use iterators so files are only loaded when required (and memory can be reclaimed after use)
      val meshes = (0 until nbFiles).toIterator map { i =>
        val meshPath: String = getClass.getResource(s"/asmData/$i.stl").getPath
        MeshIO.readMesh(new File(URLDecoder.decode(meshPath, "UTF-8"))).get
      }
      val images = (0 until nbFiles).toIterator map { i =>
        val imgPath: String = getClass.getResource(s"/asmData/$i.vtk").getPath
        ImageIO.read3DScalarImage[Float](new File(URLDecoder.decode(imgPath, "UTF-8"))).get
      }

      val targetImage = images.next()
      val targetMesh = meshes.next()
      val trainMeshes = meshes
      val trainImages = images

      val dc = DataCollection.fromMeshSequence(shapeModel.referenceMesh, trainMeshes.toIndexedSeq)._1.get
      val trainingData = trainImages zip dc.dataItems.toIterator.map(_.transformation)

      val asm =
        ActiveShapeModel.trainModel(shapeModel, trainingData, imagePreprocessor, featureExtractor, samplerPerMesh)

      // align the model
      val alignment = LandmarkRegistration.rigid3DLandmarkRegistration(
        (asm.statisticalModel.mean.pointSet.points zip targetMesh.pointSet.points).toIndexedSeq,
        Point(0, 0, 0)
      )
      val alignedASM = asm.transform(alignment)

    }
    it("Can be built, transformed and correctly fitted from/to artificial data") {

      val fit = Fixture.alignedASM.fit(Fixture.targetImage, Fixture.searchMethod, 20, Fixture.fittingConfig).get.mesh
      assert(MeshMetrics.diceCoefficient(fit, Fixture.targetMesh) > 0.94)
    }

    it("Can be transformed correctly from within the fitting") {

      val nullInitialParameters = DenseVector.zeros[Double](Fixture.asm.statisticalModel.rank)
      val fit = Fixture.asm
        .fit(Fixture.targetImage,
             Fixture.searchMethod,
             20,
             Fixture.fittingConfig,
             ModelTransformations(nullInitialParameters, Fixture.alignment))
        .get
        .mesh
      assert(MeshMetrics.diceCoefficient(fit, Fixture.targetMesh) > 0.95)
    }
  }

} 
Example 48
Source File: StatisticalVolumeModelTests.scala    From scalismo   with Apache License 2.0 5 votes vote down vote up
package scalismo.statisticalmodel.experimental

import java.io.File
import java.net.URLDecoder

import breeze.linalg.DenseVector
import breeze.stats.distributions.Gaussian
import scalismo.ScalismoTestSuite
import scalismo.geometry.{_3D, Point}
import scalismo.io.StatismoIO
import scalismo.registration.{RigidTransformation, RigidTransformationSpace}
import scalismo.utils.Random

class StatisticalVolumeModelTests extends ScalismoTestSuite {

  implicit val random = Random(42)

  implicit def doubleToFloat(d: Double): Float = d.toFloat

  describe("A statistical Volume mesh model") {

    def compareModels(oldModel: StatisticalVolumeMeshModel, newModel: StatisticalVolumeMeshModel) {

      for (i <- 0 until 10) {
        val standardNormal = Gaussian(0, 1)(random.breezeRandBasis)
        val coeffsData = standardNormal.sample(oldModel.rank)
        val coeffs = DenseVector(coeffsData.toArray)
        val inst = oldModel.instance(coeffs)
        val instNew = newModel.instance(coeffs)
        inst.pointSet.points
          .zip(instNew.pointSet.points)
          .foreach {
            case (pt1, pt2) =>
              (pt1.toVector - pt2.toVector).norm should be(0.0 +- (0.1))
          }
      }
    }

    it("can be transformed forth and back and yield the same deformations") {
      val path = getClass.getResource("/TetraMeshModel2.h5").getPath
      val model = StatismoIO.readStatismoVolumeMeshModel(new File(URLDecoder.decode(path))).get

      val parameterVector = DenseVector[Double](1.5, 1.0, 3.5, Math.PI, -Math.PI / 2.0, -Math.PI)
      val rigidTransform = RigidTransformationSpace[_3D]().transformForParameters(parameterVector)
      val inverseTransform = rigidTransform.inverse.asInstanceOf[RigidTransformation[_3D]]
      val transformedModel = model.transform(rigidTransform)
      val newModel = transformedModel.transform(inverseTransform)
      compareModels(model, newModel)
    }

    it("can change the mean shape and still yield the same shape space") {

      val path = getClass.getResource("/TetraMeshModel2.h5").getPath
      val model = StatismoIO.readStatismoVolumeMeshModel(new File(URLDecoder.decode(path))).get

      val newMesh = model.sample

      def t(pt: Point[_3D]): Point[_3D] = {
        val ptId = model.referenceVolumeMesh.pointSet.findClosestPoint(pt).id
        newMesh.pointSet.point(ptId)
      }

      val newModel = model.changeReference(t)

      compareModels(model, newModel)
    }

  }
} 
Example 49
Source File: Kernel.scala    From pravda-ml   with Apache License 2.0 5 votes vote down vote up
package com.linkedin.photon.ml.hyperparameter.estimators.kernels

import breeze.linalg.{DenseMatrix, DenseVector}

// The enclosing trait declaration (and its doc comment) were elided in this
// listing; restored (as an assumption) so the snippet compiles.
trait Kernel {
  def expandDimensions(param: DenseVector[Double], dim: Int): DenseVector[Double] = {
    require(param.length == 1 || param.length == dim,
      "Parameter must contain one global scale or a scale for each feature")

    if (param.length != dim) {
      DenseVector(Array.fill(dim)(param(0)))
    } else {
      param
    }
  }
} 
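The broadcast rule is easy to sanity-check in isolation: a length-1 parameter is replicated to one scale per feature, while an already-expanded parameter passes through untouched. A REPL-style sketch using a standalone mirror of expandDimensions (illustration only, not the project's API, and without the require guard):

import breeze.linalg.DenseVector

def expand(param: DenseVector[Double], dim: Int): DenseVector[Double] =
  if (param.length != dim) DenseVector(Array.fill(dim)(param(0))) else param

expand(DenseVector(0.5), 3)       // DenseVector(0.5, 0.5, 0.5): one global scale, broadcast
expand(DenseVector(0.5, 1.0), 2)  // returned unchanged: already one scale per feature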
Example 50
Source File: MeanValueImputer.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.impute

import breeze.linalg.DenseVector
import breeze.stats.mean
import cats.syntax.option._
import io.picnicml.doddlemodel.data.Feature.FeatureIndex
import io.picnicml.doddlemodel.data.{Features, RealVector}
import io.picnicml.doddlemodel.syntax.OptionSyntax._
import io.picnicml.doddlemodel.typeclasses.Transformer


case class MeanValueImputer private (private[impute] val means: Option[RealVector],
                                     private val featureIndex: FeatureIndex)

object MeanValueImputer {

  def apply(featureIndex: FeatureIndex): MeanValueImputer = MeanValueImputer(none, featureIndex)

  @SerialVersionUID(0L)
  implicit val ev: Transformer[MeanValueImputer] = new Transformer[MeanValueImputer] {

    override def isFitted(model: MeanValueImputer): Boolean = model.means.isDefined

    override def fit(model: MeanValueImputer, x: Features): MeanValueImputer = {
      val xToPreprocess = x(::, model.featureIndex.numerical.columnIndices)
      val means = DenseVector.zeros[Float](xToPreprocess.cols)
      0 until xToPreprocess.cols foreach { colIndex =>
        means(colIndex) = mean(xToPreprocess(xToPreprocess(::, colIndex).findAll(!_.isNaN), colIndex))
      }
      model.copy(means.some)
    }

    override protected def transformSafe(model: MeanValueImputer, x: Features): Features = {
      val xCopy = x.copy
      model.featureIndex.numerical.columnIndices.zipWithIndex.foreach { case (colIndex, statisticIndex) =>
        xCopy(::, colIndex).findAll(_.isNaN).iterator.foreach { rowIndex =>
          xCopy(rowIndex, colIndex) = model.means.getOrBreak(statisticIndex)
        }
      }
      xCopy
    }
  }
} 
Example 51
Source File: MostFrequentValueImputer.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.impute

import breeze.linalg.{DenseVector, SliceVector}
import cats.syntax.option._
import io.picnicml.doddlemodel.data.Feature.FeatureIndex
import io.picnicml.doddlemodel.data.{Features, RealVector}
import io.picnicml.doddlemodel.syntax.OptionSyntax._
import io.picnicml.doddlemodel.typeclasses.Transformer


case class MostFrequentValueImputer private (private[impute] val mostFrequent: Option[RealVector],
                                             private val featureIndex: FeatureIndex)

object MostFrequentValueImputer {

  def apply(featureIndex: FeatureIndex): MostFrequentValueImputer =
    MostFrequentValueImputer(None, featureIndex)

  @SerialVersionUID(0L)
  implicit lazy val ev: Transformer[MostFrequentValueImputer] = new Transformer[MostFrequentValueImputer] {

    override def isFitted(model: MostFrequentValueImputer): Boolean = model.mostFrequent.isDefined

    override def fit(model: MostFrequentValueImputer, x: Features): MostFrequentValueImputer = {
      val xToPreprocess = x(::, model.featureIndex.categorical.columnIndices)
      val mostFrequent = DenseVector.zeros[Float](xToPreprocess.cols)
      0 until xToPreprocess.cols foreach { colIndex =>
        mostFrequent(colIndex) = getMostFrequent(xToPreprocess(xToPreprocess(::, colIndex).findAll(!_.isNaN), colIndex))
      }
      model.copy(mostFrequent.some)
    }

    private def getMostFrequent(column: SliceVector[(Int, Int), Float]): Float = {
      val counts = scala.collection.mutable.Map.empty[Float, Int].withDefaultValue(0)
      column.foreachValue(value => counts(value) = counts(value) + 1)
      counts.maxBy(_._2)._1
    }

    override protected def transformSafe(model: MostFrequentValueImputer, x: Features): Features = {
      val xCopy = x.copy
      model.featureIndex.categorical.columnIndices.zipWithIndex.foreach { case (colIndex, statisticIndex) =>
        xCopy(::, colIndex).findAll(_.isNaN).iterator.foreach { rowIndex =>
          xCopy(rowIndex, colIndex) = model.mostFrequent.getOrBreak(statisticIndex)
        }
      }
      xCopy
    }
  }
} 
Example 52
Source File: package.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel

import breeze.linalg.{DenseMatrix, DenseVector, unique}
import io.picnicml.doddlemodel.CrossScalaCompat.floatOrdering
import io.picnicml.doddlemodel.data.Feature.FeatureIndex

package object data {

  type RealVector = DenseVector[Float]
  type IntVector = DenseVector[Int]
  type Simplex = DenseMatrix[Float]

  type Features = DenseMatrix[Float]
  type Target = DenseVector[Float]

  type FeaturesWithIndex = (Features, FeatureIndex)
  type Dataset = (Features, Target)
  type DatasetWithIndex = (Features, Target, FeatureIndex)

  def loadBostonDataset: DatasetWithIndex = ResourceDatasetLoaders.loadBostonDataset
  def loadBreastCancerDataset: DatasetWithIndex = ResourceDatasetLoaders.loadBreastCancerDataset
  def loadIrisDataset: DatasetWithIndex = ResourceDatasetLoaders.loadIrisDataset
  def loadHighSchoolTestDataset: DatasetWithIndex = ResourceDatasetLoaders.loadHighSchoolTestDataset

  def numberOfUniqueGroups(groups: IntVector): Int = {
    val uniqueGroups = unique(groups)
    require(uniqueGroups.toArray.sorted sameElements Array.range(0, uniqueGroups.length),
      "Invalid encoding of groups, all group indices in [0, numGroups) have to exist")
    uniqueGroups.length
  }

  def numberOfTargetClasses(y: Target): Int = {
    val targetClasses = unique(y)
    require(targetClasses.length >= 2,
      "Target variable must be comprised of at least two categories")
    require(targetClasses.toArray.sorted sameElements Array.range(0, targetClasses.length),
      "Invalid encoding of categories in the target variable")
    targetClasses.length
  }
} 
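Both validators enforce dense, zero-based encodings: every index in [0, n) must occur at least once. A REPL-style illustration, assuming this package object is on the classpath:

import breeze.linalg.DenseVector
import io.picnicml.doddlemodel.data._

numberOfUniqueGroups(DenseVector(0, 1, 1, 2, 0))      // 3: group indices 0, 1, 2 all present
// numberOfUniqueGroups(DenseVector(0, 2, 2)) throws: group index 1 is missing
numberOfTargetClasses(DenseVector(0.0f, 1.0f, 1.0f))  // 2
// numberOfTargetClasses(DenseVector(1.0f, 1.0f)) throws: needs at least two categories encoded from 0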
Example 53
Source File: LocalKMeans.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers(i)
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    val points = new HashSet[Vector[Double]]
    val kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println(s"Initial centers: $kPoints")

    while(tempDist > convergeDist) {
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val mappings = closest.groupBy[Int] (x => x._1)

      val pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints(mapping._1), mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println(s"Final centers: $kPoints")
  }
}
// scalastyle:on println 
Example 54
Source File: SparkLR.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession


object SparkLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkLR")
      .getOrCreate()

    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = spark.sparkContext.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println(s"Initial w: $w")

    for (i <- 1 to ITERATIONS) {
      println(s"On iteration $i")
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println(s"Final w: $w")

    spark.stop()
  }
}
// scalastyle:on println 
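The map/reduce inside the loop computes the batch gradient of the logistic loss, the sum over points of log(1 + exp(-y * w.dot(x))). The per-point term can be checked in isolation with plain Breeze (a standalone sketch, not part of the example):

import breeze.linalg.DenseVector
import scala.math.exp

val w = DenseVector(0.5, -0.25)
val x = DenseVector(1.0, 2.0)
val y = 1.0
// d/dw log(1 + exp(-y * w.dot(x))) = x * (1 / (1 + exp(-y * w.dot(x))) - 1) * y
val grad = x * ((1 / (1 + exp(-y * w.dot(x))) - 1) * y)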
Example 55
Source File: LocalFileLR.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{DenseVector, Vector}


object LocalFileLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val nums = line.split(' ').map(_.toDouble)
    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val fileSrc = scala.io.Source.fromFile(args(0))
    val lines = fileSrc.getLines().toArray
    val points = lines.map(parsePoint)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println(s"Initial w: $w")

    for (i <- 1 to ITERATIONS) {
      println(s"On iteration $i")
      val gradient = DenseVector.zeros[Double](D)
      for (p <- points) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient
    }

    fileSrc.close()
    println(s"Final w: $w")
  }
}
// scalastyle:on println 
Example 56
Source File: SparkKMeans.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import breeze.linalg.{squaredDistance, DenseVector, Vector}

import org.apache.spark.sql.SparkSession


object SparkKMeans {

  def parseVector(line: String): Vector[Double] = {
    DenseVector(line.split(' ').map(_.toDouble))
  }

  def closestPoint(p: Vector[Double], centers: Array[Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 0 until centers.length) {
      val tempDist = squaredDistance(p, centers(i))
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 3) {
      System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkKMeans")
      .getOrCreate()

    val lines = spark.read.textFile(args(0)).rdd
    val data = lines.map(parseVector _).cache()
    val K = args(1).toInt
    val convergeDist = args(2).toDouble

    val kPoints = data.takeSample(withReplacement = false, K, 42)
    var tempDist = 1.0

    while(tempDist > convergeDist) {
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val pointStats = closest.reduceByKey{case ((p1, c1), (p2, c2)) => (p1 + p2, c1 + c2)}

      val newPoints = pointStats.map {pair =>
        (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()

      tempDist = 0.0
      for (i <- 0 until K) {
        tempDist += squaredDistance(kPoints(i), newPoints(i))
      }

      for (newP <- newPoints) {
        kPoints(newP._1) = newP._2
      }
      println(s"Finished iteration (delta = $tempDist)")
    }

    println("Final centers:")
    kPoints.foreach(println)
    spark.stop()
  }
}
// scalastyle:on println 
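Each pass of the while loop is one Lloyd iteration: every point is assigned to its closest center, and each center is replaced by the mean of its assigned points. The centroid update in isolation (a standalone sketch):

import breeze.linalg.DenseVector

val assigned = Seq(DenseVector(1.0, 0.0), DenseVector(3.0, 2.0))
// New center = element-wise mean of the assigned points, as in pointStats above.
val newCenter = assigned.reduce(_ + _) * (1.0 / assigned.size) // DenseVector(2.0, 1.0)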
Example 57
Source File: LocalLR.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{DenseVector, Vector}


object LocalLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    // Initialize w to a random value
    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println(s"Initial w: $w")

    for (i <- 1 to ITERATIONS) {
      println(s"On iteration $i")
      val gradient = DenseVector.zeros[Double](D)
      for (p <- data) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient +=  p.x * scale
      }
      w -= gradient
    }

    println(s"Final w: $w")
  }
}
// scalastyle:on println 
Example 58
Source File: SparkHdfsLR.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession


object SparkHdfsLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    val y = tok.nextToken.toDouble
    val x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkHdfsLR")
      .getOrCreate()

    val inputPath = args(0)
    val lines = spark.read.textFile(inputPath).rdd

    lines.cache()
    val points = lines.map(parsePoint).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println(s"Initial w: $w")

    for (i <- 1 to ITERATIONS) {
      println(s"On iteration $i")
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println(s"Final w: $w")
    spark.stop()
  }
}
// scalastyle:on println 
Example 59
Source File: BaseChangeStrategy.scala    From deequ   with Apache License 2.0 5 votes vote down vote up
package com.amazon.deequ.anomalydetection

import breeze.linalg.DenseVector



  // Note: the enclosing strategy declaration is elided in this snippet; it
  // defines `maxRateDecrease`, `maxRateIncrease`, `order` and a `diff` method.
  override def detect(
    dataSeries: Vector[Double],
    searchInterval: (Int, Int))
  : Seq[(Int, Anomaly)] = {
    val (start, end) = searchInterval

    require(start <= end,
      "The start of the interval cannot be larger than the end.")

    val startPoint = Seq(start - order, 0).max
    val data = diff(DenseVector(dataSeries.slice(startPoint, end): _*), order).data

    data.zipWithIndex
      .filter { case (value, _) =>
        value < maxRateDecrease.getOrElse(Double.MinValue) ||
          value > maxRateIncrease.getOrElse(Double.MaxValue)
      }
      .map { case (change, index) =>
        (index + startPoint + order, Anomaly(Option(dataSeries(index + startPoint + order)), 1.0,
          Some(s"[AbsoluteChangeStrategy]: Change of $change is not in bounds [" +
            s"${maxRateDecrease.getOrElse(Double.MinValue)}, " +
            s"${maxRateIncrease.getOrElse(Double.MaxValue)}]. Order=$order")))
      }
  }
} 
Example 60
Source File: RelativeRateOfChangeStrategy.scala    From deequ   with Apache License 2.0 5 votes vote down vote up
package com.amazon.deequ.anomalydetection

import breeze.linalg.DenseVector


  // Note: the enclosing class declaration (a relative-rate variant of the
  // change strategy in Example 59) is elided in this snippet.
  override def diff(dataSeries: DenseVector[Double], order: Int): DenseVector[Double] = {
    require(order > 0, "Order of diff cannot be zero or negative")
    if (dataSeries.length == 0) {
      dataSeries
    } else {
      val valuesRight = dataSeries.slice(order, dataSeries.length)
      val valuesLeft = dataSeries.slice(0, dataSeries.length - order)
      valuesRight / valuesLeft
    }
  }
} 
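Note that diff here returns element-wise ratios rather than differences: entry i of the result is dataSeries(i + order) / dataSeries(i). A standalone check of the slicing logic:

import breeze.linalg.DenseVector

val series = DenseVector(1.0, 2.0, 4.0, 8.0)
val order = 1
val ratios = series.slice(order, series.length) / series.slice(0, series.length - order)
// ratios == DenseVector(2.0, 2.0, 2.0)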
Example 61
Source File: StratifiedClassifier.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.dummy.classification

import breeze.linalg.DenseVector
import breeze.stats.distributions.Multinomial
import cats.syntax.option._
import io.picnicml.doddlemodel.CrossScalaCompat.doubleOrdering
import io.picnicml.doddlemodel.data.{Features, Simplex, Target}
import io.picnicml.doddlemodel.dummy.classification.StratifiedClassifier.ev
import io.picnicml.doddlemodel.syntax.OptionSyntax._
import io.picnicml.doddlemodel.typeclasses.Classifier


case class StratifiedClassifier private (numClasses: Option[Int],
                                         targetDistr: Option[Multinomial[DenseVector[Double], Int]]) {

  def getTargetDistributionParams: DenseVector[Double] = {
    require(ev.isFitted(this), "Called getTargetDistributionParams on a model that is not trained yet")
    this.targetDistr.getOrBreak.params.copy
  }
}

object StratifiedClassifier {

  def apply(): StratifiedClassifier = StratifiedClassifier(none, none)

  @SerialVersionUID(0L)
  implicit lazy val ev: Classifier[StratifiedClassifier] = new Classifier[StratifiedClassifier] {

    override def numClasses(model: StratifiedClassifier): Option[Int] = model.numClasses

    override def isFitted(model: StratifiedClassifier): Boolean = model.targetDistr.isDefined

    override protected[doddlemodel] def copy(model: StratifiedClassifier, numClasses: Int): StratifiedClassifier =
      model.copy(numClasses = numClasses.some)

    override protected def fitSafe(model: StratifiedClassifier, x: Features, y: Target): StratifiedClassifier = {
      val probs = y.activeValuesIterator.foldLeft(Map[Double, Int]()) { (acc, value) =>
        val valueDouble = value.toDouble
        if (acc.contains(valueDouble)) acc + (valueDouble -> (acc(valueDouble) + 1)) else acc + (valueDouble -> 1)
      }.toArray.sortBy(_._1).map(_._2 / y.length.toDouble)

      model.copy(targetDistr = Multinomial[DenseVector[Double], Int](DenseVector(probs)).some)
    }

    override protected def predictSafe(model: StratifiedClassifier, x: Features): Target =
      DenseVector(Array.fill(x.rows)(model.targetDistr.getOrBreak.draw.toFloat))

    override protected def predictProbaSafe(model: StratifiedClassifier, x: Features): Simplex =
      throw new NotImplementedError("Method predictProbaSafe is not defined for StratifiedClassifier")
  }
} 
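A usage sketch of the typeclass API above (shapes and values are illustrative):

import breeze.linalg.{DenseMatrix, DenseVector}
import io.picnicml.doddlemodel.dummy.classification.StratifiedClassifier
import io.picnicml.doddlemodel.dummy.classification.StratifiedClassifier.ev

val x = DenseMatrix.zeros[Float](6, 2)
val y = DenseVector(0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f)
val trained = ev.fit(StratifiedClassifier(), x, y)
// Predictions are drawn from the empirical target distribution, here P(0) = P(1) = 0.5.
val yPred = ev.predict(trained, x)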
Example 62
Source File: ExpectedImprovement.scala    From pravda-ml   with Apache License 2.0 5 votes vote down vote up
package com.linkedin.photon.ml.hyperparameter.criteria

import breeze.linalg.DenseVector
import breeze.numerics.sqrt
import breeze.stats.distributions.Gaussian
import com.linkedin.photon.ml.hyperparameter.estimators.PredictionTransformation


  // Note: the enclosing class declaration is elided in this snippet; it provides
  // `bestEvaluation` (the best evaluation observed so far) and `standardNormal`
  // (a standard Gaussian distribution).
  def apply(
      predictiveMeans: DenseVector[Double],
      predictiveVariances: DenseVector[Double]): DenseVector[Double] = {

    val std = sqrt(predictiveVariances)

    // PBO Eq. 1
    val gamma = - (predictiveMeans - bestEvaluation) / std

    // Eq. 2
    std :* ((gamma :* gamma.map(standardNormal.cdf)) + gamma.map(standardNormal.pdf))
  }
} 
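The two equations can be checked numerically with Breeze alone; this standalone sketch supplies a best-seen value and Gaussian posterior moments (all values illustrative):

import breeze.linalg.DenseVector
import breeze.numerics.sqrt
import breeze.stats.distributions.Gaussian

val standardNormal = Gaussian(0, 1)
val bestEvaluation = 0.2
val predictiveMeans = DenseVector(0.1, 0.5)
val predictiveVariances = DenseVector(0.04, 0.09)

val std = sqrt(predictiveVariances)
val gamma = -(predictiveMeans - bestEvaluation) / std
// Expected improvement: std * (gamma * Phi(gamma) + phi(gamma))
val ei = std :* ((gamma :* gamma.map(standardNormal.cdf)) + gamma.map(standardNormal.pdf))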
Example 63
Source File: SparkLR.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}

import org.apache.spark._


object SparkLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD (SGD: stochastic gradient descent) or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS (L-BFGS: a quasi-Newton method)
        |for more conventional use.
      """.stripMargin)
    // String.stripMargin removes the leading whitespace and the first vertical bar | from every line
  }

  def main(args: Array[String]) {

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkLR").setMaster("local")
    val sc = new SparkContext(sparkConf)
    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = sc.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    sc.stop()
  }
}
// scalastyle:on println 
Example 64
Source File: LocalFileLR.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{Vector, DenseVector}


object LocalFileLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)
  // Parse each line into a DataPoint
  def parsePoint(line: String): DataPoint = {
    val nums = line.split(' ').map(_.toDouble)
    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS (L-BFGS: a quasi-Newton method)
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()
    // Read the file with fromFile and convert it to an Array[String]
    val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
    // Parse every line with parsePoint
    val points = lines.map(parsePoint _)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- points) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
}
// scalastyle:on println 
Example 65
Source File: SparkKMeans.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.SparkContext._


object SparkKMeans {

  def parseVector(line: String): Vector[Double] = {
    DenseVector(line.split(' ').map(_.toDouble))
  }

  def closestPoint(p: Vector[Double], centers: Array[Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 0 until centers.length) {
      val tempDist = squaredDistance(p, centers(i))
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 3) {
      System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkKMeans")
    val sc = new SparkContext(sparkConf)
    val lines = sc.textFile(args(0))
    val data = lines.map(parseVector _).cache()
    val K = args(1).toInt
    val convergeDist = args(2).toDouble

    val kPoints = data.takeSample(withReplacement = false, K, 42).toArray
    var tempDist = 1.0

    while(tempDist > convergeDist) {
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val pointStats = closest.reduceByKey{case ((p1, c1), (p2, c2)) => (p1 + p2, c1 + c2)}

      val newPoints = pointStats.map {pair =>
        (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()

      tempDist = 0.0
      for (i <- 0 until K) {
        tempDist += squaredDistance(kPoints(i), newPoints(i))
      }

      for (newP <- newPoints) {
        kPoints(newP._1) = newP._2
      }
      println("Finished iteration (delta = " + tempDist + ")")
    }

    println("Final centers:")
    kPoints.foreach(println)
    sc.stop()
  }
}
// scalastyle:on println 
Example 66
Source File: SparkHdfsLR.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo



    // Note: the object declaration, constants, DataPoint, parsePoint, showWarning
    // and the opening of main are elided in this snippet; see Example 58 for the
    // full source.
    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkHdfsLR").setMaster("local[2]")
    val inputPath = "D:\\spark\\spark-1.5.0-hadoop2.6\\data\\mllib\\lr_data.txt"//args(0)
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).cache() // cache the parsed points
    val ITERATIONS = 6 // args(1).toInt, the number of iterations

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        // p is a DataPoint
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
// scalastyle:on println 
Example 67
Source File: SparkTachyonHdfsLR.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo
import org.apache.spark.storage.StorageLevel



object SparkTachyonHdfsLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD (SGD: stochastic gradient descent) or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS (L-BFGS: a quasi-Newton method)
        |for more conventional use.
      """.stripMargin)
  }

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def main(args: Array[String]) {

    showWarning()

    val inputPath = args(0)
    val sparkConf = new SparkConf().setAppName("SparkTachyonHdfsLR")
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).persist(StorageLevel.OFF_HEAP)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
// scalastyle:on println 
Example 68
Source File: NearestNeighbors.scala    From SparkSMOTE   with MIT License 5 votes vote down vote up
package utils

import org.apache.spark.SparkContext
import breeze.linalg._
import breeze.linalg.{DenseVector,Vector,SparseVector}
import com.github.fommil.netlib.BLAS
import scala.util.Random
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast
import scala.collection.mutable.ArrayBuffer

object NearestNeighbors {

	def runNearestNeighbors(data: RDD[Array[(LabeledPoint,Int,Int)]], 
		kNN: Int, 
		sampleData: Array[(LabeledPoint,Int,Int)]): Array[(String,Array[((Int,Int),Double)])] = {
		
		val globalNearestNeighborsByIndex = data
			.mapPartitionsWithIndex(localNearestNeighbors(_, _, kNN, sampleData))
			.groupByKey()
			.map(x => (x._1, x._2.toArray.sortBy(r => r._2).take(kNN)))
			.collect()

		globalNearestNeighborsByIndex 
	}


	private def localNearestNeighbors(partitionIndex: Long,
		iter: Iterator[Array[(LabeledPoint,Int,Int)]],
		kNN: Int,
		sampleData: Array[(LabeledPoint,Int,Int)]): Iterator[(String,((Int,Int),Double))] = { 
			
			var result = List[(String,((Int,Int),Double))]()
			val dataArr = iter.next
			val nLocal = dataArr.size - 1			
			val sampleDataSize = sampleData.size - 1


			val kLocalNeighbors = Array.fill[distanceIndex](sampleDataSize+1)(null)
			for (i1 <- 0 to sampleDataSize) {
				kLocalNeighbors(i1) = distanceIndex(sampleData(i1)._3.toInt, sampleData(i1)._2.toInt,
					DenseVector.zeros[Double](kNN) + Int.MaxValue.toDouble, DenseVector.zeros[Int](kNN))
			}

			for (i <- 0 to nLocal) {
				val currentPoint = dataArr(i)
				val features = currentPoint._1.features
				val rowId = currentPoint._3.toInt	
				for (j <- 0 to sampleDataSize) {
					val samplePartitionId = sampleData(j)._2
					val sampleRowId = sampleData(j)._3
					val sampleFeatures = sampleData(j)._1.features
					if (!((rowId == sampleRowId) & (samplePartitionId == partitionIndex))) {
						val distance = Math.sqrt(sum((sampleFeatures - features) :* (sampleFeatures - features)))
						if (distance < max(kLocalNeighbors(j).distanceVector)) {
							val indexToReplace = argmax(kLocalNeighbors(j).distanceVector)
							kLocalNeighbors(j).distanceVector(indexToReplace) = distance
							kLocalNeighbors(j).neighborRowId(indexToReplace) = rowId
						}
					}
				}
			}
			for (m <- 0 to sampleDataSize){
				for (l <- 0 to kNN - 1) {
					
					val key = kLocalNeighbors(m).partitionId.toString+","+kLocalNeighbors(m).sampleRowId.toString
					val tup = (partitionIndex.toInt,kLocalNeighbors(m).neighborRowId(l))
					result.::=(key,(tup,kLocalNeighbors(m).distanceVector(l)))
				}
			}			
		result.iterator 
	}	
} 
Example 69
Source File: loadData.scala    From SparkSMOTE   with MIT License 5 votes vote down vote up
package utils

import org.apache.spark.SparkContext
import breeze.linalg._
import breeze.linalg.{DenseVector,Vector,SparseVector}
import org.apache.spark.rdd.RDD
import org.apache.spark.broadcast.Broadcast

object loadData {

 	def readDelimitedData(sc: SparkContext, path: String, numFeatures: Int, delimiter: String, numPartitions: Int): RDD[(LabeledPoint,Int,Int)] = {
		val data = sc.textFile(path).filter{x => x.split(delimiter)(0).toDouble == 1.0}.repartition(numPartitions).mapPartitions{x => Iterator(x.toArray)}
		val formatData = data.mapPartitionsWithIndex{(partitionId,iter) =>
			var result = List[(LabeledPoint,Int,Int)]()
			val dataArray = iter.next
			val dataArraySize = dataArray.size - 1
			var rowCount = dataArraySize
			for (i <- 0 to dataArraySize) {
				val parts = dataArray(i).split(delimiter)
				result.::=((LabeledPoint(parts(0).toDouble,DenseVector(parts.slice(1,numFeatures+1)).map(_.toDouble)),partitionId.toInt,rowCount))
				rowCount = rowCount - 1
			}
			result.iterator
		}

		formatData
	}
	
} 
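A usage sketch (path and parameters are illustrative, and an existing SparkContext sc is assumed): read a comma-delimited file with ten features into four partitions; only rows whose label equals 1.0 are kept, matching the filter above:

val data = loadData.readDelimitedData(sc, "/data/minority.csv", numFeatures = 10, delimiter = ",", numPartitions = 4)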
Example 70
Source File: SMOTE.scala    From SparkSMOTE   with MIT License 5 votes vote down vote up
package SMOTE

import org.apache.spark.SparkContext
import breeze.linalg._
import breeze.linalg.{DenseVector,Vector,SparseVector}
import com.github.fommil.netlib.BLAS
import scala.util.Random
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast
import scala.collection.mutable.ArrayBuffer
import utils._

object SMOTE {

	def runSMOTE(sc: SparkContext, 
		inPath: String, 
		outPath: String,
		numFeatures: Int,  
		oversamplingPctg: Double,
        kNN: Int,
		delimiter: String,
        numPartitions: Int): Unit = {

		val rand = new Random()

		val data = loadData.readDelimitedData(sc, inPath, numFeatures, delimiter, numPartitions)
		
		val dataArray = data.mapPartitions(x => Iterator(x.toArray)).cache()

        val numObs = dataArray.map(x => x.size).reduce(_+_)

		println("Number of Filtered Observations "+numObs.toString)		

		val roundPctg = oversamplingPctg
        val sampleData = dataArray.flatMap(x => x).sample(withReplacement = false, fraction = roundPctg, seed = 1L).collect().sortBy(r => (r._2,r._3)) //without Replacement

		println("Sample Data Count "+sampleData.size.toString)

	 	val globalNearestNeighbors = NearestNeighbors.runNearestNeighbors(dataArray, kNN, sampleData)
		
        var randomNearestNeighbor = globalNearestNeighbors.map(x => (x._1.split(",")(0).toInt,x._1.split(",")(1).toInt,x._2(rand.nextInt(kNN)))).sortBy(r => (r._1,r._2))
		
        var sampleDataNearestNeighbors = randomNearestNeighbor.zip(sampleData).map(x => (x._1._3._1._1, x._1._2, x._1._3._1._2, x._2._1))

		val syntheticData = dataArray.mapPartitionsWithIndex(createSyntheticData(_,_,sampleDataNearestNeighbors,delimiter)).persist()
		println("Synthetic Data Count "+syntheticData.count.toString)
		val newData = syntheticData.union(sc.textFile(inPath))
		println("New Line Count "+newData.count.toString)
		newData.saveAsTextFile(outPath)
	
	}

	private def createSyntheticData(partitionIndex: Long,
		iter: Iterator[Array[(LabeledPoint,Int,Int)]],
		sampleDataNN: Array[(Int,Int,Int,LabeledPoint)],
		delimiter: String): Iterator[String]  = {
			
			var result = List[String]()
			val dataArr = iter.next
			val nLocal = dataArr.size - 1			
			val sampleDataNNSize = sampleDataNN.size - 1
			val rand = new Random()			

			for (j <- 0 to sampleDataNNSize){
				val partitionId = sampleDataNN(j)._1
				val neighborId = sampleDataNN(j)._3
				val sampleFeatures = sampleDataNN(j)._4.features
				if (partitionId == partitionIndex.toInt){
					val currentPoint = dataArr(neighborId)	
					val features = currentPoint._1.features	
					sampleFeatures += (sampleFeatures - features) * rand.nextDouble
					result.::=("1.0"+delimiter+sampleFeatures.toArray.mkString(delimiter))	
				}
			}
		result.iterator
	}		
} 
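A usage sketch (paths are illustrative, sc is an existing SparkContext): with fraction 1.0 every minority example is sampled (approximately) once, so roughly one synthetic point is generated per minority example using 5 nearest neighbours:

SMOTE.runSMOTE(sc, inPath = "/data/train.csv", outPath = "/data/train_smote",
  numFeatures = 10, oversamplingPctg = 1.0, kNN = 5, delimiter = ",", numPartitions = 4)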
Example 71
Source File: LocalKMeans.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.SparkContext._


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D){rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (x1, y1)), (id2, (x2, y2))) => (id1, (x1 + x2, y1 + y2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
} 
Example 72
Source File: SparkLR.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}

import org.apache.spark._


object SparkLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkLR")
    val sc = new SparkContext(sparkConf)
    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = sc.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    sc.stop()
  }
} 
Example 73
Source File: TestingUtils.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel

import breeze.linalg.{DenseMatrix, DenseVector, convert, zipValues}
import breeze.stats.distributions.Rand
import io.picnicml.doddlemodel.data.{Dataset, RealVector}
import org.scalactic.Equality

trait TestingUtils {

  implicit lazy val randomUniform: Rand[Float] = new Rand[Float] {
    override def draw(): Float = Rand.uniform.draw().toFloat
  }

  def breezeEqual(x0: DenseMatrix[Float], x1: DenseMatrix[Float])(implicit tol: Equality[Float]): Boolean =
    breezeEqual(x0.toDenseVector, x1.toDenseVector)

  def breezeEqual(x0: RealVector, x1: RealVector)(implicit tol: Equality[Float]): Boolean =
    zipValues(x0, x1).forall((v0, v1) => (v0.isNaN && v1.isNaN) || tol.areEquivalent(v0, v1))

  def gradApprox(func: RealVector => Float, x: RealVector, h: Double = 1e-3): RealVector = {
    // two-sided finite differences
    val grad = DenseVector.zeros[Double](x.length)
    for ((i, _) <- x.activeIterator) {
      val xPlusH = convert(x.copy, Double)
      xPlusH(i) += h
      val xMinusH = convert(x.copy, Double)
      xMinusH(i) -= h
      grad(i) = (func(convert(xPlusH, Float)) - func(convert(xMinusH, Float)).toDouble) / (2.0 * h)
    }
    convert(grad, Float)
  }

  def dummyData(nRows: Int, nCols: Int = 1): Dataset =
    (DenseMatrix.zeros[Float](nRows, nCols), convert(DenseVector((0 until nRows).toArray), Float))
} 
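gradApprox implements two-sided (central) finite differences, (f(x + h) - f(x - h)) / 2h per coordinate. A quick sanity check against a function with a known gradient (standalone sketch):

import breeze.linalg.{sum, DenseVector}

// f(x) = sum of squares has gradient 2x, so gradApprox(f, x) should be close to 2x.
val f = (x: DenseVector[Float]) => sum(x.map(v => v * v))
// e.g. gradApprox(f, DenseVector(1.0f, -2.0f)) is approximately DenseVector(2.0f, -4.0f)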
Example 74
Source File: GroupKFoldSplitterTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.modelselection

import breeze.linalg.DenseVector
import io.picnicml.doddlemodel.TestingUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class GroupKFoldSplitterTest extends AnyFlatSpec with Matchers with TestingUtils {

  "GroupKFoldSplitter" should "split data so that folds are i.i.d" in {
    val (x, y) = dummyData(10)
    val groups = DenseVector(1, 2, 2, 0, 0, 0, 2, 1, 1, 2)
    val splitter = GroupKFoldSplitter(numFolds = 3)
    val splits = splitter.splitData(x, y, groups)

    val noGroupsInTrainTestSplits = splits.forall { split =>
      val trGroups = split.yTr.map(x => groups(x.toInt)).toArray
      val teGroups = split.yTe.map(x => groups(x.toInt)).toArray
      trGroups.forall(trGroup => !teGroups.contains(trGroup))
    }

    noGroupsInTrainTestSplits shouldBe true
  }
} 
Example 75
Source File: LinearClassifierTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.linear

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.numerics.sigmoid
import cats.syntax.option._
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{Features, RealVector, Simplex, Target}
import io.picnicml.doddlemodel.linear.typeclasses.LinearClassifier
import org.scalatest.OptionValues
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

case class DummyLinearClassifier(numClasses: Option[Int], w: Option[RealVector])

class LinearClassifierTest extends AnyFlatSpec with Matchers with OptionValues with TestingUtils {

  val ev: LinearClassifier[DummyLinearClassifier] = new LinearClassifier[DummyLinearClassifier] {

    override def numClasses(model: DummyLinearClassifier): Option[Int] = model.numClasses

    override protected def w(model: DummyLinearClassifier): Option[RealVector] = model.w

    override protected[doddlemodel] def copy(model: DummyLinearClassifier, numClasses: Int): DummyLinearClassifier =
      model.copy(numClasses = numClasses.some)

    override protected def copy(model: DummyLinearClassifier, w: RealVector): DummyLinearClassifier =
      model.copy(w = w.some)

    override protected def predictStateless(model: DummyLinearClassifier, w: RealVector, x: Features): Target =
      x * w

    override protected def predictProbaStateless(model: DummyLinearClassifier, w: RealVector, x: Features): Simplex =
      sigmoid(x * w).asDenseMatrix.t

    override protected[linear] def lossStateless(model: DummyLinearClassifier,
                                                 w: RealVector, x: Features, y: Target): Float = 0.0f

    override protected[linear] def lossGradStateless(model: DummyLinearClassifier,
                                                     w: RealVector, x: Features, y: Target): RealVector = w
  }

  private val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
  private val y = DenseVector.vertcat(DenseVector.zeros[Float](5), DenseVector.ones[Float](5))
  private val model = DummyLinearClassifier(none, none)

  "Linear classifier" should "throw an exception when using fit, predict on trained, untrained models" in {
    an [IllegalArgumentException] should be thrownBy ev.predict(model, x)
    val trainedModel = ev.fit(model, x, y)
    an [IllegalArgumentException] should be thrownBy ev.fit(trainedModel, x, y)
  }

  it should "implement predictor functions" in {
    ev.isFitted(model) shouldBe false
    val trainedModel = ev.fit(model, x, y)
    ev.isFitted(trainedModel) shouldBe true
    val yPred = ev.predict(trainedModel, x)
    yPred.length shouldEqual y.length
  }

  it should "set the number of classes after fit" in {
    ev.numClasses(model).isEmpty shouldBe true
    val trainedModel = ev.fit(model, x, y)
    ev.numClasses(trainedModel).value shouldBe 2
  }

  it should "throw an exception if fitting a model with an invalid target variable" in {
    val invalidCategoricalY = DenseVector.zeros[Float](10)
    an [IllegalArgumentException] should be thrownBy ev.fit(model, x, invalidCategoricalY)
    val invalidRealY = DenseVector.rand[Float](10, rand = randomUniform)
    an [IllegalArgumentException] should be thrownBy ev.fit(model, x, invalidRealY)
  }
} 
Example 76
Source File: SoftmaxClassifierTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.linear

import breeze.linalg.{DenseMatrix, DenseVector}
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.SoftmaxClassifier.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class SoftmaxClassifierTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-3f)

  "Softmax classifier" should "calculate the value of the loss function" in {
    val w = DenseVector(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 1.0f)
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f),
      List(-2.0f, 1.0f, 0.0f)
    )
    val y = DenseVector(1.0f, 0.0f, 2.0f)

    val model = ev.copy(SoftmaxClassifier(lambda = 1.0f), numClasses = 3)
    ev.lossStateless(model, w, x, y) shouldEqual 19.843778223530194f
  }

  it should "calculate the gradient of the loss function wrt. to model parameters" in {
    for (_ <- 1 to 1000) {
      val w = DenseVector.rand[Float](5 * 9, rand = randomUniform)
      val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
      val y = DenseVector.rangeF(0, 10)
      testGrad(w, x, y)
    }

    def testGrad(w: RealVector, x: Features, y: Target) = {
      val model = ev.copy(SoftmaxClassifier(lambda = 0.5f), numClasses = 10)
      breezeEqual(
        gradApprox(w => ev.lossStateless(model, w, x, y), w),
        ev.lossGradStateless(model, w, x, y)
      ) shouldEqual true
    }
  }

  it should "prevent the usage of negative L2 regularization strength" in {
    an [IllegalArgumentException] shouldBe thrownBy(SoftmaxClassifier(lambda = -0.5f))
  }
} 
Example 77
Source File: PoissonRegressionTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.linear

import breeze.linalg.{DenseMatrix, DenseVector, convert}
import breeze.stats.distributions.Rand
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.PoissonRegression.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class PoissonRegressionTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-2f)

  "Poisson regression" should "calculate the value of the loss function" in {
    val w = DenseVector(1.0f, 2.0f, 3.0f)
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f)
    )
    val y = DenseVector(3.0f, 4.0f)

    val model = PoissonRegression(lambda = 1.0f)
    ev.lossStateless(model, w, x, y) shouldEqual 29926.429998513137f
  }

  it should "calculate the gradient of the loss function wrt. to model parameters" in {
    for (_ <- 1 to 1000) {
      val w = DenseVector.rand[Float](5, rand = randomUniform)
      val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
      val y = convert(DenseVector.rand(10, rand = Rand.randInt(20)), Float)
      testGrad(w, x, y)
    }

    def testGrad(w: RealVector, x: Features, y: Target) = {
      val model = PoissonRegression(lambda = 0.5f)
      breezeEqual(
        gradApprox(w => ev.lossStateless(model, w, x, y), w),
        ev.lossGradStateless(model, w, x, y)
      ) shouldEqual true
    }
  }

  it should "prevent the usage of negative L2 regularization strength" in {
    an [IllegalArgumentException] shouldBe thrownBy(PoissonRegression(lambda = -0.5f))
  }

  it should "throw an exception if fitting a model on a dataset that is not count data" in {
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f),
      List(3.0f, 1.0f, 2.0f)
    )
    val y = DenseVector.rand[Float](3, rand = randomUniform)
    val model = PoissonRegression()

    an [IllegalArgumentException] shouldBe thrownBy(ev.fit(model, x, y))
  }
} 
Example 78
Source File: LinearRegressionTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.linear

import breeze.linalg.{DenseMatrix, DenseVector}
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.LinearRegression.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class LinearRegressionTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-3f)

  "Linear regression" should "calculate the value of the loss function" in {
    val w = DenseVector(1.0f, 2.0f, 3.0f)
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f)
    )
    val y = DenseVector(3.0f, 4.0f)

    val model = LinearRegression(lambda = 1)
    ev.lossStateless(model, w, x, y) shouldEqual 24.75f
  }

  it should "calculate the gradient of the loss function wrt. to model parameters" in {
    for (_ <- 1 to 1000) {
      val w = DenseVector.rand[Float](5, rand = randomUniform)
      val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
      val y = DenseVector.rand[Float](10, rand = randomUniform)
      testGrad(w, x, y)
    }

    def testGrad(w: RealVector, x: Features, y: Target) = {
      val model = LinearRegression(lambda = 0.5f)
      breezeEqual(
        gradApprox(w => ev.lossStateless(model, w, x, y), w),
        ev.lossGradStateless(model, w, x, y)
      ) shouldEqual true
    }
  }

  it should "prevent the usage of negative L2 regularization strength" in {
    an [IllegalArgumentException] shouldBe thrownBy(LinearRegression(lambda = -0.5f))
  }
} 
Example 79
Source File: LogisticRegressionTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.linear

import breeze.linalg.{DenseMatrix, DenseVector, convert}
import breeze.numerics.round
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.LogisticRegression.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class LogisticRegressionTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-3f)

  "Logistic regression" should "calculate the value of the loss function" in {
    val w = DenseVector(1.0f, 2.0f, 3.0f)
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f)
    )
    val y = DenseVector(1.0f, 0.0f)

    val model = LogisticRegression(lambda = 1)
    ev.lossStateless(model, w, x, y) shouldEqual 7.1566391945397703f
  }

  it should "calculate the gradient of the loss function wrt. to model parameters" in {
    for (_ <- 1 to 1000) {
      val w = DenseVector.rand[Float](5, rand = randomUniform)
      val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
      val y = convert(round(DenseVector.rand[Float](10, rand = randomUniform)), Float)
      testGrad(w, x, y)
    }

    def testGrad(w: RealVector, x: Features, y: Target) = {
      val model = LogisticRegression(lambda = 0.5f)
      breezeEqual(
        gradApprox(w => ev.lossStateless(model, w, x, y), w),
        ev.lossGradStateless(model, w, x, y)
      ) shouldEqual true
    }
  }


  it should "prevent the usage of negative L2 regularization strength" in {
    an [IllegalArgumentException] shouldBe thrownBy(LogisticRegression(lambda = -0.5f))
  }

  it should "throw an exception if fitting a model on a dataset with more than two classes" in {
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f),
      List(3.0f, 1.0f, 2.0f)
    )
    val y = DenseVector(1.0f, 0.0f, 2.0f)
    val model = LogisticRegression()

    an [IllegalArgumentException] shouldBe thrownBy(ev.fit(model, x, y))
  }
} 
Example 80
Source File: LinearRegressorTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.linear

import breeze.linalg.{DenseMatrix, DenseVector}
import cats.syntax.option._
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.typeclasses.LinearRegressor
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

case class DummyLinearRegressor(w: Option[RealVector])

class LinearRegressorTest extends AnyFlatSpec with Matchers with TestingUtils {

  val ev: LinearRegressor[DummyLinearRegressor] = new LinearRegressor[DummyLinearRegressor] {

    override protected def w(model: DummyLinearRegressor): Option[RealVector] = model.w

    override protected def copy(model: DummyLinearRegressor): DummyLinearRegressor = model.copy()

    override protected def copy(model: DummyLinearRegressor, w: RealVector): DummyLinearRegressor =
      model.copy(w.some)

    override protected def targetVariableAppropriate(y: Target): Boolean = true

    override protected def predictStateless(model: DummyLinearRegressor, w: RealVector, x: Features): Target =
      x * w

    override protected[linear] def lossStateless(model: DummyLinearRegressor,
                                                 w: RealVector, x: Features, y: Target): Float = 0.0f

    override protected[linear] def lossGradStateless(model: DummyLinearRegressor,
                                                     w: RealVector, x: Features, y: Target): RealVector = w
  }

  private val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
  private val y = DenseVector.rand[Float](10, rand = randomUniform)
  private val model = DummyLinearRegressor(none)

  "Linear regressor" should "throw an exception when using fit, predict on trained, untrained models" in {
    an [IllegalArgumentException] should be thrownBy ev.predict(model, x)
    val trainedModel = ev.fit(model, x, y)
    an [IllegalArgumentException] should be thrownBy ev.fit(trainedModel, x, y)
  }

  it should "implement predictor functions" in {
    ev.isFitted(model) shouldBe false
    val trainedModel = ev.fit(model, x, y)
    ev.isFitted(trainedModel) shouldBe true
    val yPred = ev.predict(trainedModel, x)
    yPred.length shouldEqual y.length
  }
} 
Example 81
Source File: StandardScalerTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.{*, DenseMatrix, DenseVector, convert}
import breeze.stats.{mean, stddev}
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.Feature.{CategoricalFeature, FeatureIndex, NumericalFeature}
import io.picnicml.doddlemodel.preprocessing.StandardScaler.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class StandardScalerTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-4f)

  "Standard scaler" should "preprocess the numerical features" in {
    val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
    val featureIndex = FeatureIndex(
      List(
        NumericalFeature,
        NumericalFeature,
        NumericalFeature,
        NumericalFeature,
        CategoricalFeature
      )
    )
    val scaler = StandardScaler(featureIndex)
    val trainedScaler = ev.fit(scaler, x)
    val xTransformed = ev.transform(trainedScaler, x)

    breezeEqual(mean(x(::, *)).t, DenseVector.zeros[Float](5)) shouldBe false
    breezeEqual(convert(stddev(x(::, *)).t, Float), DenseVector.ones[Float](5)) shouldBe false

    val expectedMeans = DenseVector.zeros[Float](5)
    expectedMeans(-1) = mean(x(::, -1))
    breezeEqual(mean(xTransformed(::, *)).t, expectedMeans) shouldBe true

    val expectedStdDevs = DenseVector.ones[Float](5)
    expectedStdDevs(-1) = stddev(x(::, -1)).toFloat
    breezeEqual(convert(stddev(xTransformed(::, *)).t, Float), expectedStdDevs) shouldBe true
  }

  it should "handle the zero variance case" in {
    val x = DenseMatrix.ones[Float](10, 5)
    val scaler = StandardScaler(FeatureIndex.numerical(5))
    val trainedScaler = ev.fit(scaler, x)
    val xTransformed = ev.transform(trainedScaler, x)

    xTransformed.forall(_.isNaN) shouldBe false
  }

  it should "preprocess a subset of numerical features" in {
    val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
    val scaler = StandardScaler(FeatureIndex.numerical(5).subset("f0", "f2", "f4"))
    val trainedScaler = ev.fit(scaler, x)
    val xTransformed = ev.transform(trainedScaler, x)

    breezeEqual(mean(x(::, *)).t, DenseVector.zeros[Float](5)) shouldBe false
    breezeEqual(convert(stddev(x(::, *)).t, Float), DenseVector.ones[Float](5)) shouldBe false

    assert(tolerance.areEqual(mean(xTransformed(::, 0)), 0.0f))
    assert(tolerance.areEqual(convert(stddev(xTransformed(::, 0)), Float), 1.0f))
    assert(!tolerance.areEqual(mean(xTransformed(::, 1)), 0.0f))
    assert(!tolerance.areEqual(convert(stddev(xTransformed(::, 1)), Float), 1.0f))
    assert(tolerance.areEqual(mean(xTransformed(::, 2)), 0.0f))
    assert(tolerance.areEqual(convert(stddev(xTransformed(::, 2)), Float), 1.0f))
    assert(!tolerance.areEqual(mean(xTransformed(::, 3)), 0.0f))
    assert(!tolerance.areEqual(convert(stddev(xTransformed(::, 3)), Float), 1.0f))
    assert(tolerance.areEqual(mean(xTransformed(::, 4)), 0.0f))
    assert(tolerance.areEqual(convert(stddev(xTransformed(::, 4)), Float), 1.0f))
  }
} 
Example 82
Source File: BinarizerTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.{DenseMatrix, DenseVector}
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.Feature.{CategoricalFeature, FeatureIndex, NumericalFeature}
import io.picnicml.doddlemodel.preprocessing.Binarizer.ev
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class BinarizerTest extends AnyFlatSpec with Matchers with TestingUtils {

  private val x = DenseMatrix(
    List(0.0f, 1.0f, 0.0f),
    List(0.3f, -1.0f, 1.0f),
    List(-0.3f, 2.0f, 0.0f)
  )

  "Binarizer" should "process the numerical columns by corresponding thresholds" in {
    val featureIndex = FeatureIndex(List(NumericalFeature, NumericalFeature, CategoricalFeature))
    val thresholds = DenseVector(0.0f, -1.5f)

    val binarizer = Binarizer(thresholds, featureIndex)
    val xBinarizedExpected = DenseMatrix(
      List(0.0f, 1.0f, 0.0f),
      List(1.0f, 1.0f, 1.0f),
      List(0.0f, 1.0f, 0.0f)
    )

    breezeEqual(ev.transform(binarizer, x), xBinarizedExpected) shouldBe true
  }

  it should "process all the numerical columns by a single threshold" in {
    val featureIndex = FeatureIndex(List(NumericalFeature, NumericalFeature, NumericalFeature))
    val threshold = 0.5f

    val binarizer = Binarizer(threshold, featureIndex)
    val xBinarizedExpected = DenseMatrix(
      List(0.0f, 1.0f, 0.0f),
      List(0.0f, 0.0f, 1.0f),
      List(0.0f, 1.0f, 0.0f)
    )

    breezeEqual(ev.transform(binarizer, x), xBinarizedExpected) shouldBe true
  }

  it should "amount to no-op if there are no numerical features in data" in {
    val featureIndex = FeatureIndex(List(CategoricalFeature, CategoricalFeature, CategoricalFeature))
    val thresholds1 = DenseVector(0.0f, -1.5f)
    val thresholds2 = 0.5f

    val binarizer1 = Binarizer(thresholds1, featureIndex)
    val binarizer2 = Binarizer(thresholds2, featureIndex)

    breezeEqual(ev.transform(binarizer1, x), x) shouldBe true
    breezeEqual(ev.transform(binarizer2, x), x) shouldBe true
  }

  it should "fail when the amount of passed thresholds is different to number of numerical features in data" in {
    val featureIndex = FeatureIndex(List(NumericalFeature, NumericalFeature, NumericalFeature))
    val thresholds = DenseVector(0.0f, -1.5f)

    // 3 numeric columns vs 2 thresholds
    an [IllegalArgumentException] should be thrownBy Binarizer(thresholds, featureIndex)
  }
} 
Example 83
Source File: NormsTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.{DenseMatrix, DenseVector}
import io.picnicml.doddlemodel.TestingUtils
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class NormsTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-4f)

  private val x = DenseMatrix(
    List(0.0f, 0.0f, 0.0f),
    List(1.0f, 2.0f, 2.0f),
    List(-2.0f, 0.0f, 0.0f)
  )

  "Norms" should "calculate the L2 norm of each row" in {
    val xExpected = DenseVector(0.0f, 3.0f, 2.0f)
    breezeEqual(Norms.L2Norm(x), xExpected) shouldBe true
  }

  "Norms" should "calculate the L1 norm of each row" in {
    val xExpected = DenseVector(0.0f, 5.0f, 2.0f)
    breezeEqual(Norms.L1Norm(x), xExpected) shouldBe true
  }

  "Norms" should "calculate the max norm of each row" in {
    val xExpected = DenseVector(0.0f, 2.0f, 2.0f)
    breezeEqual(Norms.MaxNorm(x), xExpected) shouldBe true
  }

} 
Example 84
Source File: StratifiedClassifierTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.dummy.classification

import breeze.linalg.{DenseVector, convert}
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{loadBreastCancerDataset, loadIrisDataset}
import io.picnicml.doddlemodel.dummy.classification.StratifiedClassifier.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class StratifiedClassifierTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-3f)

  "Stratified classifier" should "infer a categorical distribution from the iris dataset" in {
    val (x, y, _) = loadIrisDataset
    val model = StratifiedClassifier()
    val trainedModel = ev.fit(model, x, y)
    breezeEqual(
      convert(trainedModel.getTargetDistributionParams, Float),
      DenseVector(0.333f, 0.333f, 0.333f)
    ) shouldBe true
  }

  it should "infer a categorical distribution from the breast cancer dataset" in {
    val (x, y, _) = loadBreastCancerDataset
    val model = StratifiedClassifier()
    val trainedModel = ev.fit(model, x, y)
    breezeEqual(
      convert(trainedModel.getTargetDistributionParams, Float),
      DenseVector(0.372f, 0.627f)
    ) shouldBe true
  }
} 
Example 85
Source File: LocalFileLR.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{Vector, DenseVector}


object LocalFileLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val nums = line.split(' ').map(_.toDouble)
    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
    val points = lines.map(parsePoint _)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- points) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
} 
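
The update in this and the other LR examples is batch gradient descent on the logistic loss L(w) = Σᵢ log(1 + exp(-yᵢ wᵀxᵢ)); the factor (1 / (1 + exp(-y wᵀx)) - 1) * y is exactly its derivative with respect to wᵀx. A minimal finite-difference sanity check of that gradient (plain Breeze; the sample values are hypothetical):

import breeze.linalg.DenseVector
import scala.math.{exp, log}

object LogisticGradientCheck extends App {
  val x = DenseVector(1.0, 2.0)
  val y = 1.0
  def loss(w: DenseVector[Double]) = log(1 + exp(-y * (w dot x)))
  def grad(w: DenseVector[Double]) = x * ((1 / (1 + exp(-y * (w dot x))) - 1) * y)

  val w = DenseVector(0.5, -0.25)
  val eps = 1e-6
  val wPlus = w.copy; wPlus(0) += eps
  // analytic and numeric derivatives in the first coordinate should agree closely
  println(grad(w)(0), (loss(wPlus) - loss(w)) / eps)
}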
Example 86
Source File: MostFrequentValueImputerTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.impute

import breeze.linalg.{DenseMatrix, DenseVector}
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.Feature.{CategoricalFeature, FeatureIndex, NumericalFeature}
import io.picnicml.doddlemodel.impute.MostFrequentValueImputer.ev
import org.scalatest.OptionValues
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class MostFrequentValueImputerTest extends AnyFlatSpec with Matchers with TestingUtils with OptionValues {

  "Most frequent value imputer" should "impute the categorical features" in {
    val xMissing = DenseMatrix(
      List(Float.NaN, 1.0f, 2.0f),
      List(3.0f, Float.NaN, 5.0f),
      List(6.0f, 7.0f, 8.0f),
      List(6.0f, 7.0f, 2.0f)
    )

    val xImputedExpected = DenseMatrix(
      List(6.0f, 1.0f, 2.0f),
      List(3.0f, Float.NaN, 5.0f),
      List(6.0f, 7.0f, 8.0f),
      List(6.0f, 7.0f, 2.0f)
    )

    val featureIndex = FeatureIndex.apply(List(CategoricalFeature, NumericalFeature, CategoricalFeature))
    val imputer = MostFrequentValueImputer(featureIndex)
    val fittedImputer = ev.fit(imputer, xMissing)

    breezeEqual(fittedImputer.mostFrequent.value, DenseVector(6.0f, 2.0f)) shouldBe true
    breezeEqual(ev.transform(fittedImputer, xMissing), xImputedExpected) shouldBe true
  }

  it should "impute a subset of categorical features" in {
    val xMissing = DenseMatrix(
      List(Float.NaN, 1.0f, 2.0f),
      List(3.0f, Float.NaN, 5.0f),
      List(6.0f, 7.0f, 8.0f),
      List(6.0f, 7.0f, 2.0f)
    )

    val xImputedExpected = DenseMatrix(
      List(Float.NaN, 1.0f, 2.0f),
      List(3.0f, 7.0f, 5.0f),
      List(6.0f, 7.0f, 8.0f),
      List(6.0f, 7.0f, 2.0f)
    )

    val featureIndex = FeatureIndex.categorical(List(1, 2))
    val imputer = MostFrequentValueImputer(featureIndex)
    val fittedImputer = ev.fit(imputer, xMissing)

    breezeEqual(fittedImputer.mostFrequent.value, DenseVector(7.0f, 2.0f)) shouldBe true
    breezeEqual(ev.transform(fittedImputer, xMissing), xImputedExpected) shouldBe true
  }
} 
Example 87
Source File: MeanValueImputerTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.impute

import breeze.linalg.{DenseMatrix, DenseVector}
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.Feature.{CategoricalFeature, FeatureIndex, NumericalFeature}
import io.picnicml.doddlemodel.impute.MeanValueImputer.ev
import org.scalatest.OptionValues
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class MeanValueImputerTest extends AnyFlatSpec with Matchers with TestingUtils with OptionValues {

  "Mean value imputer" should "impute the numerical features" in {
    val xMissing = DenseMatrix(
      List(Float.NaN, 1.0f, 2.0f),
      List(3.0f, Float.NaN, 5.0f),
      List(6.0f, 7.0f, 8.0f)
    )

    val xImputedExpected = DenseMatrix(
      List(4.5f, 1.0f, 2.0f),
      List(3.0f, Float.NaN, 5.0f),
      List(6.0f, 7.0f, 8.0f)
    )

    val imputer = MeanValueImputer(FeatureIndex.apply(List(NumericalFeature, CategoricalFeature, NumericalFeature)))
    val fittedImputer = ev.fit(imputer, xMissing)

    breezeEqual(fittedImputer.means.value, DenseVector(4.5f, 5.0f)) shouldBe true
    breezeEqual(ev.transform(fittedImputer, xMissing), xImputedExpected) shouldBe true
  }

  it should "impute a subset of numerical features" in {
    val xMissing = DenseMatrix(
      List(Float.NaN, 1.0f, 2.0f),
      List(3.0f, Float.NaN, 5.0f),
      List(6.0f, 7.0f, 8.0f)
    )

    val xImputedExpected = DenseMatrix(
      List(4.5f, 1.0f, 2.0f),
      List(3.0f, Float.NaN, 5.0f),
      List(6.0f, 7.0f, 8.0f)
    )

    val imputer = MeanValueImputer(FeatureIndex.numerical(List(0, 2)))
    val fittedImputer = ev.fit(imputer, xMissing)

    breezeEqual(fittedImputer.means.value, DenseVector(4.5f, 5.0f)) shouldBe true
    breezeEqual(ev.transform(fittedImputer, xMissing), xImputedExpected) shouldBe true
  }
} 
Example 88
Source File: DatasetUtilsTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.data

import breeze.linalg.DenseVector
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.DatasetUtils.{shuffleDataset, splitDataset, splitDatasetWithGroups}
import org.scalactic.{Equality, TolerantNumerics}

import scala.util.Random
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class DatasetUtilsTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val rand: Random = new Random(0)
  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1.0f)

  val (x, y, _) = loadIrisDataset

  "Dataset utils" should "shuffle the dataset" in {
    val (_, yShuffled) = shuffleDataset(x, y)
    breezeEqual(y, yShuffled) shouldBe false
  }

  they should "split the dataset" in {
    val split = splitDataset(x, y)
    split.yTr.length shouldBe 75
    split.yTe.length shouldBe 75
  }

  they should "split the dataset with groups" in {
    val groups = DenseVector((0 until x.rows).map(x => x % 4):_*)
    val split = splitDatasetWithGroups(x, y, groups, proportionTrain = 0.8f)
    val groupsTe = split.groupsTe.toArray
    split.groupsTr.forall(trGroup => !groupsTe.contains(trGroup)) shouldBe true
  }
} 
Example 89
Source File: CsvLoaderTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.data

import breeze.linalg.{DenseMatrix, DenseVector}
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.Feature.{CategoricalFeature, NumericalFeature}
import io.picnicml.doddlemodel.data.ResourceDatasetLoaders.loadDummyCsvReadingDataset
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class CsvLoaderTest extends AnyFlatSpec with Matchers with TestingUtils {

  "Csv loader" should "load and encode data" in {
    val (x, y, featureIndex) = loadDummyCsvReadingDataset
    val xCorrect = DenseMatrix(
      List(0.0f, 0.0f, 0.1f, 1.1f),
      List(1.0f, Float.NaN, 0.2f, 1.2f),
      List(2.0f, 1.0f, 0.3f, Float.NaN),
      List(3.0f, 2.0f, 0.4f, 1.4f),
      List(0.0f, 0.0f, 0.1f, 1.1f),
      List(3.0f, Float.NaN, 0.4f, 1.4f)
    )
    val yCorrect = DenseVector(0.0f, 1.0f, 2.0f, 3.0f, 0.0f, 3.0f)
    breezeEqual(x, xCorrect) shouldBe true
    breezeEqual(y, yCorrect) shouldBe true
    featureIndex.names shouldBe IndexedSeq("f0", "f1", "f2", "f3")
    featureIndex.types shouldBe IndexedSeq(
      CategoricalFeature,
      CategoricalFeature,
      NumericalFeature,
      NumericalFeature
    )
    featureIndex.columnIndices shouldBe (0 until 4)
  }
} 
Example 90
Source File: ClassificationMetricsTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.metrics

import breeze.linalg.DenseVector
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ClassificationMetricsTest extends AnyFlatSpec with Matchers {

  "Classification metrics" should "calculate the classification accuracy value" in {
    val y = DenseVector(1.0f, 0.0f, 0.0f, 0.0f, 1.0f)
    val yPred = DenseVector(1.0f, 1.0f, 0.0f, 1.0f, 0.0f)

    accuracy(y, yPred) shouldEqual 0.4f
  }

  they should "calculate the precision value" in {
    val y = DenseVector(1.0f, 0.0f, 0.0f, 0.0f, 1.0f)
    val yPred = DenseVector(1.0f, 1.0f, 0.0f, 1.0f, 0.0f)

    precision(y, yPred) shouldBe 0.3333333333333333f
  }

  they should "calculate the recall value" in {
    val y = DenseVector(1.0f, 0.0f, 0.0f, 0.0f, 1.0f)
    val yPred = DenseVector(1.0f, 1.0f, 0.0f, 1.0f, 0.0f)

    recall(y, yPred) shouldBe 0.5f
  }

  they should "calculate the F1 score value" in {
    val y = DenseVector(1.0f, 0.0f, 0.0f, 0.0f, 1.0f)
    val yPred = DenseVector(1.0f, 1.0f, 0.0f, 1.0f, 0.0f)

    f1Score(y, yPred) shouldBe 0.4f
  }

  they should "calculate the Hamming loss value" in {
    val y = DenseVector(1.0f, 0.0f, 0.0f, 0.0f, 1.0f)
    val yPred = DenseVector(1.0f, 1.0f, 0.0f, 1.0f, 0.0f)

    // 3 out of 5 misclassifications
    hammingLoss(y, yPred) shouldBe 0.6f
  }
} 
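
All five asserted values follow from the same confusion counts (TP = 1, FP = 2, FN = 1, TN = 1 for these vectors). A quick stand-alone check of the arithmetic, independent of doddle-model:

object MetricsByHand extends App {
  val y     = Seq(1, 0, 0, 0, 1)
  val yPred = Seq(1, 1, 0, 1, 0)
  val pairs = y.zip(yPred)
  val tp = pairs.count { case (t, p) => t == 1 && p == 1 } // 1
  val fp = pairs.count { case (t, p) => t == 0 && p == 1 } // 2
  val fn = pairs.count { case (t, p) => t == 1 && p == 0 } // 1
  val accuracy  = pairs.count { case (t, p) => t == p }.toDouble / pairs.size // 0.4
  val precision = tp.toDouble / (tp + fp)                                     // 1/3
  val recall    = tp.toDouble / (tp + fn)                                     // 1/2
  val f1 = 2 * precision * recall / (precision + recall)                      // 0.4
  println(s"acc=$accuracy p=$precision r=$recall f1=$f1 hamming=${1 - accuracy}")
}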
Example 91
Source File: RankingMetricsTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.metrics

import breeze.linalg.DenseVector
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class RankingMetricsTest extends AnyFlatSpec with Matchers {

  "Ranking metrics" should "calculate the AUC value" in {
    val y = DenseVector(1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f)
    val yPredProba = DenseVector(
      0.6346f, 0.0742f, 0.4324f, 0.9911f, 0.7245f, 0.4751f, 0.5112f, 0.0311f, 0.7641f, 0.6612f, 0.0134f
    )

    val aucScore = auc(y, yPredProba)
    aucScore shouldBe 0.733333333333333f +- 1e-15f
  }
} 
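
The asserted AUC is the probability that a randomly chosen positive example is scored above a randomly chosen negative one, with ties counting one half. A stand-alone check over all positive/negative pairs (here 22 of the 30 pairs are ordered correctly, hence 0.7333...):

object AucByHand extends App {
  val y     = Seq(1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0)
  val score = Seq(0.6346, 0.0742, 0.4324, 0.9911, 0.7245, 0.4751, 0.5112, 0.0311, 0.7641, 0.6612, 0.0134)
  val (pos, neg) = y.zip(score).partition(_._1 == 1)
  val pairScores = for ((_, p) <- pos; (_, n) <- neg)
    yield if (p > n) 1.0 else if (p == n) 0.5 else 0.0
  println(pairScores.sum / pairScores.size) // 22 / 30 = 0.7333...
}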
Example 92
Source File: RegressionMetricsTest.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.metrics

import breeze.linalg.DenseVector
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class RegressionMetricsTest extends AnyFlatSpec with Matchers {

  "Regression metrics" should "calculate the rmse value" in {
    val y = DenseVector(1.0f, 4.1f, 2.2f, 5.1f, 9.6f)
    val yPred = DenseVector(1.2f, 1.4f, 8.2f, 3.1f, 9.6f)

    rmse(y, yPred) shouldEqual 3.076686529368892f
  }

  they should "calculate the mse value" in {
    val y = DenseVector(1.0f, 4.1f, 2.2f, 5.1f, 9.6f)
    val yPred = DenseVector(1.2f, 1.4f, 8.2f, 3.1f, 9.6f)

    mse(y, yPred) shouldEqual 9.466f +- 0.001f
  }

  they should "calculate the mae value" in {
    val y = DenseVector(1.0f, 4.1f, 2.2f, 5.1f, 9.6f)
    val yPred = DenseVector(1.2f, 1.9f, 2.8f, 4.1f, 10.6f)

    assert(mae(y, yPred) === 1.0f +- 0.0000001f)
  }

  they should "calculate the explained variance score" in {
    val y = DenseVector(1.0f, 4.1f, 2.2f, 5.1f, 9.6f)
    val yPred = DenseVector(2.2f, 2.9f, 0.0f, 6.1f, 10.8f)

    assert(explainedVariance(y, yPred) === 0.769195820081781f +- 0.0000001f)
  }
} 
Example 93
Source File: LinearModel.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.linear.typeclasses

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.optimize.{DiffFunction, LBFGS}
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.typeclasses.Predictor

trait LinearModel[A] {
  this: Predictor[A] =>

  
  protected[linear] def lossGradStateless(model: A, w: RealVector, x: Features, y: Target): RealVector

  override def isFitted(model: A): Boolean = w(model).isDefined

  override def predictSafe(model: A, x: Features): Target =
    predictStateless(model, w(model).get, xWithBiasTerm(x))

  protected def maximumLikelihood(model: A, x: Features, y: Target, init: RealVector): RealVector = {
    val diffFunction = new DiffFunction[RealVector] {
      override def calculate(w: RealVector): (Double, RealVector) =
        (lossStateless(model, w, x, y).toDouble, lossGradStateless(model, w, x, y))
    }
    val lbfgs = new LBFGS[DenseVector[Float]](tolerance = 1e-4)
    lbfgs.minimize(diffFunction, init)
  }

  protected def xWithBiasTerm(x: Features): Features =
    DenseMatrix.horzcat(DenseMatrix.ones[Float](x.rows, 1), x)
} 
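
The maximumLikelihood method above follows the standard Breeze optimization pattern: wrap the loss and its gradient in a DiffFunction and hand it to LBFGS. A self-contained sketch of that pattern on a toy quadratic (not doddle-model code; constructor arguments follow the usual Breeze signature):

import breeze.linalg.DenseVector
import breeze.optimize.{DiffFunction, LBFGS}

object LbfgsSketch extends App {
  val target = DenseVector(3.0, 3.0, 3.0)
  val f = new DiffFunction[DenseVector[Double]] {
    // f(w) = ||w - target||^2, with gradient 2 (w - target)
    override def calculate(w: DenseVector[Double]): (Double, DenseVector[Double]) = {
      val d = w - target
      (d dot d, d * 2.0)
    }
  }
  val lbfgs = new LBFGS[DenseVector[Double]](maxIter = 100, m = 5)
  println(lbfgs.minimize(f, DenseVector.zeros[Double](3))) // ~ DenseVector(3.0, 3.0, 3.0)
}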
Example 94
Source File: LinearClassifier.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.linear.typeclasses

import breeze.linalg.DenseVector
import io.picnicml.doddlemodel.data.{Features, RealVector, Simplex, Target}
import io.picnicml.doddlemodel.syntax.OptionSyntax._
import io.picnicml.doddlemodel.typeclasses.Classifier

trait LinearClassifier[A] extends LinearModel[A] with Classifier[A] {

  
  protected def predictProbaStateless(model: A, w: RealVector, x: Features): Simplex

  override protected def fitSafe(model: A, x: Features, y: Target): A = {
    val wLength = (x.cols + 1) * (numClasses(model).getOrBreak - 1)
    val wInitial = DenseVector.zeros[Float](wLength)
    copy(model, w = maximumLikelihood(model, xWithBiasTerm(x), y, wInitial))
  }

  override protected def predictProbaSafe(model: A, x: Features): Simplex =
    predictProbaStateless(model, w(model).get, xWithBiasTerm(x))
} 
Example 95
Source File: Binarizer.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.DenseVector
import io.picnicml.doddlemodel.data.Feature.FeatureIndex
import io.picnicml.doddlemodel.data.{Features, RealVector}
import io.picnicml.doddlemodel.typeclasses.Transformer

case class Binarizer(private val thresholds: RealVector, private val featureIndex: FeatureIndex) {
  private val numNumeric = featureIndex.numerical.columnIndices.length
  require(numNumeric == 0 || numNumeric == thresholds.length, "A threshold should be given for every numerical column")
}

object Binarizer {
  def apply(threshold: Float, featureIndex: FeatureIndex): Binarizer = {
    val numNumeric: Int = featureIndex.numerical.columnIndices.length
    val thresholdsExtended = DenseVector.fill(numNumeric) { threshold }
    Binarizer(thresholdsExtended, featureIndex)
  }

  @SerialVersionUID(0L)
  implicit lazy val ev: Transformer[Binarizer] = new Transformer[Binarizer] {

    override def isFitted(model: Binarizer): Boolean = true

    override def fit(model: Binarizer, x: Features): Binarizer = model

    override protected def transformSafe(model: Binarizer, x: Features): Features = {
      val xCopy = x.copy
      model.featureIndex.numerical.columnIndices.zipWithIndex.foreach {
        case (colIndex, thresholdIndex) => (0 until xCopy.rows).foreach {
          rowIndex =>
            xCopy(rowIndex, colIndex) = if (xCopy(rowIndex, colIndex) > model.thresholds(thresholdIndex)) 1.0f else 0.0f
        }
      }

      xCopy
    }
  }
} 
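
A short usage sketch of the Binarizer above, mirroring the API exercised in BinarizerTest (Example 82); the input values are hypothetical:

import breeze.linalg.{DenseMatrix, DenseVector}
import io.picnicml.doddlemodel.data.Feature.FeatureIndex
import io.picnicml.doddlemodel.preprocessing.Binarizer
import io.picnicml.doddlemodel.preprocessing.Binarizer.ev

object BinarizerUsage extends App {
  val x = DenseMatrix(
    List(0.2f, 0.8f),
    List(0.9f, 0.1f)
  )
  val binarizer = Binarizer(DenseVector(0.5f, 0.5f), FeatureIndex.numerical(2))
  println(ev.transform(binarizer, x)) // 0/1 entries, thresholded per column
}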
Example 96
Source File: GibbsSample.scala    From glintlda   with MIT License 5 votes vote down vote up
package glintlda

import breeze.linalg.{DenseVector, SparseVector, sum}
import glintlda.util.FastRNG

// Note: the GibbsSample class itself was elided in this excerpt; a minimal
// sketch consistent with how it is used below:
class GibbsSample(val features: Array[Int], val topics: Array[Int])

object GibbsSample {
  def apply(sv: SparseVector[Int], random: FastRNG, topics: Int): GibbsSample = {
    val totalTokens = sum(sv)
    val sample = new GibbsSample(new Array[Int](totalTokens), new Array[Int](totalTokens))

    var i = 0
    var current = 0
    while (i < sv.activeSize) {
      val index = sv.indexAt(i)
      var value = sv.valueAt(i)
      while (value > 0) {
        sample.features(current) = index
        sample.topics(current) = random.nextPositiveInt() % topics
        current += 1
        value -= 1
      }
      i += 1
    }

    sample
  }

} 
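
Usage sketch for the factory above: a sparse bag-of-words vector is expanded into one (feature, topic) pair per token, with topics drawn uniformly at random. The FastRNG seed constructor is an assumption about its API:

import breeze.linalg.SparseVector
import glintlda.GibbsSample
import glintlda.util.FastRNG

object GibbsSampleUsage extends App {
  val doc = SparseVector.zeros[Int](10)
  doc(2) = 3 // word 2 occurs three times
  doc(7) = 1 // word 7 occurs once
  val sample = GibbsSample(doc, new FastRNG(42L), topics = 20) // seed constructor assumed
  println(sample.features.toList) // List(2, 2, 2, 7)
  println(sample.topics.length)   // 4 random topic assignments
}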
Example 97
Source File: Sampler.scala    From glintlda   with MIT License 5 votes vote down vote up
package glintlda.naive

import breeze.linalg.{DenseVector, Vector}
import breeze.stats.distributions.Multinomial
import glintlda.LDAConfig
import glintlda.util.FastRNG

// Note: the enclosing class was elided in this excerpt; a minimal sketch of
// the state sampleFeature reads (names and types inferred from the body below):
class Sampler(config: LDAConfig,
              documentCounts: DenseVector[Double],
              wordCounts: DenseVector[Double],
              globalCounts: DenseVector[Double],
              α: Double, β: Double, βSum: Double) {
  def sampleFeature(feature: Int, oldTopic: Int): Int = {
    var i = 0
    val p = DenseVector.zeros[Double](config.topics)
    var sum = 0.0
    while (i < config.topics) {
      p(i) = (documentCounts(i) + α) * ((wordCounts(i) + β) / (globalCounts(i) + βSum))
      sum += p(i)
      i += 1
    }
    p /= sum
    Multinomial(p).draw()
  }

} 
Example 98
Source File: SparkHdfsLR.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession


object SparkHdfsLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkHdfsLR")
      .getOrCreate()

    val inputPath = args(0)
    val lines = spark.read.textFile(inputPath).rdd

    val points = lines.map(parsePoint).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    spark.stop()
  }
}
// scalastyle:on println 
Example 99
Source File: LocalLR.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{DenseVector, Vector}


object LocalLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- data) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient +=  p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
}
// scalastyle:on println 
Example 100
Source File: SparkKMeans.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import breeze.linalg.{squaredDistance, DenseVector, Vector}

import org.apache.spark.sql.SparkSession


object SparkKMeans {

  def parseVector(line: String): Vector[Double] = {
    DenseVector(line.split(' ').map(_.toDouble))
  }

  def closestPoint(p: Vector[Double], centers: Array[Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 0 until centers.length) {
      val tempDist = squaredDistance(p, centers(i))
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 3) {
      System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkKMeans")
      .getOrCreate()

    val lines = spark.read.textFile(args(0)).rdd
    val data = lines.map(parseVector _).cache()
    val K = args(1).toInt
    val convergeDist = args(2).toDouble

    val kPoints = data.takeSample(withReplacement = false, K, 42)
    var tempDist = 1.0

    while(tempDist > convergeDist) {
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val pointStats = closest.reduceByKey{case ((p1, c1), (p2, c2)) => (p1 + p2, c1 + c2)}

      val newPoints = pointStats.map {pair =>
        (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()

      tempDist = 0.0
      for (i <- 0 until K) {
        tempDist += squaredDistance(kPoints(i), newPoints(i))
      }

      for (newP <- newPoints) {
        kPoints(newP._1) = newP._2
      }
      println("Finished iteration (delta = " + tempDist + ")")
    }

    println("Final centers:")
    kPoints.foreach(println)
    spark.stop()
  }
}
// scalastyle:on println 
Example 101
Source File: LocalFileLR.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{DenseVector, Vector}


object LocalFileLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val nums = line.split(' ').map(_.toDouble)
    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
    val points = lines.map(parsePoint _)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- points) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
}
// scalastyle:on println 
Example 102
Source File: SparkLR.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession


object SparkLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkLR")
      .getOrCreate()

    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = spark.sparkContext.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    spark.stop()
  }
}
// scalastyle:on println 
Example 103
Source File: LocalKMeans.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println 
Example 104
Source File: SphericalHarmonicsSolver.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.deluminate

import breeze.linalg.{DenseMatrix, DenseVector}
import scalismo.color.RGB
import scalismo.faces.numerics.SphericalHarmonics
import scalismo.geometry.{EuclideanVector, _3D}

// Note: the IlluminatedPoint type (carrying radiance, normal and albedo per
// point) was elided in this excerpt, along with the object declaration.
object SphericalHarmonicsSolver {
  def solveSHSystemDeconvolve(points: IndexedSeq[IlluminatedPoint],
    kernel: IndexedSeq[Double]): IndexedSeq[EuclideanVector[_3D]] = {
    require(points.nonEmpty)
    // direct access data
    val radiances = points.map(_.radiance)
    val normals = points.map(_.normal)
    val albedi = points.map(_.albedo)

    // prepare SH basis
    val nSH = kernel.length
    val shBasis = IndexedSeq.tabulate(nSH)(i => SphericalHarmonics.shBasisFunction(i))

    // build target vector on rhs: b (3*#points x 1), vectorize all colors to r, g, b
    val b = DenseVector(radiances.toArray.flatMap(r => r.toVector.toArray))

    // build matrix: (3*#points) x  (3*#lightCoefficients)
    def matrixBuilder(i: Int, j: Int): Double = {
      // major indices: point, light coefficient
      val pointIndex = i / 3
      val shCoeffIndex = j / 3
      // minor indices: color index R, G, B
      val pointColorIndex = i % 3
      val shColorIndex = j % 3
      // matrix element: albedo[point, color] * Y[shCoeff](normal[point]) * kernel(shCoeff) * delta(pointColor, shColor)
      if (pointColorIndex == shColorIndex)
        albedi(pointIndex).toVector.toArray(pointColorIndex) * shBasis(shCoeffIndex)(normals(pointIndex)) * kernel(shCoeffIndex)
      else
        0.0
    }
    val A: DenseMatrix[Double] = DenseMatrix.tabulate(3 * points.length, 3 * nSH)(matrixBuilder)
    // solve linear system
    val lightField: DenseVector[Double] = A \ b

    // extract channeled coefficients
    val shCoeffs: IndexedSeq[EuclideanVector[_3D]] = lightField.toArray.grouped(3).map(a => EuclideanVector[_3D](a)).toIndexedSeq

    // finished
    shCoeffs
  }
} 
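
The deconvolution above ends in a dense least-squares solve via Breeze's \ operator, which returns the least-squares solution when the system is overdetermined. A minimal stand-alone illustration:

import breeze.linalg.{DenseMatrix, DenseVector}

object LeastSquaresSketch extends App {
  // three equations, two unknowns: fit a line through (0, 1), (1, 2), (2, 2.9)
  val A = DenseMatrix((1.0, 0.0), (1.0, 1.0), (1.0, 2.0))
  val b = DenseVector(1.0, 2.0, 2.9)
  val coeffs = A \ b // least-squares solution (intercept, slope)
  println(coeffs)    // ~ DenseVector(1.02, 0.95)
}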
Example 105
Source File: ColorTransform.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.render

import breeze.linalg.{DenseMatrix, DenseVector, inv}
import scalismo.color.RGB
import scalismo.color.ColorSpaceOperations.implicits._
import scalismo.geometry.{SquareMatrix, _3D}

// Note: the enclosing transform class (providing the gain, colorContrast and
// offset members and the `self` alias) was elided in this excerpt; only its
// invert method is shown.
  def invert: ColorTransform = new ColorTransform {
    private val c = colorContrast
    private val A = DenseMatrix(
      (c * (1 - 0.3) + 0.3, 0.59 - 0.59 * c, 0.11 - 0.11 * c),
      (0.3 - 0.3 * c, c * (1 - 0.59) + 0.59, 0.11 - 0.11 * c),
      (0.3 - 0.3 * c, 0.59 - 0.59 * c, c * (1 - 0.11) + 0.11))

    private val Ainv = inv(A)

    override def apply(color: RGB): RGB = {
      val mixed = (color - offset) / gain
      val b: DenseVector[Double] = Ainv * DenseVector[Double](mixed.r, mixed.g, mixed.b)
      RGB(b(0), b(1), b(2))
    }

    def invert: ColorTransformWithColorContrast = self
  }
} 
Example 106
Source File: MoMoRenderer.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.sampling.face

import breeze.linalg.DenseVector
import scalismo.color.RGBA
import scalismo.faces.image.PixelImage
import scalismo.faces.landmarks.TLMSLandmark2D
import scalismo.mesh.VertexColorMesh3D
import scalismo.faces.momo.{MoMo, MoMoCoefficients}
import scalismo.faces.parameters.{MoMoInstance, ParametricRenderer, RenderParameter}
import scalismo.geometry.Point
import scalismo.mesh.MeshSurfaceProperty
import scalismo.utils.Memoize

// Note: the MoMoRenderer class declaration and its primary render methods were
// elided in this excerpt; `cached` wraps those methods in memoizing overrides.
  def cached(cacheSize: Int) = new MoMoRenderer(model, clearColor) {
    private val imageRenderer = Memoize(super.renderImage, cacheSize)
    private val meshRenderer = Memoize(super.renderMesh, cacheSize)
    private val maskRenderer = Memoize((super.renderMask _).tupled, cacheSize)
    private val lmRenderer = Memoize((super.renderLandmark _).tupled, cacheSize * allLandmarkIds.length)
    private val instancer = Memoize(super.instanceFromCoefficients _, cacheSize)

    override def renderImage(parameters: RenderParameter): PixelImage[RGBA] = imageRenderer(parameters)
    override def renderLandmark(lmId: String, parameter: RenderParameter): Option[TLMSLandmark2D] = lmRenderer((lmId, parameter))
    override def renderMesh(parameters: RenderParameter): VertexColorMesh3D = meshRenderer(parameters)
    override def instance(parameters: RenderParameter): VertexColorMesh3D = instancer(parameters.momo)
    override def renderMask(parameters: RenderParameter, mask: MeshSurfaceProperty[Int]): PixelImage[Int] = maskRenderer((parameters, mask))
  }
}

object MoMoRenderer {
  def apply(model: MoMo, clearColor: RGBA) = new MoMoRenderer(model, clearColor)
  def apply(model: MoMo) = new MoMoRenderer(model, RGBA.BlackTransparent)
} 
Example 107
Source File: DenseCholesky.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.numerics

import breeze.linalg.{DenseMatrix, DenseVector}

object DenseCholesky {
  def substitutionSolver(choleskyFactor: DenseMatrix[Double], b: DenseVector[Double]): DenseVector[Double] = {
    require(choleskyFactor.rows == b.length, "dimensions disagree")
    require(choleskyFactor.rows == choleskyFactor.cols, "L must be square")

    val L = choleskyFactor
    val n = L.rows

    // solve L Lt x = b
    // 1) solve Ly = b
    val y = DenseVector.zeros[Double](n)
    // for each row substitute
    var row = 0
    while (row < n) {
      // previous elements
      val sum = L(row, 0 until row) * y(0 until row)
      // divide by diagonal element
      y(row) = (b(row) - sum) / L(row, row)
      row += 1
    }

    // 2) solve Lt x = y
    val x = DenseVector.zeros[Double](n)
    row = n - 1 // backward substitution from right to left
    while (row >= 0) {
      // previous elements
      val sum = L.t(row, row + 1 until n) * x(row + 1 until n)
      // divide by diagonal element
      x(row) = (y(row) - sum) / L(row, row)
      row -= 1
    }
    // x contains result
    x
  }
} 
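
Usage sketch for the solver above: factor a small symmetric positive-definite matrix with Breeze's cholesky (which returns the lower-triangular factor L) and recover x from the two triangular substitutions:

import breeze.linalg.{cholesky, DenseMatrix, DenseVector}
import scalismo.faces.numerics.DenseCholesky

object SubstitutionSolverUsage extends App {
  val A = DenseMatrix((4.0, 2.0), (2.0, 3.0)) // symmetric positive definite
  val b = DenseVector(1.0, 2.0)
  val L = cholesky(A)
  val x = DenseCholesky.substitutionSolver(L, b)
  println(A * x) // ~ b
}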
Example 108
Source File: SphericalHarmonicsLightTests.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.parameters

import breeze.linalg.DenseVector
import scalismo.faces.FacesTestSuite
import scalismo.color.RGB
import scalismo.geometry.{EuclideanVector, EuclideanVector3D, _3D}

class SphericalHarmonicsLightTests extends FacesTestSuite {

  describe("SphericalHarmonicsLight parameters") {
    it("can be constructed with specific number of bands") {
      val sh5 = SphericalHarmonicsLight.zero(5)
      sh5.bands shouldBe 5
      sh5.coefficients.length shouldBe SphericalHarmonicsLight.coefficientsInBands(5)
    }
    
    it("can be rescaled to more components") {
      val sh = SphericalHarmonicsLight.frontal
      assert(sh.bands == 1)

      val band2 = sh.withNumberOfBands(2)
      band2.coefficients.length shouldBe 9
      band2.bands shouldBe 2

      band2.coefficients.take(4) shouldBe sh.coefficients
      band2.coefficients.drop(4) shouldBe IndexedSeq.fill(5)(EuclideanVector3D.zero)
    }

    it("can be rescaled to fewer components") {
      val sh = SphericalHarmonicsLight.frontal
      assert(sh.bands == 1)

      val band0 = sh.withNumberOfBands(0)
      band0.coefficients.length shouldBe 1
      band0.bands shouldBe 0

      band0.coefficients(0) shouldBe sh.coefficients(0)
    }

    it("SH to DenseVector and from DenseVector"){
      val sh = SphericalHarmonicsLight.frontal
      val shB = sh.toBreezeVector
      val shN = SphericalHarmonicsLight.fromBreezeVector(shB)
      sh shouldBe shN

      val bV =  DenseVector(Array.fill(27)(rnd.scalaRandom.nextGaussian()))
      val shL = SphericalHarmonicsLight.fromBreezeVector(bV)
      val nV = shL.toBreezeVector
      bV.toArray should contain theSameElementsInOrderAs nV.toArray
    }

    it("avoids building of invalid coefficients") {
      val wrongLength = IndexedSeq.fill(2)(EuclideanVector3D.zero)
      an [IllegalArgumentException] should be thrownBy  SphericalHarmonicsLight(wrongLength)
    }

    describe("To recover a principal direction of illumination, SphericalHarmonicsLight") {
      it("extracts consistently the direction from randomly generated directed spherical harmonics.") {

        
        def testDirectionFromSH(eps: Double, repeat: Int): Boolean = {
          def genDirLight(v: EuclideanVector[_3D]) = SphericalHarmonicsLight.fromAmbientDiffuse(RGB(rnd.scalaRandom.nextDouble(), rnd.scalaRandom.nextDouble(), rnd.scalaRandom.nextDouble()), RGB(rnd.scalaRandom.nextDouble(), rnd.scalaRandom.nextDouble(), rnd.scalaRandom.nextDouble()), v)
          def randDirection = EuclideanVector.fromSpherical(1.0, rnd.scalaRandom.nextDouble() * math.Pi, rnd.scalaRandom.nextDouble() * math.Pi * 2.0) // non-uniform on the sphere; does not matter for this test
          (0 until repeat).forall { _ =>
            val v = randDirection.normalize
            val rec = SphericalHarmonicsLight.directionFromSHLightIntensity(genDirLight(v))
            if (rec.isDefined) { // if we find a direction
              val d = (v - rec.get).norm
              d < eps
            } else { // if no direction is found; this test should always find one
              false
            }
          }
        }

        testDirectionFromSH(1e-14, 50) shouldBe true
      }

      it("gives sensible results for undirected SHL.") {
        val rec = SphericalHarmonicsLight.directionFromSHLightIntensity(SphericalHarmonicsLight.ambientWhite)
        rec.isDefined shouldBe false
      }
    }
  }
} 
Example 109
Source File: ConjugateGradientTests.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.numerics

import breeze.linalg.{CSCMatrix, DenseVector, norm}
import scalismo.faces.FacesTestSuite

class ConjugateGradientTests extends FacesTestSuite {

  val n = 50
  val tol = 1e-10

  val A = randomDiagDomBandMatrix(n, 10)
  val Ad = A.toDense
  val b = DenseVector.rand[Double](n)

  
  def randomDiagDomBandMatrix(n: Int, elementsPerColumn: Int): CSCMatrix[Double] = {
    val builder = new CSCMatrix.Builder[Double](n, n)
    // generate band
    for (col <- 0 until n; j <- 0 until elementsPerColumn) {
      val e = rnd.scalaRandom.nextDouble()
      val row = rnd.scalaRandom.nextInt(n)
      builder.add(row, col, e)
      builder.add(col, row, e)
    }
    // diagonally dominant
    for (i <- 0 until n) {
      builder.add(i, i, 2 * elementsPerColumn)
    }
    builder.result()
  }

  describe("ConjugateGradient solver") {
    it("can solve a random sparse linear system to high accuracy") {
      val x = ConjugateGradient.solveSparse(A, b, tol / 10)
      norm(b - A * x) should be < tol
    }
  }

  describe("PreconditionedConjugateGradient solver") {
    it("can solve a random sparse linear system to high accuracy with the incomplete Cholesky preconditioner") {
      val M = PreconditionedConjugateGradient.incompleteCholeskyPreconditioner(A)
      val x = PreconditionedConjugateGradient.solveSparse(A, b, M, tol / 10)
      norm(b - A * x) should be < tol
    }
  }
} 
Example 110
Source File: SparkKMeans.scala    From AI   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package com.bigchange.basic

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.{SparkConf, SparkContext}



object SparkKMeans {

  def parseVector(line: String): Vector[Double] = {
    DenseVector(line.split(' ').map(_.toDouble))
  }

  def closestPoint(p: Vector[Double], centers: Array[Vector[Double]]): Int = {

    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- centers.indices) {
      // squared distance to center i; keep the closest
      val tempDist = squaredDistance(p, centers(i))
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 3) {
      System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkKMeans").setMaster("local")
    val sc = new SparkContext(sparkConf)
    val lines = sc.textFile(args(0))
    val data = lines.map(parseVector).cache()
    // initial K value
    val K = args(1).toInt
    val convergeDist = args(2).toDouble

    val kPoints = data.takeSample(withReplacement = false, K, 42)
    var tempDist = 1.0

    while(tempDist > convergeDist) {
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val pointStats = closest.reduceByKey{case ((p1, c1), (p2, c2)) => (p1 + p2, c1 + c2)}

      val newPoints = pointStats.map {pair =>
        (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()

      tempDist = 0.0
      for (i <- 0 until K) {
        tempDist += squaredDistance(kPoints(i), newPoints(i))
      }

      for (newP <- newPoints) {
        kPoints(newP._1) = newP._2
      }
      println("Finished iteration (delta = " + tempDist + ")")
    }

    println("Final centers:")
    kPoints.foreach(println)
    sc.stop()
  }
}
// scalastyle:on println 
Example 111
Source File: StreamingModelProducer.scala    From AI   with Apache License 2.0 5 votes vote down vote up
package com.bigchange.streaming

import java.io.PrintWriter
import java.net.ServerSocket

import breeze.linalg.DenseVector

import scala.util.Random


object StreamingModelProducer {

  def main(args: Array[String]) {

    val maxEvent = 100
    val numFeatures = 100
    val random = new Random()
    // helper generating arrays of standard normally distributed values
    def generateRandomArray(n: Int) = Array.tabulate(n)(_ => random.nextGaussian())
    // a fixed random model weight vector
    val w = new DenseVector(generateRandomArray(numFeatures))
    val intercept = random.nextGaussian() * 10
    // generate some random data events
    def generateNoisyData(n: Int) = {

      (1 to n).map { i  =>
        val x = new DenseVector(generateRandomArray(numFeatures)) // 随机特征向量
        val y = w.dot(x)
        val noisy = y + intercept // target value
        (noisy, x)
      }
    }

    // create the socket server that streams the generated events
    val listener = new ServerSocket(9999)
    println("listener port:" + listener.getLocalPort)

    while(true) {
      val socket = listener.accept()
      new Thread() {
        override def run() = {
          println("get client from:" + socket.getInetAddress)
          val out = new PrintWriter(socket.getOutputStream, true)

          while (true) {
            Thread.sleep(1000)
            val num = random.nextInt(maxEvent)
            val productEvents = generateNoisyData(num)
            productEvents.foreach { case(y, x) =>
              out.write(y + "\t" + x.data.mkString(","))
              out.write("\n")
            }
            out.flush()
            println(s"created $num events")
          }
          socket.close()
        }

      }.start()
    }

  }
} 
Example 112
Source File: StreamingSimpleModel.scala    From AI   with Apache License 2.0 5 votes vote down vote up
package com.bigchange.streaming

import breeze.linalg.DenseVector
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD}
import org.apache.spark.streaming.{Seconds, StreamingContext}


object StreamingSimpleModel {

  def main(args: Array[String]) {

    val ssc = new StreamingContext("local[2]", "test", Seconds(10)) // needs >= 2 local cores: one for the receiver, one for processing
    val stream = ssc.socketTextStream("localhost",9999)
    val numberFeatures = 100
    val zeroVector = DenseVector.zeros[Double](numberFeatures)
    val model = new StreamingLinearRegressionWithSGD()
      .setInitialWeights(Vectors.dense(zeroVector.data))
      .setNumIterations(1)
      .setStepSize(0.01)


    val labeledStream = stream.map { event =>
      val split = event.split("\t")
      val y = split(0).toDouble
      val features = split(1).split(",").map(_.toDouble)
      LabeledPoint(label = y, features = Vectors.dense(features))
    }

    model.trainOn(labeledStream)
    // use the DStream transform operator to pair predictions with labels
    val predictAndTrue = labeledStream.transform { rdd =>
      val latestModel = model.latestModel()
      rdd.map { point =>
        val predict = latestModel.predict(point.features)
        predict - point.label
      }
    }
    // compute MSE and RMSE over each batch
    predictAndTrue.foreachRDD { rdd =>
      val mse = rdd.map(x => x * x).mean()
      val rmse = math.sqrt(mse)
      println(s"current batch, MSE: $mse, RMSE:$rmse")

    }
    ssc.start()
    ssc.awaitTermination()

  }
} 
Example 113
Source File: MathUtil.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.util

import breeze.linalg.{DenseVector, Transpose}
import breeze.numerics.sqrt
import org.apache.commons.math.util.FastMath



object MathUtil {

  val LOGZERO = Double.NegativeInfinity

  def isLogZero(x: Double): Boolean = x.isNegInfinity

  def exp(x: Double): Double = {
    if (x.isNegInfinity)
      0.0
    else
      FastMath.exp(x)
  }

  def ln(x: Double): Double = {
    if(x == 0.0)
      LOGZERO
    else
      FastMath.log(x)
  }

  def lnsum(a: Double, b: Double): Double = {
    if(a.isNegInfinity || b.isNegInfinity) {
      if(a.isNegInfinity)
        b
      else
        a
    } else {
      if(a > b)
        a + ln(1 + FastMath.exp(b-a))
      else
        b + ln(1 + FastMath.exp(a-b))
    }
  }

  def lnsum(seq: TraversableOnce[Double]): Double = {
    seq.foldLeft(MathUtil.ln(0.0))(MathUtil.lnsum)
  }

  def lnproduct(seq: TraversableOnce[Double]): Double = {
    seq.foldLeft(MathUtil.ln(1.0))(MathUtil.lnproduct)
  }

  def lnproduct(a: Double, b: Double): Double = {
    if (a.isNegInfinity || b.isNegInfinity)
      LOGZERO
    else
      a + b
  }

  def magnitude(vector: Transpose[DenseVector[Double]]): Double = {
    sqrt(vector * vector.t)
  }
  def magnitude(vector: Transpose[DenseVector[Float]]): Float = {
    sqrt(vector * vector.t)
  }

  def cosineSimilarity(vector1: Transpose[DenseVector[Double]], vector2: Transpose[DenseVector[Double]]): Double = {
    (vector1 * vector2.t) / (magnitude(vector1) * magnitude(vector2))
  }
  def cosineSimilarity(vector1: Transpose[DenseVector[Float]], vector2: Transpose[DenseVector[Float]]): Float = {
    (vector1 * vector2.t) / (magnitude(vector1) * magnitude(vector2))
  }
} 
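
Usage sketch: the point of lnsum and lnproduct is to combine probabilities in log space without underflow, using the identity ln(a + b) = max + ln(1 + exp(min - max)). For example:

import org.dbpedia.spotlight.util.MathUtil._

object LogSpaceSketch extends App {
  val logP = ln(1e-300) * 2            // log of a probability far below Double range
  val logQ = lnproduct(logP, ln(0.5))  // log(p * 0.5)
  val logSum = lnsum(logP, logQ)       // log(p + 0.5 * p), still finite
  println(exp(logSum - logP))          // 1.5
}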
Example 114
Source File: MathUtilTest.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.util

import breeze.linalg.DenseVector
import org.dbpedia.spotlight.util.MathUtil.{cosineSimilarity, magnitude}
import org.junit.Test


class MathUtilTest {
  @Test
  def testMagnitudeDouble {
    val doubleExamples = Map(
      DenseVector.zeros[Double](5).t -> 0.0,
      DenseVector.ones[Double](5).t -> 2.23606797749979,
      DenseVector.ones[Double](10).t -> 3.1622776601683795
    )

    doubleExamples.keys.foreach( vector => {
      val m = magnitude(vector)
      printf("%-30s=%30s \n",doubleExamples(vector),m)
      assert(m.equals(doubleExamples(vector)))
    })
  }

  @Test
  def testMagnitudeFloat{
    val floatExamples = Map(
      DenseVector.zeros[Float](5).t -> 0.0.toFloat,
      DenseVector.ones[Float](5).t -> 2.23606797749979.toFloat,
      DenseVector.ones[Float](10).t -> 3.1622776601683795.toFloat
    )

    floatExamples.keys.foreach( vector => {
      val m = magnitude(vector)
      printf("%-30s=%30s \n",floatExamples(vector),m)
      assert(m.equals(floatExamples(vector)))
    })
  }

  @Test
  def testCosineSimilarityDouble{
    val epsilon = 0.0001
    val doubleExamples = Map(
      (DenseVector.ones[Double](5).t, DenseVector.ones[Double](5).t) -> 1.0,
      (DenseVector(1.0, 0.0, 0.0).t, DenseVector(0.0, 1.0, 0.0).t) -> 0.0,
      (DenseVector(1.0, 1.0, 0.0).t, DenseVector(0.0, 1.0, 1.0).t) -> 0.5,
      (DenseVector(1.0, 1.0, 0.0, 0.0, 0.0, 0.0).t, DenseVector(0.0, 1.0, 1.0, 0.0, 0.0, 0.0).t) -> 0.5

    )
    doubleExamples.keys.foreach( vectors => {
      val sim = cosineSimilarity(vectors._1, vectors._2)
      printf("%-30s=%30s (+/-(%s)) \n",doubleExamples(vectors), sim, epsilon)
      assert((sim - doubleExamples(vectors)) < epsilon)
    })
  }

  @Test
  def testCosineSimilarityFloat{
    val epsilon = 0.0001
    val doubleExamples = Map(
      (DenseVector.ones[Float](5).t, DenseVector.ones[Float](5).t) -> 1.0.toFloat

    )
    doubleExamples.keys.foreach( vectors => {
      val sim = cosineSimilarity(vectors._1, vectors._2)
      printf("%-30s=%30s (+/-(%s)) \n",doubleExamples(vectors), sim, epsilon)
      assert((sim - doubleExamples(vectors)) < epsilon)
    })
  }

  @Test(expected = classOf[IllegalArgumentException])
  def testCosineSimilarityThrowsOnWrongDimensions{
    printf("Testing that cosine similarity fails on dimension mismatch..")
    val epsilon = 0.0001
    val doubleExamples = Map(
      (DenseVector.ones[Float](6).t, DenseVector.ones[Float](5).t) -> 1.0.toFloat

    )

    doubleExamples.keys.foreach( vectors => {
        val sim = cosineSimilarity(vectors._1, vectors._2)
        printf("%-30s=%30s (+/-(%s)) \n",doubleExamples(vectors), sim, epsilon)
        assert((sim - doubleExamples(vectors)) < epsilon)

    })
  }




} 
Example 115
Source File: WeightedLabeledPoint.scala    From flint   with Apache License 2.0 5 votes vote down vote up
package com.twosigma.flint.math.stats.regression

import breeze.linalg.DenseVector
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext

case class WeightedLabeledPoint(label: Double, weight: Double, features: DenseVector[Double]) {
  
  def generateSampleData(sc: SparkContext, weights: DenseVector[Double], intercept: Double,
    numRows: Long = 100L, numPartitions: Int = 4, errorScalar: Double = 1.0,
    seed: Long = 1L): RDD[WeightedLabeledPoint] = {
    val len = weights.length + 2
    // The last entry will serve as the weight of point and the second last entry will serve
    // as noisy of the label.
    val data = RandomRDDs.normalVectorRDD(sc, numRows, len, numPartitions, seed)
    data.map { d =>
      val fw = d.toArray
      val x = new DenseVector(fw.dropRight(2))
      WeightedLabeledPoint(
        weights.dot(x) + intercept + errorScalar * fw(len - 2),
        Math.abs(fw(len - 1)) + 0.5, x
      )
    }
  }
} 
Example 116
Source File: OLSMultipleLinearRegression.scala    From flint   with Apache License 2.0 5 votes vote down vote up
package com.twosigma.flint.math.stats.regression

import org.apache.spark.rdd.RDD
import breeze.linalg.{ DenseMatrix, DenseVector }

object OLSMultipleLinearRegression {

  
  def regression(input: RDD[WeightedLabeledPoint], intercept: Boolean = true): LinearRegressionModel = {
    // Try to get the number of columns
    val nCols = if (intercept) {
      input.first.features.length + 1
    } else {
      input.first.features.length
    }

    val (xx, xy, swx, srwsl, ssrw, wsl, sw, n, lw) = input.treeAggregate((
      new DenseMatrix[Double](nCols, nCols), // 1. Calculate a k-by-k matrix X^TX.
      new DenseVector[Double](nCols), // 2. Calculate a k-dimension vector X^Ty.
      new DenseVector[Double](nCols), // 3. Calculate a k-dimension vector of weighted sum of X.
      0.0, // 4. Calculate the square root weighted sum of labels.
      0.0, // 5. Calculate the sum of square root of weights.
      0.0, // 6. Calculate the weighted sum of labels.
      0.0, // 7. Calculate the sum of weights.
      0: Long, // 8. Calculate the length of input.
      0.0 // 9. Calculate sum of log weights
    ))(
      // U is a pair of matrix and vector and v is a WeightedLabeledPoint.
      seqOp = (U, v) => {
      // Append 1.0 at the head for calculating intercept.
      val x = if (intercept) {
        DenseVector.vertcat(DenseVector(1.0), v.features)
      } else {
        v.features
      }
      val wx = x * v.weight
      val sqrtW = math.sqrt(v.weight)
      // Unfortunately, breeze.linalg.DenseVector does not support tensor product.
      (U._1 += wx.asDenseMatrix.t * x.asDenseMatrix,
        U._2 += wx * v.label,
        U._3 += wx,
        U._4 + v.label * sqrtW,
        U._5 + sqrtW,
        U._6 + v.label * v.weight,
        U._7 + v.weight,
        U._8 + 1,
        U._9 + math.log(v.weight))
    }, combOp = (U1, U2) => (
      U1._1 += U2._1,
      U1._2 += U2._2,
      U1._3 += U2._3,
      U1._4 + U2._4,
      U1._5 + U2._5,
      U1._6 + U2._6,
      U1._7 + U2._7,
      U1._8 + U2._8,
      U1._9 + U2._9
    )
    )
    LinearRegressionModel(input, intercept, n, (xx + xx.t) :/ 2.0, xy, swx, srwsl, ssrw, wsl, sw, lw)
  }
} 
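
Chaining the two flint examples above (a sketch; sc is an assumed SparkContext, and the LinearRegressionModel returned by regression is not reproduced in this collection):

// sc: an existing SparkContext (assumed)
val data = WeightedLabeledPoint(0.0, 1.0, DenseVector(0.0))
  .generateSampleData(sc, weights = DenseVector(1.5, -2.0), intercept = 0.5)
val model = OLSMultipleLinearRegression.regression(data) // intercept = true prepends a 1.0 column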
Example 117
Source File: RegressionSummarizer.scala    From flint   with Apache License 2.0 5 votes vote down vote up
package com.twosigma.flint.rdd.function.summarize.summarizer

import breeze.linalg.{ DenseVector, DenseMatrix }

case class RegressionRow(
  time: Long,
  x: Array[Double],
  y: Double,
  weight: Double
)

object RegressionSummarizer {
  
  protected[summarizer] def computeAkaikeIC(
    beta: DenseVector[Double],
    logLikelihood: Double,
    shouldIntercept: Boolean
  ): Double = {
    val k = beta.length
    -2.0 * logLikelihood + 2.0 * k
  }

  protected[summarizer] def computeResidualSumOfSquares(
    beta: DenseVector[Double],
    sumOfYSquared: Double,
    vectorOfXY: DenseVector[Double],
    matrixOfXX: DenseMatrix[Double]
  ): Double = {
    val k = beta.length
    require(matrixOfXX.rows == matrixOfXX.cols && vectorOfXY.length == k && matrixOfXX.cols == k)
    var residualSumOfSquares = sumOfYSquared
    var i = 0
    while (i < beta.length) {
      var rss = -2.0 * vectorOfXY(i)
      rss += beta(i) * matrixOfXX(i, i)
      var j = 0
      while (j < i) {
        rss += 2.0 * beta(j) * matrixOfXX(i, j)
        j = j + 1
      }
      residualSumOfSquares += rss * beta(i)
      i = i + 1
    }
    residualSumOfSquares
  }

  protected[summarizer] def computeRSquared(
    sumOfYSquared: Double,
    sumOfWeights: Double,
    sumOfY: Double,
    residualSumOfSquares: Double,
    shouldIntercept: Boolean
  ): Double = if (sumOfYSquared == 0.0 || sumOfWeights == 0.0) {
    Double.NaN
  } else {
    val meanOfY = sumOfY / sumOfWeights
    var varianceOfY = sumOfYSquared / sumOfWeights
    if (shouldIntercept) {
      varianceOfY -= meanOfY * meanOfY
    }
    (varianceOfY - residualSumOfSquares / sumOfWeights) / varianceOfY
  }

} 
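
The nested while-loops in computeResidualSumOfSquares expand the usual closed form rss = y'y - 2 * beta'(X'y) + beta'(X'X)beta while visiting only the lower triangle of the symmetric matrix X'X. A direct breeze equivalent, useful for cross-checking (a sketch, not part of the original file):

import breeze.linalg.{DenseMatrix, DenseVector}

// Same quantity as computeResidualSumOfSquares, written as the closed form.
def rssDirect(beta: DenseVector[Double],
              sumOfYSquared: Double,
              vectorOfXY: DenseVector[Double],
              matrixOfXX: DenseMatrix[Double]): Double =
  sumOfYSquared - 2.0 * (beta dot vectorOfXY) + (beta dot (matrixOfXX * beta))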
Example 118
Source File: RegressionSummarizerSpec.scala    From flint   with Apache License 2.0 5 votes vote down vote up
package com.twosigma.flint.rdd.function.summarize.summarizer

import breeze.linalg.{ DenseVector, DenseMatrix }
import org.scalatest.FlatSpec

class RegressionSummarizerSpec extends FlatSpec {

  "RegressionSummarizer" should "transform from RegressRow correctly" in {
    val x: Array[RegressionRow] = Array(
      RegressionRow(time = 0L, x = Array(1d, 2d), y = 3d, weight = 4d),
      RegressionRow(time = 0L, x = Array(4d, 5d), y = 6d, weight = 16d)
    )

    val (response1, predictor1, yw1) = RegressionSummarizer.transform(x, shouldIntercept = true, isWeighted = true)
    assert(response1.equals(DenseMatrix(Array(2d, 2d, 4d), Array(4d, 16d, 20d))))
    assert(predictor1.equals(DenseVector(Array(6d, 24d))))
    assert(yw1.deep == Array((3d, 4d), (6d, 16d)).deep)

    val (response2, predictor2, yw2) = RegressionSummarizer.transform(x, shouldIntercept = true, isWeighted = false)
    assert(response2.equals(DenseMatrix(Array(1d, 1d, 2d), Array(1d, 4d, 5d))))
    assert(predictor2.equals(DenseVector(Array(3d, 6d))))
    assert(yw2.deep == Array((3d, 1d), (6d, 1d)).deep)

    val (response3, predictor3, yw3) = RegressionSummarizer.transform(x, shouldIntercept = false, isWeighted = true)
    assert(response3.equals(DenseMatrix(Array(2d, 4d), Array(16d, 20d))))
    assert(predictor3.equals(DenseVector(Array(6d, 24d))))
    assert(yw3.deep == Array((3d, 4d), (6d, 16d)).deep)

    val (response4, predictor4, yw4) = RegressionSummarizer.transform(x, shouldIntercept = false, isWeighted = false)
    assert(response4.equals(DenseMatrix(Array(1d, 2d), Array(4d, 5d))))
    assert(predictor4.equals(DenseVector(Array(3d, 6d))))
    assert(yw4.deep == Array((3d, 1d), (6d, 1d)).deep)
  }
} 
Example 119
Source File: GPModelTest.scala    From automl   with Apache License 2.0 5 votes vote down vote up
package com.tencent.angel.spark.automl

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.numerics.{cos, pow}
import com.tencent.angel.spark.automl.tuner.kernel.Matern5Iso
import com.tencent.angel.spark.automl.tuner.model.GPModel
import org.scalatest.FunSuite

class GPModelTest extends FunSuite {

  test("test_linear") {
    // Test linear: y=2*x
    val X = DenseMatrix((1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)).t
    val y = 2.0 * DenseVector(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
    val z = DenseMatrix((2.5, 4.5, 6.5, 8.5, 10.0, 12.0)).t
    val truePredZ = 2.0 * DenseVector(2.5, 4.5, 6.5, 8.5, 10.0, 12.0)

    val covFunc = Matern5Iso()
    val initCovParams = DenseVector(1.0, 1.0)
    val initNoiseStdDev = 0.01

    val gpModel = GPModel(covFunc, initCovParams, initNoiseStdDev)
    gpModel.fit(X, y)

    println("Fitted covariance function params:")
    println(gpModel.covParams)
    println("Fitted noiseStdDev:")
    println(gpModel.noiseStdDev)
    println("\n")

    val prediction = gpModel.predict(z)
    println("Mean and Var:")
    println(prediction)
    println("True value:")
    println(truePredZ)
  }

  test("test_cosine") {
    // Test non-linear: y = cos(x) + 1
    val X = DenseMatrix((1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)).t
    val y = cos(DenseVector(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)) + 1.0
    val z = DenseMatrix((2.5, 4.5, 6.5, 8.5, 10.0, 12.0)).t
    val truePredZ = cos(DenseVector(2.5, 4.5, 6.5, 8.5, 10.0, 12.0)) + 1.0

    val covFunc = Matern5Iso()
    val initCovParams = DenseVector(1.0, 1.0)
    val initNoiseStdDev = 0.01

    val gpModel = GPModel(covFunc, initCovParams, initNoiseStdDev)
    gpModel.fit(X, y)

    println("Fitted covariance function params:")
    println(gpModel.covParams)
    println("Fitted noiseStdDev:")
    println(gpModel.noiseStdDev)
    println("\n")

    val prediction = gpModel.predict(z)
    println("Mean and Var:")
    println(prediction)
    println("True value:")
    println(truePredZ)
  }

  test("testSquare") {
    // Test non-linear: y = x^2
    val X = DenseMatrix((1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)).t
    val y = DenseVector(1.0, 4.0, 9.0, 16.0, 25.0, 36.0, 49.0, 64.0, 81.0)
    val z = DenseMatrix((2.5, 4.5, 6.5, 8.5, 10.0, 12.0)).t
    val truePredZ = pow(z, 2)

    val covFunc = Matern5Iso()
    val initCovParams = DenseVector(1.0, 1.0)
    val initNoiseStdDev = 0.01

    val gpModel = GPModel(covFunc, initCovParams, initNoiseStdDev)
    gpModel.fit(X, y)

    println("Fitted covariance function params:")
    println(gpModel.covParams)
    println("Fitted noiseStdDev:")
    println(gpModel.noiseStdDev)
    println("\n")

    val prediction = gpModel.predict(z)
    println("Mean and Var:")
    println(prediction)
    println("True value:")
    println(truePredZ)
  }
} 
Example 120
Source File: SquareDistTest.scala    From automl   with Apache License 2.0 5 votes vote down vote up
package com.tencent.angel.spark.automl

import breeze.linalg.{DenseMatrix, DenseVector}
import com.tencent.angel.spark.automl.tuner.math.SquareDist
import org.junit.Assert._
import org.scalatest.FunSuite

class SquareDistTest extends FunSuite {

  test("test_XX_1D") {

    val x = DenseVector(1.0, 2.0, 3.0).toDenseMatrix.t
    val expected = DenseMatrix((0.0, 1.0, 4.0), (1.0, 0.0, 1.0), (4.0, 1.0, 0.0))
    assertEquals(expected, SquareDist(x, x))
  }

  test("test_XX_2D") {

    val x = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0)).t
    val expected = DenseMatrix((0.0, 2.0, 8.0), (2.0, 0.0, 2.0), (8.0, 2.0, 0.0))
    assertEquals(expected, SquareDist(x, x))
  }

  test("test_XY_1D") {

    val x1 = DenseVector(1.0, 2.0, 3.0).toDenseMatrix.t
    val x2 = DenseVector(4.0, 5.0).toDenseMatrix.t

    val expected = DenseMatrix((9.0, 16.0), (4.0, 9.0), (1.0, 4.0))
    assertEquals(expected, SquareDist(x1, x2))
  }

  test("test_XY_2D") {

    val x1 = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0)).t
    val x2 = DenseMatrix((7.0, 8.0), (9.0, 10.0)).t

    val expected = DenseMatrix((61.0, 85.0), (41.0, 61.0), (25.0, 41.0))
    assertEquals(expected, SquareDist(x1, x2))
  }
} 
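
SquareDist itself is imported from the automl project and not shown here; the expected matrices above are the pairwise squared Euclidean distances between the rows of the two arguments. A naive reference implementation under that assumed contract (a sketch):

import breeze.linalg.DenseMatrix

def squareDistNaive(x1: DenseMatrix[Double], x2: DenseMatrix[Double]): DenseMatrix[Double] = {
  require(x1.cols == x2.cols, "row vectors must have the same dimension")
  val d = DenseMatrix.zeros[Double](x1.rows, x2.rows)
  for (i <- 0 until x1.rows; j <- 0 until x2.rows) {
    val diff = x1(i, ::).t - x2(j, ::).t  // difference of row i of x1 and row j of x2
    d(i, j) = diff dot diff
  }
  d
}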
Example 121
Source File: X2P.scala    From spark-tsne   with Apache License 2.0 5 votes vote down vote up
package com.github.saurfang.spark.tsne

import breeze.linalg.DenseVector
import org.apache.spark.mllib.X2PHelper._
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry, RowMatrix}
import org.apache.spark.mllib.rdd.MLPairRDDFunctions._
import org.slf4j.LoggerFactory

object X2P {

  private def logger = LoggerFactory.getLogger(X2P.getClass)

  def apply(x: RowMatrix, tol: Double = 1e-5, perplexity: Double = 30.0): CoordinateMatrix = {
    require(tol >= 0, "Tolerance must be non-negative")
    require(perplexity > 0, "Perplexity must be positive")

    val mu = (3 * perplexity).toInt //TODO: Expose this as parameter
    val logU = Math.log(perplexity)
    val norms = x.rows.map(Vectors.norm(_, 2.0))
    norms.persist()
    val rowsWithNorm = x.rows.zip(norms).map{ case (v, norm) => VectorWithNorm(v, norm) }
    val neighbors = rowsWithNorm.zipWithIndex()
      .cartesian(rowsWithNorm.zipWithIndex())
      .flatMap {
      case ((u, i), (v, j)) =>
        if(i < j) {
          val dist = fastSquaredDistance(u, v)
          Seq((i, (j, dist)), (j, (i, dist)))
        } else Seq.empty
    }
      .topByKey(mu)(Ordering.by(e => -e._2))

    val p_betas =
      neighbors.map {
        case (i, arr) =>
          var betamin = Double.NegativeInfinity
          var betamax = Double.PositiveInfinity
          var beta = 1.0

          val d = DenseVector(arr.map(_._2))
          var (h, p) = Hbeta(d, beta)

          //logInfo("data was " + d.toArray.toList)
          //logInfo("array P was " + p.toList)

          // Evaluate whether the perplexity is within tolerance
          def Hdiff = h - logU
          var tries = 0
          while (Math.abs(Hdiff) > tol && tries < 50) {
            //If not, increase or decrease precision
            if (Hdiff > 0) {
              betamin = beta
              beta = if (betamax.isInfinite) beta * 2 else (beta + betamax) / 2
            } else {
              betamax = beta
              beta = if (betamin.isInfinite) beta / 2 else (beta + betamin) / 2
            }

            // Recompute the values
            val HP = Hbeta(d, beta)
            h = HP._1
            p = HP._2
            tries = tries + 1
          }

          //logInfo("array P is " + p.toList)

          (arr.map(_._1).zip(p.toArray).map { case (j, v) => MatrixEntry(i, j, v) }, beta)
      }

    logger.info("Mean value of sigma: " + p_betas.map(x => math.sqrt(1 / x._2)).mean)
    new CoordinateMatrix(p_betas.flatMap(_._1))
  }
} 
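
Hbeta comes from X2PHelper and is not shown. The standard t-SNE helper it corresponds to returns the Shannon entropy H and the conditional probabilities P for one row of squared distances d at precision beta; a sketch under that assumption:

import breeze.linalg.{DenseVector, sum}
import breeze.numerics.exp

def Hbeta(d: DenseVector[Double], beta: Double): (Double, DenseVector[Double]) = {
  val p = exp(d * -beta)                              // unnormalized affinities
  val sumP = sum(p)
  val h = math.log(sumP) + beta * sum(d *:* p) / sumP // Shannon entropy
  (h, p / sumP)
}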
Example 122
Source File: MeanAveragePrecisionEvaluator.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.evaluation

import breeze.linalg.DenseVector
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._


class MeanAveragePrecisionEvaluator(numClasses: Int) {

  private def getAP(precisions: Array[Double], recalls: Array[Double]) = {
    var ap = 0.0
    val levels = (0 to 10).map(x => x / 10.0)
    levels.foreach { t =>
      // Find where recalls are greater than t and precision values at those indices
      val px = recalls.toSeq.zipWithIndex.filter(x => x._1 >= t).map(x => precisions(x._2))
      val p = if (px.isEmpty) {
        0.0
      } else {
        px.max
      }
      ap = ap + p / 11.0
    }
    ap
  }

} 
Example 123
Source File: MLlibUtils.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.utils

import breeze.linalg.{SparseVector, DenseMatrix, DenseVector}


object MLlibUtils {

  def breezeVectorToMLlib(breezeVector: breeze.linalg.Vector[Double]): org.apache.spark.mllib.linalg.Vector = {
    breezeVector match {
      case v: DenseVector[Double] =>
        if (v.offset == 0 && v.stride == 1 && v.length == v.data.length) {
          new org.apache.spark.mllib.linalg.DenseVector(v.data)
        } else {
          new org.apache.spark.mllib.linalg.DenseVector(v.toArray)  // Can't use underlying array directly, so make a new one
        }
      case v: SparseVector[Double] =>
        if (v.index.length == v.used) {
          new org.apache.spark.mllib.linalg.SparseVector(v.length, v.index, v.data)
        } else {
          new org.apache.spark.mllib.linalg.SparseVector(v.length, v.index.slice(0, v.used), v.data.slice(0, v.used))
        }
      case v: breeze.linalg.Vector[_] =>
        sys.error("Unsupported Breeze vector type: " + v.getClass.getName)
    }
  }

} 
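
A usage note: the fast path hands MLlib the dense vector's backing array without copying, while offset/stride views fall back to a copy via toArray.

val bv = DenseVector(1.0, 2.0, 3.0, 4.0)
val full = MLlibUtils.breezeVectorToMLlib(bv)          // wraps bv.data directly
val view = MLlibUtils.breezeVectorToMLlib(bv(1 to 2))  // slice view (offset != 0), so it is copied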
Example 124
Source File: GaussianMixtureModelEstimator.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.learning.external

import breeze.linalg.{convert, DenseMatrix, DenseVector}
import keystoneml.nodes.learning.GaussianMixtureModel
import org.apache.spark.rdd.RDD
import keystoneml.utils.external.EncEval
import keystoneml.workflow.Estimator


class GaussianMixtureModelEstimator(k: Int) {

  def fit(samples: Array[DenseVector[Double]]): GaussianMixtureModel = {
    val extLib = new EncEval
    val nDim = samples(0).length

    // Flatten this thing out.
    val sampleFloats = samples.map(_.toArray.map(_.toFloat))
    val res = extLib.computeGMM(k, nDim, sampleFloats.flatten)

    val meanSize = k*nDim
    val varSize = k*nDim
    val coefSize = k*nDim

    // Each array region is expected to be centroid-major.
    val means = convert(new DenseMatrix(nDim, k, res.slice(0, meanSize)), Double)
    val vars = convert(new DenseMatrix(nDim, k, res.slice(meanSize, meanSize+varSize)), Double)
    val coefs = convert(new DenseVector(res.slice(meanSize+varSize, meanSize+varSize+coefSize)), Double)

    new GaussianMixtureModel(means, vars, coefs)
  }
} 
Example 125
Source File: Windower.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.images

import breeze.linalg.DenseVector
import org.apache.spark.rdd.RDD
import keystoneml.pipelines.FunctionNode
import keystoneml.utils.{ImageMetadata, ChannelMajorArrayVectorizedImage, Image}



class Windower(
    stride: Int,
    windowSize: Int) extends FunctionNode[RDD[Image], RDD[Image]] {

  def apply(in: RDD[Image]) = {
    in.flatMap(getImageWindow)
  }

  def getImageWindow(image: Image) = {
    val xDim = image.metadata.xDim
    val yDim = image.metadata.yDim
    val numChannels = image.metadata.numChannels

    // Start at (0,0) in (x, y) and slide a windowSize x windowSize window by stride.
    (0 until xDim - windowSize + 1 by stride).flatMap { x =>
      (0 until yDim - windowSize + 1 by stride).map { y =>
        // Extract the window.
        val pool = new DenseVector[Double](windowSize * windowSize * numChannels)
        val startX = x
        val endX = x + windowSize
        val startY = y
        val endY = y + windowSize

        var c = 0
        while (c < numChannels) {
          var s = startX
          while (s < endX) {
            var b = startY
            while (b < endY) {
              pool(c + (s-startX)*numChannels +
                (b-startY)*(endX-startX)*numChannels) = image.get(s, b, c)
              b = b + 1
            }
            s = s + 1
          }
          c = c + 1
        }
        ChannelMajorArrayVectorizedImage(pool.toArray,
          ImageMetadata(windowSize, windowSize, numChannels))
      }
    }
  }

} 
Example 126
Source File: Pooler.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.images

import breeze.linalg.DenseVector
import keystoneml.pipelines._
import keystoneml.utils.{ImageMetadata, ChannelMajorArrayVectorizedImage, Image}
import keystoneml.workflow.Transformer


class Pooler(
    stride: Int,
    poolSize: Int,
    pixelFunction: Double => Double,
    poolFunction: DenseVector[Double] => Double)
  extends Transformer[Image, Image] {

  val strideStart = poolSize / 2

  def apply(image: Image) = {
    val xDim = image.metadata.xDim
    val yDim = image.metadata.yDim
    val numChannels = image.metadata.numChannels

    val numPoolsX = math.ceil((xDim - strideStart).toDouble / stride).toInt
    val numPoolsY = math.ceil((yDim - strideStart).toDouble / stride).toInt
    val patch = new Array[Double]( numPoolsX * numPoolsY * numChannels)

    // Start at strideStart in (x, y) and pool each poolSize x poolSize region, stepping by stride.
    for (x <- strideStart until xDim by stride;
         y <- strideStart until yDim by stride) {
      // Extract the pool. Then apply the pixel and pool functions

      val pool = DenseVector.zeros[Double](poolSize * poolSize)
      val startX = x - poolSize/2
      val endX = math.min(x + poolSize/2, xDim)
      val startY = y - poolSize/2
      val endY = math.min(y + poolSize/2, yDim)

      var c = 0
      while (c < numChannels) {
        var s = startX
        while (s < endX) {
          var b = startY
          while (b < endY) {
            pool((s-startX) + (b-startY)*(endX-startX)) =
              pixelFunction(image.get(s, b, c))
            b = b + 1
          }
          s = s + 1
        }
        patch(c + (x - strideStart)/stride * numChannels +
          (y - strideStart)/stride * numPoolsX * numChannels) = poolFunction(pool)
        c = c + 1
      }
    }
    ChannelMajorArrayVectorizedImage(patch, ImageMetadata(numPoolsX, numPoolsY, numChannels))
  }
} 
Example 127
Source File: SignedHellingerMapper.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.stats

import breeze.linalg.{DenseVector, DenseMatrix}
import breeze.numerics._
import keystoneml.workflow.Transformer


object SignedHellingerMapper extends Transformer[DenseVector[Double], DenseVector[Double]] {
  def apply(in: DenseVector[Double]): DenseVector[Double] = {
    signum(in) :* sqrt(abs(in))
  }
}

object BatchSignedHellingerMapper extends Transformer[DenseMatrix[Float], DenseMatrix[Float]] {
  def apply(in: DenseMatrix[Float]): DenseMatrix[Float] = {
    in.map(x => (math.signum(x) * math.sqrt(math.abs(x))).toFloat)
  }
} 
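
The signed Hellinger map keeps each component's sign and replaces its magnitude with its square root, for example:

SignedHellingerMapper(DenseVector(-4.0, 0.0, 9.0)) // DenseVector(-2.0, 0.0, 3.0)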
Example 128
Source File: StandardScaler.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.stats

import breeze.linalg.DenseVector
import breeze.numerics.sqrt
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
import org.apache.spark.rdd.RDD
import keystoneml.utils.MLlibUtils
import keystoneml.workflow.{Transformer, Estimator}


class StandardScaler(normalizeStdDev: Boolean = true, eps: Double = 1e-12)
    extends Estimator[DenseVector[Double], DenseVector[Double]] {

  override def fit(data: RDD[DenseVector[Double]]): StandardScalerModel = {
    val summary = data.treeAggregate(new MultivariateOnlineSummarizer)(
      (aggregator, data) => aggregator.add(MLlibUtils.breezeVectorToMLlib(data)),
      (aggregator1, aggregator2) => aggregator1.merge(aggregator2))
    if (normalizeStdDev) {
      new StandardScalerModel(
        MLlibUtils.mllibVectorToDenseBreeze(summary.mean),
        Some(sqrt(MLlibUtils.mllibVectorToDenseBreeze(summary.variance))
            .map(r => if (r.isNaN | r.isInfinite | math.abs(r) < eps) 1.0 else r)))
    } else {
      new StandardScalerModel(
        MLlibUtils.mllibVectorToDenseBreeze(summary.mean),
        None)
    }
  }
} 
Example 129
Source File: ClassLabelIndicators.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.util

import breeze.linalg.DenseVector
import org.apache.spark.rdd.RDD
import keystoneml.pipelines._
import keystoneml.workflow.Transformer


case class ClassLabelIndicatorsFromIntArrayLabels(numClasses: Int, validate: Boolean = false)
    extends Transformer[Array[Int], DenseVector[Double]] {

  assert(numClasses > 1, "numClasses must be > 1.")

  def apply(in: Array[Int]): DenseVector[Double] = {
    if(validate && (in.max >= numClasses || in.min < 0)) {
      throw new RuntimeException("Class labels are expected to be in the range [0, numClasses)")
    }

    val indicatorVector = DenseVector.fill(numClasses, -1.0)
    var i = 0
    while (i < in.length) {
      indicatorVector(in(i)) = 1.0
      i += 1
    }
    indicatorVector
  }
} 
Example 130
Source File: VectorSplitter.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.util

import breeze.linalg.DenseVector
import org.apache.spark.rdd.RDD
import keystoneml.pipelines.FunctionNode


class VectorSplitter(
    blockSize: Int,
    numFeaturesOpt: Option[Int] = None) 
  extends FunctionNode[RDD[DenseVector[Double]], Seq[RDD[DenseVector[Double]]]] {

  override def apply(in: RDD[DenseVector[Double]]): Seq[RDD[DenseVector[Double]]] = {
    val numFeatures = numFeaturesOpt.getOrElse(in.first.length)
    val numBlocks = math.ceil(numFeatures.toDouble / blockSize).toInt
    (0 until numBlocks).map { blockNum =>
      in.map { vec =>
        // Explicitly call toArray as breeze's slice is lazy
        val end = math.min(numFeatures, (blockNum + 1) * blockSize)
        DenseVector(vec.slice(blockNum * blockSize, end).toArray)
      }
    }
  }

  def splitVector(in: DenseVector[Double]): Seq[DenseVector[Double]] = {
    val numFeatures = numFeaturesOpt.getOrElse(in.length)
    val numBlocks = math.ceil(numFeatures.toDouble / blockSize).toInt
    (0 until numBlocks).map { blockNum =>
      // Explicitly call toArray as breeze's slice is lazy
      val end = math.min(numFeatures, (blockNum + 1) * blockSize)
      DenseVector(in.slice(blockNum * blockSize, end).toArray)
    }
  }
} 
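
splitVector cuts a feature vector into consecutive blockSize-length pieces, with a shorter final block when the length is not an exact multiple:

val splitter = new VectorSplitter(blockSize = 2)
splitter.splitVector(DenseVector(1.0, 2.0, 3.0, 4.0, 5.0))
// Seq(DenseVector(1.0, 2.0), DenseVector(3.0, 4.0), DenseVector(5.0))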
Example 131
Source File: TimitFeaturesDataLoader.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.loaders

import breeze.linalg.DenseVector
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import scala.collection.mutable


object TimitFeaturesDataLoader {

  def apply(sc: SparkContext,
      trainDataLocation: String,
      trainLabelsLocation: String,
      testDataLocation: String,
      testLabelsLocation: String,
      numParts: Int = 512): TimitFeaturesData = {
    val trainData = CsvDataLoader(sc, trainDataLocation, numParts)
    val trainLabels = createLabelsRDD(parseSparseLabels(trainLabelsLocation), trainData)

    val testData = CsvDataLoader(sc, testDataLocation, numParts)
    val testLabels = createLabelsRDD(parseSparseLabels(testLabelsLocation), testData)
    TimitFeaturesData(LabeledData(trainLabels.zip(trainData)), LabeledData(testLabels.zip(testData)))
  }
} 
Example 132
Source File: LinearPixels.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.pipelines.images.cifar

import breeze.linalg.DenseVector
import keystoneml.evaluation.MulticlassClassifierEvaluator
import keystoneml.loaders.CifarLoader
import keystoneml.nodes.images.{GrayScaler, ImageExtractor, ImageVectorizer, LabelExtractor}
import keystoneml.nodes.learning.LinearMapEstimator
import keystoneml.nodes.util.{Cacher, ClassLabelIndicatorsFromIntLabels, MaxClassifier}
import org.apache.spark.{SparkConf, SparkContext}
import keystoneml.pipelines.Logging
import scopt.OptionParser
import keystoneml.utils.Image
import keystoneml.workflow.Pipeline


object LinearPixels extends Logging {
  val appName = "LinearPixels"
  case class LinearPixelsConfig(trainLocation: String = "", testLocation: String = "")

  def run(sc: SparkContext, config: LinearPixelsConfig): Pipeline[Image, Int] = {
    val numClasses = 10

    // Load and cache the training data.
    val trainData = CifarLoader(sc, config.trainLocation).cache()

    val trainImages = ImageExtractor(trainData)

    val labelExtractor = LabelExtractor andThen
        ClassLabelIndicatorsFromIntLabels(numClasses) andThen
        new Cacher[DenseVector[Double]]
    val trainLabels = labelExtractor(trainData)

    // A featurizer maps input images into vectors. For this pipeline, we'll also convert the image to grayscale.
    // We then estimate our model by calling a linear solver on our data.
    val predictionPipeline = GrayScaler andThen
      ImageVectorizer andThen
      (new LinearMapEstimator, trainImages, trainLabels) andThen
      MaxClassifier

    // Calculate training error.
    val evaluator = new MulticlassClassifierEvaluator(numClasses)
    val trainEval = evaluator.evaluate(predictionPipeline(trainImages), LabelExtractor(trainData))

    // Do testing.
    val testData = CifarLoader(sc, config.testLocation)
    val testImages = ImageExtractor(testData)
    val testLabels = labelExtractor(testData)

    val testEval = evaluator.evaluate(predictionPipeline(testImages), LabelExtractor(testData))

    logInfo(s"Training accuracy: \n${trainEval.totalAccuracy}")
    logInfo(s"Test accuracy: \n${testEval.totalAccuracy}")

    predictionPipeline
  }

  def parse(args: Array[String]): LinearPixelsConfig = new OptionParser[LinearPixelsConfig](appName) {
    head(appName, "0.1")
    help("help") text("prints this usage text")
    opt[String]("trainLocation") required() action { (x,c) => c.copy(trainLocation=x) }
    opt[String]("testLocation") required() action { (x,c) => c.copy(testLocation=x) }
  }.parse(args, LinearPixelsConfig()).get

  
  def main(args: Array[String]) = {
    val appConfig = parse(args)

    val conf = new SparkConf().setAppName(appName)
    conf.setIfMissing("spark.master", "local[2]") // This is a fallback if things aren't set via spark submit.
    val sc = new SparkContext(conf)
    run(sc, appConfig)

    sc.stop()
  }

} 
Example 133
Source File: MeanAveragePrecisionSuite.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.evaluation

import breeze.linalg.DenseVector
import org.scalatest.FunSuite
import org.apache.spark.SparkContext
import keystoneml.utils.Stats
import keystoneml.workflow.PipelineContext

class MeanAveragePrecisionSuite extends FunSuite with PipelineContext {

  test("random map test") {
    sc = new SparkContext("local", "test")

    // Build some random test data with 4 classes 0,1,2,3
    val actual = List(Array(0, 3), Array(2), Array(1, 2), Array(0))
    val actualRdd = sc.parallelize(actual)

    val predicted = List(
      DenseVector(0.1, -0.05, 0.12, 0.5),
      DenseVector(-0.23, -0.45, 0.23, 0.1),
      DenseVector(-0.34, -0.32, -0.66, 1.52),
      DenseVector(-0.1, -0.2, 0.5, 0.8))

    val predictedRdd = sc.parallelize(predicted)

    val map = new MeanAveragePrecisionEvaluator(4).evaluate(predictedRdd, actualRdd)

    // Expected values from running this in MATLAB
    val expected = DenseVector(1.0, 0.3333, 0.5, 0.3333)

    assert(Stats.aboutEq(map, expected, 1e-4))
  }
} 
Example 134
Source File: BlockLinearMapperSuite.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.learning

import breeze.linalg.{DenseVector, DenseMatrix}
import breeze.stats.distributions.Rand
import keystoneml.workflow.PipelineContext
import scala.collection.mutable.ArrayBuffer

import org.scalatest.FunSuite

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import keystoneml.pipelines._
import keystoneml.utils.Stats

class BlockLinearMapperSuite extends FunSuite with PipelineContext with Logging {

  test("BlockLinearMapper transformation") {
    sc = new SparkContext("local", "test")

    val inDims = 1000
    val outDims = 100
    val numChunks = 5
    val numPerChunk = inDims/numChunks

    val mat = DenseMatrix.rand(inDims, outDims, Rand.gaussian)
    val vec = DenseVector.rand(inDims, Rand.gaussian)
    val intercept = DenseVector.rand(outDims, Rand.gaussian)

    val splitVec = (0 until numChunks).map(i => vec((numPerChunk*i) until (numPerChunk*i + numPerChunk)))
    val splitMat = (0 until numChunks).map(i => mat((numPerChunk*i) until (numPerChunk*i + numPerChunk), ::))

    val linearMapper = new LinearMapper[DenseVector[Double]](mat, Some(intercept))
    val blockLinearMapper = new BlockLinearMapper(splitMat, numPerChunk, Some(intercept))

    val linearOut = linearMapper(vec)

    // Test with intercept
    assert(Stats.aboutEq(blockLinearMapper(vec), linearOut, 1e-4))

    // Test the apply and evaluate call
    val blmOuts = new ArrayBuffer[RDD[DenseVector[Double]]]
    val splitVecRDDs = splitVec.map { vec =>
      sc.parallelize(Seq(vec), 1)
    }
    blockLinearMapper.applyAndEvaluate(splitVecRDDs,
      (predictedValues: RDD[DenseVector[Double]]) => {
        blmOuts += predictedValues
        ()
      }
    )

    // The last blmOut should match the linear mapper's output
    assert(Stats.aboutEq(blmOuts.last.collect()(0), linearOut, 1e-4))
  }
} 
Example 135
Source File: PoolingSuite.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.images

import breeze.linalg.{DenseVector, sum}
import keystoneml.nodes._
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{ChannelMajorArrayVectorizedImage, ImageMetadata}

class PoolingSuite extends FunSuite with Logging {

  test("pooling") {
    val imgArr =
      (0 until 4).flatMap { x =>
        (0 until 4).flatMap { y =>
          (0 until 1).map { c =>
            (c + x * 1 + y * 4 * 1).toDouble
          }
        }
      }.toArray

    val image = new ChannelMajorArrayVectorizedImage(imgArr, ImageMetadata(4, 4, 1))
    val pooling = new Pooler(2, 2, x => x, x => x.max)

    val poolImage = pooling(image)

    assert(poolImage.get(0, 0, 0) === 5.0)
    assert(poolImage.get(0, 1, 0) === 7.0)
    assert(poolImage.get(1, 0, 0) === 13.0)
    assert(poolImage.get(1, 1, 0) === 15.0)
  }

  test("pooling odd") {
    val hogImgSize = 14
    val convSizes = List(1, 2, 3, 4, 6, 8)
    convSizes.foreach { convSize =>
      val convResSize = hogImgSize - convSize + 1

      val imgArr =
        (0 until convResSize).flatMap { x =>
          (0 until convResSize).flatMap { y =>
            (0 until 1000).map { c =>
              (c + x * 1 + y * 4 * 1).toDouble
            }
          }
        }.toArray

      val image = new ChannelMajorArrayVectorizedImage(
        imgArr, ImageMetadata(convResSize, convResSize, 1000))

      val poolSizeReqd = math.ceil(convResSize / 2.0).toInt

      // We want poolSize to be even !!
      val poolSize = (math.ceil(poolSizeReqd / 2.0) * 2).toInt
      // overlap as little as possible
      val poolStride = convResSize - poolSize


      println(s"VALUES: $convSize $convResSize $poolSizeReqd $poolSize $poolStride")

      def summ(x: DenseVector[Double]): Double = sum(x)

      val pooling = new Pooler(poolStride, poolSize, identity, summ)
      val poolImage = pooling(image)
    }
  }
} 
Example 136
Source File: ClassLabelIndicatorsSuite.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.util

import breeze.linalg.DenseVector
import org.scalatest.FunSuite

class ClassLabelIndicatorsSuite extends FunSuite {
  test("single label indicators") {
    intercept[AssertionError] {
      val zerolabels = ClassLabelIndicatorsFromIntLabels(0)
    }

    intercept[AssertionError] {
      val onelabel = ClassLabelIndicatorsFromIntLabels(1)
    }


    val fivelabel = ClassLabelIndicatorsFromIntLabels(5)
    assert(fivelabel(2) === DenseVector(-1.0,-1.0,1.0,-1.0,-1.0))

    intercept[RuntimeException] {
      fivelabel(5)
    }
  }

  test("multiple label indicators without validation") {
    intercept[AssertionError] {
      val zerolabels = ClassLabelIndicatorsFromIntArrayLabels(0)
    }

    intercept[AssertionError] {
      val onelabel = ClassLabelIndicatorsFromIntArrayLabels(1)
    }

    val fivelabel = ClassLabelIndicatorsFromIntArrayLabels(5)

    assert(fivelabel(Array(2,1)) === DenseVector(-1.0,1.0,1.0,-1.0,-1.0))

    intercept[IndexOutOfBoundsException] {
      fivelabel(Array(4,6))
    }

    assert(fivelabel(Array(-1,2)) === DenseVector(-1.0,-1.0,1.0,-1.0,1.0),
      "In the unchecked case, we should get weird behavior.")

  }

  test("multiple label indicators with validation") {
    intercept[AssertionError] {
      val zerolabels = ClassLabelIndicatorsFromIntArrayLabels(0, true)
    }

    intercept[AssertionError] {
      val onelabel = ClassLabelIndicatorsFromIntArrayLabels(1, true)
    }

    val fivelabel = ClassLabelIndicatorsFromIntArrayLabels(5, true)

    assert(fivelabel(Array(2,1)) === DenseVector(-1.0,1.0,1.0,-1.0,-1.0))

    intercept[RuntimeException] {
      fivelabel(Array(4,6))
    }

    intercept[RuntimeException] {
      fivelabel(Array(-1,2))
    }
  }
} 
Example 137
Source File: TopKClassifierSuite.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.util

import breeze.linalg.DenseVector
import org.apache.spark.SparkContext
import org.scalatest.FunSuite
import keystoneml.workflow.PipelineContext

class TopKClassifierSuite extends FunSuite with PipelineContext {
  test("top k classifier, k <= vector size") {
    sc = new SparkContext("local", "test")

    assert(TopKClassifier(2).apply(DenseVector(-10.0, 42.4, -43.0, 23.0)) === Array(1, 3))
    assert(TopKClassifier(4).apply(DenseVector(Double.MinValue, Double.MaxValue, 12.0, 11.0, 10.0)) === Array(1, 2, 3, 4))
    assert(TopKClassifier(3).apply(DenseVector(3.0, -23.2, 2.99)) === Array(0, 2, 1))
  }

  test("top k classifier, k > vector size") {
    sc = new SparkContext("local", "test")

    assert(TopKClassifier(5).apply(DenseVector(-10.0, 42.4, -43.0, 23.0)) === Array(1, 3, 0, 2))
    assert(TopKClassifier(2).apply(DenseVector(Double.MinValue)) === Array(0))
    assert(TopKClassifier(20).apply(DenseVector(3.0, -23.2, 2.99)) === Array(0, 2, 1))
  }

} 
Example 138
Source File: Dense.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.ml.keras



import breeze.linalg.{DenseMatrix, DenseVector}
import ucar.nc2.{Variable, Group}

class Dense(inputDim: Int, outputDim: Int) extends Functor{

  override def functorName = "Dense"

  override final def convert(data: DenseMatrix[Float]): DenseMatrix[Float] = {
    val z = data * w
    for (i <- 0 until data.rows){
      z(i, ::) :+= b.t
    }
    z
  }

  private val w = DenseMatrix.zeros[Float](inputDim, outputDim)
  private val b = DenseVector.zeros[Float](outputDim)

  def h5load(weight: Variable, bias: Variable): Unit = {
    val weightData = weight.read
    val weightIndex = weightData.getIndex
    val biasData = bias.read
    val biasIndex = biasData.getIndex
    for(y <- 0 until inputDim)
      for(x <- 0 until outputDim){
        w(y, x) = weightData.getFloat(weightIndex.set(y, x))
        if(y == 0)
          b(x) = biasData.getFloat(biasIndex.set(x))
      }
  }

  override def toString: String = "Dense: {inputDim: " + inputDim + ", outputDim: " + outputDim + "}"

  def head: String = w(0 until 2, ::).toString
}

object Dense{
  def apply(inputDim:Int, outputDim:Int) = new Dense(inputDim, outputDim)

  def apply(configs: Map[String, Any], weightGroups: Group): Dense = {
    val layerName = configs("name").toString
    val params = weightGroups.findGroup(layerName)
    val weightNames = params.findAttribute("weight_names")
    val weight = params.findVariable(weightNames.getStringValue(0))
    val bias = params.findVariable(weightNames.getStringValue(1))
    val dims = weight.getDimensions
    if(dims.size != 2){
      throw new IllegalArgumentException("invalid dimension for Dense class")
    }

    val d = new Dense(dims.get(0).getLength, dims.get(1).getLength)
    d.h5load(weight, bias)
    d
  }
} 
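
convert implements the affine map data * w with the bias added to each row. A newly constructed layer has zero-initialised parameters, so before h5load it maps any input to zeros (a minimal sketch):

val layer = Dense(inputDim = 3, outputDim = 2)
val out = layer.convert(DenseMatrix.ones[Float](4, 3)) // 4 x 2, all zeros until h5load fills w and b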
Example 139
Source File: Embedding.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.ml.keras



import breeze.linalg.{DenseMatrix, DenseVector}
import ucar.nc2.{Variable, Group}

class Embedding(vocabulary: Int, outDim: Int) extends Functor{

  override def functorName = "Embedding"

  override final def convert(data: DenseMatrix[Float]): DenseMatrix[Float] = {
    val arrayOfId = data.reshape(data.size, 1)
    val length = arrayOfId.size
    val z = DenseMatrix.zeros[Float](length, outDim)
    for(i <- 0 until length){
      z(i, ::) := w(arrayOfId(i, 0).asInstanceOf[Int]).t
    }
    z
  }

  private val w = new Array[DenseVector[Float]](vocabulary).map(_ => DenseVector.zeros[Float](outDim))

  def h5load(weight: Variable):Unit = {
    val weightData = weight.read
    val weightIndex = weightData.getIndex
    for(y <- 0 until vocabulary)
      for(x <- 0 until outDim)
        w(y)(x) = weightData.getFloat(weightIndex.set(y, x))
  }

}

object Embedding{
  def apply(vocabulary: Int, outDim: Int) = new Embedding(vocabulary, outDim)

  def apply(configs: Map[String, Any], weightGroups: Group): Embedding = {
    val layerName = configs("name").toString
    val params = weightGroups.findGroup(layerName)
    val weightNames = params.findAttribute("weight_names")
    val weight = params.findVariable(weightNames.getStringValue(0))
    val dims = weight.getDimensions
    if(dims.size != 2){
      throw new IllegalArgumentException("Invalid dimension for Embedding class")
    }
    val e = new Embedding(dims.get(0).getLength, dims.get(1).getLength)
    e.h5load(weight)
    e
  }
} 
Example 140
Source File: Convolution1D.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.ml.keras



import breeze.linalg.{DenseMatrix, DenseVector}
import ucar.nc2.{Variable, Group}

// Convolution operator for filtering neighborhoods of one-dimensional inputs.
class Convolution1D(outCh: Int, width: Int, inputDim: Int, padding: Boolean) extends Functor{

  override def functorName = "Convolution1D"

  override final def convert(data: DenseMatrix[Float]): DenseMatrix[Float] = {
    val work = im2col(data) * w
    for (i <- 0 until work.rows)
      work(i, ::) :+= b.t

    work
  }

  private val w = DenseMatrix.zeros[Float](width * inputDim, outCh)
  private val b = DenseVector.zeros[Float](outCh)

  private val paddingRow: Int = if (padding) {
    (width - 1) / 2
  } else {
    0
  }

  private def im2col(x: DenseMatrix[Float]): DenseMatrix[Float] = {
    val inputSize = width * inputDim
    val work = DenseMatrix.zeros[Float](x.rows, inputSize)
    val x1 = x.rows

    for(k1 <- 0 until x1)
        for(d2 <- 0 until width)
          for(d1 <- 0 until inputDim) {
            val i1 = k1 - paddingRow + d2
            val j1 = d1 + d2 * inputDim
            if (i1 >= 0 & i1 < x1)
              work(k1, j1) = x(i1, d1)
            else
              work(k1, j1) = 0.0.toFloat
          }

    work
  }

  private def h5load(weight: Variable, bias: Variable): Unit = {
    val weightData = weight.read
    val weightIndex = weightData.getIndex
    val biasData = bias.read
    val biasIndex = biasData.getIndex
    for(i <- 0 until width)
      for(j <- 0 until inputDim)
        for(x <- 0 until outCh){
          val y = i * inputDim + j
          w(y, x) = weightData.getFloat(weightIndex.set(i, 0, j, x))
          if(y == 0)
            b(x) = biasData.getFloat(biasIndex.set(x))
        }
  }

  override def toString: String = "Convolution1D: {outCh: " + outCh + ", width: " + width + ", inputDim: " + inputDim + ", padding" + padding + "}"

}

object Convolution1D{
  def apply(outCh: Int, width: Int, inputDim: Int, padding: Boolean) = new Convolution1D(outCh, width, inputDim, padding)

  def apply(configs: Map[String, Any], weightGroups: Group): Convolution1D = {
    val layerName = configs("name").toString
    val params = weightGroups.findGroup(layerName)
    val weightNames = params.findAttribute("weight_names")
    val borderMode = configs("border_mode").toString match {
      case "same" => true
      case _ => false
    }
    val weight = params.findVariable(weightNames.getStringValue(0))
    val bias = params.findVariable(weightNames.getStringValue(1))
    val dims = weight.getDimensions
    if(dims.size != 4){
      throw new IllegalArgumentException("invalid dimension for Convolution1D class")
    }

    val c = new Convolution1D(dims.get(3).getLength, dims.get(0).getLength,
      dims.get(2).getLength, borderMode)
    c.h5load(weight,bias)
    c
  }
} 
Example 141
Source File: Scaling.scala    From spark-gp   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ml.commons.util

import breeze.linalg.DenseVector
import breeze.numerics.sqrt
import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.rdd.RDD

private[ml] trait Scaling {
  def scale(data: RDD[LabeledPoint]) = {
    val x = data.map(x => DenseVector(x.features.toArray)).cache()
    val y = data.map(_.label)
    val n = x.count().toDouble
    val mean = x.reduce(_ + _) / n
    val centered = x.map(_ - mean).cache()
    val variance = centered.map(xx => xx *:* xx).reduce(_ + _) / n
    x.unpersist()
    val varianceNoZeroes = variance.map(v => if (v > 0d) v else 1d)
    val scaled = centered.map(_ /:/ sqrt(varianceNoZeroes)).map(_.toArray).map(Vectors.dense).zip(y).map {
      case(f, y) => LabeledPoint(y, f)
    }.cache()
    if (scaled.count() > 0) // ensure scaled is materialized
      centered.unpersist()
    scaled
  }
} 
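
For intuition, the same standardisation applied to two local vectors (a sketch mirroring the RDD code above, including the guard that maps zero variance to 1 so constant features pass through unscaled):

import breeze.linalg.DenseVector
import breeze.numerics.sqrt

val xs = Seq(DenseVector(1.0, 10.0), DenseVector(3.0, 10.0))
val mu = xs.reduce(_ + _) / xs.size.toDouble                    // DenseVector(2.0, 10.0)
val variance = xs.map(v => (v - mu) *:* (v - mu)).reduce(_ + _) / xs.size.toDouble
val safe = variance.map(v => if (v > 0d) v else 1d)             // zero variance -> 1.0
val scaled = xs.map(v => (v - mu) /:/ sqrt(safe))               // DenseVector(-1.0, 0.0), DenseVector(1.0, 0.0)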
Example 142
Source File: RBFKernelTest.scala    From spark-gp   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ml.commons.kernel

import breeze.linalg.{DenseMatrix, DenseVector, all}
import breeze.numerics.abs
import org.apache.spark.ml.linalg.Vectors
import org.scalatest.FunSuite

class RBFKernelTest extends FunSuite {
  test("Calling `trainingKernel` before `setTrainingVectors` " +
    "yields `TrainingVectorsNotInitializedException") {
    val rbf = new RBFKernel()

    assertThrows[TrainingVectorsNotInitializedException] {
      rbf.trainingKernel()
    }
  }

  test("Calling `derivative` before `setTrainingVectors` " +
    "yields `TrainingVectorsNotInitializedException") {
    val rbf = new RBFKernel()

    assertThrows[TrainingVectorsNotInitializedException] {
      rbf.trainingKernelAndDerivative()
    }
  }

  private val dataset = Array(Array(1d, 2d), Array(2d, 3d), Array(5d, 7d)).map(Vectors.dense)

  test("being called after `setTrainingVector`," +
    " `trainingKernel` should return the correct kernel matrix") {
    val rbf = new RBFKernel(math.sqrt(0.2))
    rbf.setTrainingVectors(dataset)

    val correctKernelMatrix = DenseMatrix((1.000000e+00, 6.737947e-03, 3.053624e-45),
                                          (6.737947e-03, 1.000000e+00, 7.187782e-28),
                                          (3.053624e-45, 7.187782e-28, 1.000000e+00))

    assert(all(abs(rbf.trainingKernel() - correctKernelMatrix) <:< 1e-4))
  }

  private def computationalDerivative(sigma: Double, h: Double) = {
    val rbfLeft = new RBFKernel(sigma - h)
    val rbfRight = new RBFKernel(sigma + h)

    rbfLeft.setTrainingVectors(dataset)
    rbfRight.setTrainingVectors(dataset)

    (rbfRight.trainingKernel() - rbfLeft.trainingKernel()) / (2 * h)
  }

  test("being called after `setTrainingVector`," +
    " `derivative` should return the correct kernel matrix derivative") {
    val rbf = new RBFKernel(0.2)
    rbf.setTrainingVectors(dataset)

    val analytical = rbf.trainingKernelAndDerivative()._2(0)
    val computational = computationalDerivative(0.2, 1e-3)

    assert(all(abs(analytical - computational) <:< 1e-3))
  }

  test("crossKernel returns correct kernel") {
    val rbf = new RBFKernel(math.sqrt(0.2))
    rbf.setTrainingVectors(dataset.drop(1))
    val crossKernel = rbf.crossKernel(dataset.take(1))
    val correctCrossKernel = DenseMatrix((6.737947e-03, 3.053624e-45))
    assert(all(abs(crossKernel - correctCrossKernel) <:< 1e-4))
  }

  test("crossKernel returns correct kernel if called on a single vector") {
    val rbf = new RBFKernel(math.sqrt(0.2))
    rbf.setTrainingVectors(dataset.drop(1))
    val crossKernel = rbf.crossKernel(dataset(0))
    val correctCrossKernel = DenseVector(6.737947e-03, 3.053624e-45).t
    assert(all(abs(crossKernel - correctCrossKernel) <:< 1e-4))
  }
} 
Example 143
Source File: Burden.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.assoc

import breeze.linalg.DenseVector
import breeze.stats.distributions.{Gaussian, StudentsT}
import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM}
import org.dizhang.seqspark.stat.{Resampling, ScoreTest, WaldTest}
import org.dizhang.seqspark.util.General._

import scala.language.existentials


@SerialVersionUID(7727280001L)
trait Burden extends AssocMethod {
  def nullModel: NM
  def x: Encode.Fixed
  def result: AssocMethod.Result
}

object Burden {

  def apply(nullModel: NM,
            x: Encode.Coding): Burden with AssocMethod.AnalyticTest = {
    nullModel match {
      case nm: NM.Fitted =>
        AnalyticScoreTest(nm, x.asInstanceOf[Encode.Fixed])
      case _ =>
        AnalyticWaldTest(nullModel, x.asInstanceOf[Encode.Fixed])
    }
  }

  def apply(ref: Double, min: Int, max: Int,
            nullModel: NM.Fitted,
            x: Encode.Coding): ResamplingTest = {
    ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.Fixed])
  }

  def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = {
    val st = ScoreTest(nm, x.asInstanceOf[Encode.Fixed].coding)
    st.score(0)/st.variance(0,0).sqrt
  }

  def getStatistic(nm: NM, x: DenseVector[Double]): Double = {
    val wt = WaldTest(nm, x)
    (wt.beta /:/ wt.std).apply(1)
  }

  @SerialVersionUID(7727280101L)
  final case class AnalyticScoreTest(nullModel: NM.Fitted,
                                     x: Encode.Fixed)
    extends Burden with AssocMethod.AnalyticTest
  {
    def geno = x.coding
    //val scoreTest = ScoreTest(nullModel, geno)
    val statistic = getStatistic(nullModel, x)
    val pValue = {
      val dis = new Gaussian(0.0, 1.0)
      Some(1.0 - dis.cdf(statistic))
    }

    def result: AssocMethod.BurdenAnalytic = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, "test=score")
    }

  }
  case class AnalyticWaldTest(nullModel: NM,
                              x: Encode.Fixed) extends Burden with AssocMethod.AnalyticTest {
    def geno = x.coding
    private val wt = WaldTest(nullModel, x.coding)
    val statistic = getStatistic(nullModel, geno)
    val pValue = {
      val dis = new StudentsT(nullModel.dof - 1)
      Some(1.0 - dis.cdf(statistic))
    }
    def result = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, s"test=wald;beta=${wt.beta(1)};betaStd=${wt.std(1)}")
    }
  }

  @SerialVersionUID(7727280201L)
  final case class ResamplingTest(refStatistic: Double,
                                  min: Int,
                                  max: Int,
                                  nullModel: NM.Fitted,
                                  x: Encode.Fixed)
    extends Burden with AssocMethod.ResamplingTest
  {
    def pCount = Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount
    def result: AssocMethod.BurdenResampling = {
      AssocMethod.BurdenResampling(x.vars, refStatistic, pCount)
    }
  }
} 
Example 144
Source File: HypoTest.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.stat

import breeze.linalg.{DenseMatrix, DenseVector, inv, *}
import breeze.stats.{mean, variance}


object HypoTest {

  trait NullModel {
    def dof: Int
  }

  object NullModel {

    case class Fitted(y: DenseVector[Double],
                      estimates: DenseVector[Double],
                      xs: DenseMatrix[Double],
                      a: Double,
                      b: DenseVector[Double],
                      binary: Boolean
                     ) extends NullModel {
      def dof = y.length - xs.cols + 1
      def residuals = y - estimates
      val invInfo = inv(xs.t * (xs(::, *) *:* b) * a)
    }

    def apply(y: DenseVector[Double], x: Option[DenseMatrix[Double]], fit: Boolean, binary: Boolean): NullModel = {
      x match {
        case Some(dm) => apply(y, dm, fit, binary)
        case None => apply(y, fit, binary)
      }
    }

    def apply(y: DenseVector[Double], fit: Boolean, binary: Boolean): NullModel = {
      if (fit) {
        Fit(y, binary)
      } else {
        Simple(y, binary)
      }
    }

    def apply(reg: Regression): NullModel = {
      val y = reg.responses
      reg match {
        case lr: LogisticRegression =>
          Fitted(y, reg.estimates, reg.xs, 1.0, lr.residualsVariance, binary = true)
        case lr: LinearRegression =>
          Fitted(y, reg.estimates, reg.xs, lr.residualsVariance, DenseVector.ones[Double](y.length), binary = false)
      }
    }

    def apply(y: DenseVector[Double], x: DenseMatrix[Double], fit: Boolean, binary: Boolean): NullModel = {
      if (! fit) {
        Mutiple(y, x, binary)
      } else if (binary) {
        val reg = LogisticRegression(y, x)
        Fitted(y, reg.estimates, reg.xs, 1.0, reg.residualsVariance, binary)
      } else {
        val reg = LinearRegression(y, x)
        Fitted(y, reg.estimates, reg.xs, reg.residualsVariance, DenseVector.ones[Double](y.length), binary)
      }
    }

    def Fit(y: DenseVector[Double], x: DenseMatrix[Double], binary: Boolean): Fitted = {
      apply(y, x, fit = true, binary).asInstanceOf[Fitted]
    }

    def Fit(y: DenseVector[Double], binary: Boolean): Fitted = {
      val my = DenseVector.fill(y.length)(mean(y))
      val residuals = y - my
      val xs = DenseMatrix.ones[Double](y.length, 1)
      val invInfo = DenseMatrix.fill(1,1)(1.0/y.length)
      val a = if (binary) 1.0 else variance(residuals)
      val b = if (binary) my.map(e => e * (1 - e)) else DenseVector.ones[Double](y.length)
      Fitted(y, my, xs, a, b, binary)
    }
  }
} 
Example 145
Source File: LinearCombinationChiSquare.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.stat

import breeze.linalg.{DenseVector, sum}
import org.dizhang.seqspark.stat.LinearCombinationChiSquare._


@SerialVersionUID(7778520001L)
trait LinearCombinationChiSquare extends Serializable {
  def lambda: DenseVector[Double]
  def nonCentrality: DenseVector[Double]
  def degreeOfFreedom: DenseVector[Double]
  def cdf(cutoff: Double): CDF

  val meanLambda: Double = sum(lambda)
  val size: Int = lambda.length

}

object LinearCombinationChiSquare {
  @SerialVersionUID(7778550101L)
  trait CDF extends Serializable {
    def pvalue: Double
    def ifault: Int
    def trace: Array[Double]
  }
} 
Example 146
Source File: Resampling.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.stat

import breeze.linalg.{DenseVector, shuffle}
import breeze.stats.distributions.Bernoulli
import org.dizhang.seqspark.assoc.Encode
import org.dizhang.seqspark.ds.SemiGroup.PairInt
import org.dizhang.seqspark.stat.HypoTest.NullModel

import scala.language.existentials


trait Resampling {

  def nullModel: NullModel.Fitted
  def makeNewY(): DenseVector[Double]

  def makeNewNullModel: NullModel.Fitted = {
    val newY = makeNewY()
    val cols = nullModel.xs.cols
    NullModel(
      newY,
      nullModel.xs(::, 1 until cols),
      fit = true,
      binary = nullModel.binary
    ).asInstanceOf[NullModel.Fitted]
  }
} 
Example 147
Source File: ScoreTest.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.stat

import breeze.linalg.{*, CSCMatrix, DenseMatrix, DenseVector, SparseVector}
import org.dizhang.seqspark.stat.HypoTest.NullModel.{Fitted => SNM}
import org.dizhang.seqspark.util.General._


object ScoreTest {

  def apply(nm: SNM, x: CSCMatrix[Double]): ScoreTest = {
    Sparse(nm, x)
  }

  def apply(nm: SNM, x: DenseMatrix[Double]): ScoreTest = {
    Dense(nm, x)
  }

  def apply(nm: SNM, x: DenseVector[Double]): ScoreTest = {
    Dense(nm, DenseVector.horzcat(x))
  }

  def apply(nm: SNM, x: SparseVector[Double]): ScoreTest = {
    Sparse(nm, SparseVector.horzcat(x))
  }

  def apply(nm: SNM,
            x1: DenseMatrix[Double],
            x2: CSCMatrix[Double]): ScoreTest = {
    Mixed(nm, x1, x2)
  }

  case class Sparse(nm: SNM,
                    x: CSCMatrix[Double]) extends ScoreTest {
    val score = (nm.residuals.toDenseMatrix * x).toDenseVector / nm.a
    lazy val variance = {
      val c = nm.xs
      val IccInv = nm.invInfo * nm.a
      val Igg = (colMultiply(x, nm.b).t * x).toDense
      val Icg = (c(::, *) *:* nm.b).t * x
      val Igc = Icg.t
      (Igg - Igc * IccInv * Icg) / nm.a
    }
  }

  case class Dense(nm: SNM,
                   x: DenseMatrix[Double]) extends ScoreTest {
    val score = x.t * nm.residuals / nm.a
    lazy val variance = {
      val c = nm.xs
      val IccInv = nm.invInfo * nm.a
      val Igg = (x(::, *) *:* nm.b).t * x
      val Icg = (c(::, *) *:* nm.b).t * x
      val Igc = Icg.t
      (Igg - Igc * IccInv * Icg)/nm.a
    }
  }

  case class Mixed(nm: SNM,
                   x1: DenseMatrix[Double],
                   x2: CSCMatrix[Double]) extends ScoreTest {
    private val dense = Dense(nm, x1)
    private val sparse = Sparse(nm, x2)
    val score = DenseVector.vertcat(dense.score, sparse.score)
    lazy val variance = {
      val v1 = dense.variance
      val v4 = sparse.variance
      val v2 = {
        val c = nm.xs
        val IccInv = nm.invInfo * nm.a
        val Igg = (x1(::, *) *:* nm.b).t * x2
        val Icg = (c(::, *) *:* nm.b).t * x2
        val Igc = x1.t * (c(::, *) *:* nm.b).t
        (Igg - Igc * IccInv * Icg) / nm.a
      }
      val v3 = v2.t
      val v12 = DenseMatrix.horzcat(v1, v2)
      val v34 = DenseMatrix.horzcat(v3, v4)
      DenseMatrix.vertcat(v12, v34)
    }
  }

  case class Mock(score: DenseVector[Double],
                  variance: DenseMatrix[Double]) extends ScoreTest
}

@SerialVersionUID(7778780001L)
sealed trait ScoreTest extends HypoTest {
  def score: DenseVector[Double]
  def variance: DenseMatrix[Double]
} 
Example 148
Source File: Kinship.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.stat

import org.apache.spark.rdd.RDD
import org.dizhang.seqspark.ds._
import breeze.linalg.{DenseVector, SparseVector, Vector}
import org.apache.spark.SparkContext

import scala.collection.mutable.ArrayBuffer


object Kinship {

  def removeNums(size: Int, nums: IndexedSeq[Int]): IndexedSeq[Int] = {
    var j: Int = 0
    var i: Int = 0
    val res = ArrayBuffer[Int]()
    while (i < size) {
      if (j >= nums.length) {
        res.+=(i)
      } else if (i == nums(j)) {
        j += 1
      } else {
        res.+=(i)
      }
      i += 1
    }
    res.toIndexedSeq
  }

} 
Example 149
Source File: WaldTest.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.stat

import breeze.linalg.{DenseMatrix, DenseVector, diag, inv}
import breeze.numerics.sqrt
import breeze.stats.distributions.StudentsT
import org.dizhang.seqspark.stat.HypoTest.NullModel
import org.dizhang.seqspark.stat.HypoTest.NullModel._


trait WaldTest {
  def nm: NullModel
  def x: DenseVector[Double]
  def reg: Regression = {
    nm match {
      case Simple(y, b) =>
        if (b)
          LogisticRegression(y, x.toDenseMatrix.t)
        else
          LinearRegression(y, x.toDenseMatrix.t)
      case Mutiple(y, c, b) =>
        if (b)
          LogisticRegression(y, DenseMatrix.horzcat(x.toDenseMatrix.t, c))
        else
          LinearRegression(y, DenseMatrix.horzcat(x.toDenseMatrix.t, c))
      case Fitted(y, _, xs, _, _, b) =>
        if (b)
          LogisticRegression(y, DenseMatrix.horzcat(x.toDenseMatrix.t, xs(::, 1 until xs.cols)))
        else
          LinearRegression(y, DenseMatrix.horzcat(x.toDenseMatrix.t, xs(::, 1 until xs.cols)))
    }
  }
  def beta: DenseVector[Double] = reg.coefficients
  def std: DenseVector[Double] = {
    sqrt(diag(inv(reg.information)))
  }
  def dof: Int = nm.dof - 1
  def t: DenseVector[Double] = beta /:/ std
  def pValue(oneSided: Boolean = true): DenseVector[Double] = {
    val dis = new StudentsT(dof)
    if (oneSided) {
      t.map(c => 1.0 - dis.cdf(c))
    } else {
      t.map(c => (1.0 - dis.cdf(math.abs(c))) * 2.0)
    }
  }
}

object WaldTest {

  def apply(nm: NullModel, x: DenseVector[Double]): WaldTest = {
    Default(nm, x)
  }

  case class Default(nm: NullModel, x: DenseVector[Double]) extends WaldTest

} 
Example 150
Source File: IntegrateSpec.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.numerics

import breeze.linalg.DenseVector
import breeze.numerics._
import breeze.stats.distributions._
import org.scalatest.{FlatSpec, Matchers}


class IntegrateSpec extends FlatSpec with Matchers {
  def f1(input: DenseVector[Double]): DenseVector[Double] = {
    val dis = new ChiSquared(1.0)
    val dis2 = new ChiSquared(14.0)
    input.map(x => dis.pdf(x) * dis2.cdf(x))
  }
  def sinx(input: DenseVector[Double]): DenseVector[Double] = {
    sin(input)
  }
  def x2(input: DenseVector[Double]): DenseVector[Double] = {
    pow(input, 2.0)
  }

  def time[R](block: => R)(tag: String): R = {
    val t0 = System.nanoTime()
    val result = block    // call-by-name
    val t1 = System.nanoTime()
    println(s"$tag Elapsed time: " + (t1 - t0)/1e6 + "ms")
    result
  }

  "A Integrate" should "be well" in {
    time{
      //val res1 = Integrate(f1, 0.0, 40.0)
      //println(s"chisq df=1 pdf|0,1: $res1")
    }("Chisq")

    //val res2 = Integrate(sinx, 0.0, 1.0)
    //val res3 = Integrate(x2, 0.0, 1.0)
    //println(s"sin(x)|0,1: $res2")
    //println(s"x^2|0,1: $res3")
  }
} 
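
The call-by-name time helper above is self-contained and reusable inside the spec; for instance:

val total = time {
  (1 to 1000000).map(i => math.sqrt(i.toDouble)).sum
}("sqrt-sum")
// prints e.g.: sqrt-sum Elapsed time: 12.345ms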
Example 151
Source File: Qk21Spec.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.numerics

import breeze.linalg.DenseVector
import breeze.numerics.pow
import breeze.stats.distributions.ChiSquared
import org.scalatest.{FlatSpec, Matchers}


class Qk21Spec extends FlatSpec with Matchers {

  val chisq = ChiSquared(1.0)

  def f(input: DenseVector[Double]): DenseVector[Double] = {
    input.map(x => chisq.pdf(x))
  }

  "A Qk21" should "behave well" in {
    //val res = Qk21(f, 0.0, 1.0)
    //println(res)
  }
} 
Example 152
Source File: VisualLogger.scala    From basel-face-pipeline   with Apache License 2.0 5 votes vote down vote up
package registration.utils

import java.awt.Color

import breeze.linalg.DenseVector
import scalismo.geometry._3D
import scalismo.mesh.TriangleMesh
import scalismo.statisticalmodel.StatisticalMeshModel
import scalismo.ui.api._

object VisualLogger {
  var ui : Option[ScalismoUI] = None//Some(ScalismoUI("Visual Logger"))

  val modelGroup = ui.map(_.createGroup("Model"))
  var modelView : Option[StatisticalMeshModelViewControls] = None

  val targetGroup = ui.map(_.createGroup("Target"))
  var targetMeshView : Option[TriangleMeshView] = None



  def showTargetMesh(targetMesh : TriangleMesh[_3D]) : Unit = {
    remove(targetMeshView)
    targetMeshView = show(VisualLogger.targetGroup, targetMesh, "target")
    targetMeshView.map(_.color = Color.RED)
  }

  def showStatisticalShapeModel(ssm : StatisticalMeshModel) : Unit = {
    removeModel(modelView)
    modelView = show(modelGroup, ssm, "gpmodel")
    modelView.map(_.meshView.opacity = 0.7)
  }

  def updateModelView(coeffs : DenseVector[Double]) : Unit = {
    if (modelView.isDefined) {
      modelView.get.shapeModelTransformationView.shapeTransformationView.coefficients = coeffs
    }
  }


  private def show[A](group : Option[Group], t : A, name : String)(implicit sic : ShowInScene[A]): Option[sic.View] = {
    for {
      ui <- ui
      g <- group
    } yield {
      ui.show(g, t, name)
    }
  }

  def remove[V <: ObjectView](view : Option[V]): Unit = {
    view.foreach(_.remove())
  }

  def removeModel(view : Option[StatisticalMeshModelViewControls]): Unit = {
    for {v <- view} {
      v.meshView.remove()
      v.shapeModelTransformationView.remove()
    }
  }

} 
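
Note the on/off switch in the first line of the object: modelGroup and targetGroup are vals derived from ui at initialization time, so logging is enabled by editing that line (swapping the commented-out constructor back in) rather than by reassigning the var later. With ui == None, every call in this object quietly becomes a no-op:

// Disabled (as shipped): all show*/update* calls are no-ops.
var ui: Option[ScalismoUI] = None
// Enabled: renders into the "Model" and "Target" groups instead.
// var ui: Option[ScalismoUI] = Some(ScalismoUI("Visual Logger"))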
Example 153
Source File: PrepareReferenceLandmarks.scala    From basel-face-pipeline   with Apache License 2.0 5 votes vote down vote up
package preprocessing

import breeze.linalg.{DenseMatrix, DenseVector}
import ch.unibas.cs.gravis.facepipeline.BU3DDataProvider
import ch.unibas.cs.gravis.facepipeline.BU3DDataProvider.Expressions
import ch.unibas.cs.gravis.facepipeline.{DataProvider, PipelineStep}
import scalismo.faces.io.TLMSLandmarksIO
import scalismo.statisticalmodel.MultivariateNormalDistribution

object PrepareReferenceLandmarks {

  def main(args: Array[String]): Unit = {
    scalismo.initialize()

    PrepareReferenceLandmarks(BU3DDataProvider).run()
  }

}

case class PrepareReferenceLandmarks(dataProvider : DataProvider) extends PipelineStep {

  override def run(): Unit = {

    scalismo.initialize()

    val rawRefLmsFile = (dataProvider.repositoryRoot / "data" / "incoming" / "reference" / "landmarks" / "mean2012_l7_bfm_nomouth.tlms").jfile

    val referenceLandmarksTLMS = TLMSLandmarksIO.read3D(rawRefLmsFile).get
    val referenceLandmarks = for (lmTlms <- referenceLandmarksTLMS if lmTlms.visible) yield {
      val lm = lmTlms.toLandmark
      val noiseVariance = lm.id.trim match {
        case lmid if lmid.contains("eyebrow") => 3.0
        case lmid if lmid.contains("eye.bottom") => 3.0
        case lmid if lmid.contains("eye.top") => 3.0
        case _ => 1.0
      }
      lm.copy(uncertainty = Some(MultivariateNormalDistribution(DenseVector.zeros[Double](3), DenseMatrix.eye[Double](3) * noiseVariance)))
    }

    // Transfer the reference landmarks to all the expressions and save them.
    for (expression <- Expressions.expressionModelTypes()) {

      val neutralRef = dataProvider.incoming.reference.loadMesh(dataProvider.Neutral).get
      val expressionRef = dataProvider.registration.loadPriorModel(expression).get.referenceMesh
      val expressionLms = for (lm <- referenceLandmarks) yield {
        val id = neutralRef.pointSet.findClosestPoint(lm.point).id
        lm.copy(point = expressionRef.pointSet.point(id))
      }

      dataProvider.incoming.reference.saveLandmarks(expression, expressionLms)
    }
  }

} 
Example 154
Source File: LshTable.scala    From lsh-scala   with Apache License 2.0 5 votes vote down vote up
package io.krom.lsh

import breeze.linalg.DenseVector

abstract class LshTable(prefix: Option[String] = None) {

  def put(hash: String, label: String, point: DenseVector[Double])
  def get(hash: String): List[(String, String, DenseVector[Double])]
  def update(hash: String, label: String, point: DenseVector[Double])

  protected def createKey(hash: String): String = {
    prefix match {
      case None => hash
      case Some(p) => p + ":" + hash
    }
  }
} 
Example 155
Source File: InMemoryLshTable.scala    From lsh-scala   with Apache License 2.0 5 votes vote down vote up
package io.krom.lsh

import breeze.linalg.DenseVector

import collection.mutable.HashMap
import collection.mutable.HashSet
import scala.collection.mutable

class InMemoryLshTable(prefix: Option[String] = None) extends LshTable(prefix) {

  private val index = new HashMap[String, HashSet[String]]()
  private val table = new HashMap[String, (String, String, DenseVector[Double])]()


  override def put(hash: String, label: String, point: DenseVector[Double]) = {
    val key = createKey(hash)
    val value = (label, key, point)


    if (!index.keySet.contains(key)) index(key) = new HashSet[String]()
    index(key) += label
    table(label) = value
  }

  override def update(hash: String, label: String, point: DenseVector[Double]) = {
    val key = createKey(hash)
    val (_, oldKey, _) = table(label)
    val newValue = (label, key, point)

    table(label) = newValue
    if (key != oldKey) {
      index(oldKey) -= label
      if (!index.keySet.contains(key)) index(key) = new mutable.HashSet[String]()
      index(key) += label
    }
  }

  override def get(hash: String): List[(String, String, DenseVector[Double])] = {
    val key = createKey(hash)

    val items = if (index.keySet.contains(key)) index(key) else new HashSet()

    (for {
      item <- items
    } yield table(item)).toList
  }
}

object InMemoryLshTable {
  def createTables(numTables: Int, prefix: Option[String] = None): IndexedSeq[LshTable] = {
    for {
      _ <- 1 to numTables
    } yield new InMemoryLshTable(prefix)
  }
} 
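
A short usage sketch of the in-memory table; the behaviour mirrors the spec further below:

import breeze.linalg.DenseVector

val table = InMemoryLshTable.createTables(numTables = 1, prefix = Some("demo")).head

table.put("hash01", "pointA", DenseVector(0.1, 0.2))
table.get("hash01")   // -> List(("pointA", "demo:hash01", DenseVector(0.1, 0.2)))

table.update("hash02", "pointA", DenseVector(0.3, 0.4))
table.get("hash01")   // -> Nil: the label moved buckets
table.get("hash02")   // -> List(("pointA", "demo:hash02", DenseVector(0.3, 0.4)))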
Example 156
Source File: RedisLshTable.scala    From lsh-scala   with Apache License 2.0 5 votes vote down vote up
package io.krom.lsh

import breeze.linalg.DenseVector
import com.lambdaworks.jacks.JacksMapper
import com.redis.RedisClient

import scala.collection.immutable.HashMap

class RedisLshTable(redisdb: RedisClient, prefix: Option[String] = None) extends LshTable(prefix) {

  override def put(hash: String, label: String, point: DenseVector[Double]): Unit = {
    val key = createKey(hash)
    val value = (label, key, point.toArray)

    redisdb.pipeline { pipe =>
      pipe.sadd(key, label)
      pipe.set(label, JacksMapper.writeValueAsString(value))
    }
  }

  override def update(hash: String, label: String, point: DenseVector[Double]): Unit = {
    val key = createKey(hash)

    val item = redisdb.get(label) match {
      case None => return
      case Some(x:String) => JacksMapper.readValue[(String, String, Array[Double])](x)
    }
    val oldKey = item._2

    val value = (label, key, point.toArray)

    redisdb.pipeline { pipe =>
      pipe.set(label, JacksMapper.writeValueAsString(value))
      if (key != oldKey) pipe.srem(oldKey, label)
      pipe.sadd(key, label)
    }
  }

  override def get(hash: String): List[(String, String, DenseVector[Double])] = {
    val key = createKey(hash)
    val items = redisdb.smembers(key)

    val itemDetails = redisdb.pipeline { pipe =>
      for {
        item <- items.get
        if item.isDefined
      } pipe.get(item.get)
    }

    for {
      item <- itemDetails.get
      newItem = item match {
        case Some(x:String) => Some(JacksMapper.readValue[(String, String, Array[Double])](x))
        case None => None
      }
      if newItem.isDefined
    } yield ( newItem.get._1, newItem.get._2, DenseVector(newItem.get._3) )
  }
}

object RedisLshTable {
  def createTables(numTables: Int, redisConf: HashMap[String, String], prefix: Option[String] = None): IndexedSeq[LshTable] = {
    val redisHost = if (redisConf.contains("host")) redisConf("host") else "localhost"
    val redisPort = if (redisConf.contains("port")) Integer.parseInt(redisConf("port")) else 6379
    for {
      redisDb <- 0 until numTables
    } yield new RedisLshTable(new RedisClient(redisHost, redisPort, redisDb), prefix)
  }
} 
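
Wiring up the Redis-backed variant is analogous; a sketch assuming a Redis server is reachable on the default port (note that each table gets its own Redis database index, per the 0 until numTables loop above):

import breeze.linalg.DenseVector
import scala.collection.immutable.HashMap

val redisConf = HashMap("host" -> "localhost", "port" -> "6379")
val tables = RedisLshTable.createTables(4, redisConf, Some("lsh"))

tables(0).put("hash01", "pointA", DenseVector(0.1, 0.2))
tables(0).get("hash01")   // round-trips through JSON: the point comes back as a DenseVector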
Example 157
Source File: InMemoryLshTableSpec.scala    From lsh-scala   with Apache License 2.0 5 votes vote down vote up
package io.krom.lsh

import breeze.linalg.DenseVector
import org.scalatest.FunSpec
import org.scalatest.Matchers._

class InMemoryLshTableSpec extends FunSpec {

  describe("put without prefix") {
    it("should return the value just added") {

      val testPoint1 = DenseVector(0.1, 0.2)
      val testLabel1 = "point1"

      val testKey = "testhashkey"

      val table = new InMemoryLshTable()

      table.put(testKey, testLabel1, testPoint1)
      table.get(testKey).length should equal (1)
      table.get(testKey)(0) should equal (testLabel1, testKey, testPoint1)
    }
    it("should return multiple results when more than one value is added") {

      val testPoint1 = DenseVector(0.1, 0.2)
      val testLabel1 = "point1"
      val testPoint2 = DenseVector(0.3, 0.4)
      val testLabel2 = "point2"
      val testKey = "testhashkey"

      val table = new InMemoryLshTable()

      table.put(testKey, testLabel1, testPoint1)
      table.put(testKey, testLabel2, testPoint2)
      table.get(testKey).length should equal (2)
      val data = table.get(testKey).sortBy(_._1)
      data(0) should equal (testLabel1, testKey, testPoint1)
      data(1) should equal (testLabel2, testKey, testPoint2)
    }
  }

  describe("put with prefix") {
    it("should return the value just added") {

      val testPoint1 = DenseVector(0.1, 0.2)
      val testLabel1 = "point1"

      val testKey = "testhashkey"
      val testPrefix = "testprefix"

      val table = new InMemoryLshTable(Some(testPrefix))

      table.put(testKey, testLabel1, testPoint1)
      table.get(testKey).length should equal (1)
      table.get(testKey)(0) should equal (testLabel1, testPrefix + ":" + testKey, testPoint1)
    }
    it("should return multiple results when more than one value is added") {

      val testPoint1 = DenseVector(0.1, 0.2)
      val testLabel1 = "point1"
      val testPoint2 = DenseVector(0.3, 0.4)
      val testLabel2 = "point2"
      val testKey = "testhashkey"
      val testPrefix = "testPrefix"

      val table = new InMemoryLshTable(Some(testPrefix))

      table.put(testKey, testLabel1, testPoint1)
      table.put(testKey, testLabel2, testPoint2)
      table.get(testKey).length should equal (2)
      val data = table.get(testKey).sortBy(_._1)
      data(0) should equal (testLabel1, testPrefix + ":" + testKey, testPoint1)
      data(1) should equal (testLabel2, testPrefix + ":" + testKey, testPoint2)
    }
  }

  describe("update") {
    it("should change the value previously stored") {

      val testPoint = DenseVector(0.1, 0.2)
      val testUpdatedPoint = DenseVector(0.3, 0.4)
      val testKey1 = "testkey1"
      val testKey2 = "testkey2"
      val testPrefix = "testPrefix"
      val testLabel = "testData"

      val table = new InMemoryLshTable(Some(testPrefix))

      table.put(testKey1, testLabel, testPoint)
      table.get(testKey1).length should equal (1)
      table.get(testKey1)(0) should equal (testLabel, testPrefix + ":" + testKey1, testPoint)

      table.update(testKey2, testLabel, testUpdatedPoint)
      table.get(testKey1).length should equal (0)
      table.get(testKey2).length should equal (1)
      table.get(testKey2)(0) should equal (testLabel, testPrefix + ":" + testKey2, testUpdatedPoint)
    }
  }
} 
Example 158
Source File: DistanceFunctionSpec.scala    From lsh-scala   with Apache License 2.0 5 votes vote down vote up
package io.krom.lsh

import breeze.linalg.DenseVector

import org.scalatest.{Matchers, FunSpec}

import DistanceFunction._

class DistanceFunctionSpec extends FunSpec with Matchers {

  describe("calculating Euclidean distance score") {
    it("should equal 1 over 1 plus the square root of the sum of the squares of the sides") {
      val point1 = DenseVector[Double](1.0, 0.0)
      val point2 = DenseVector[Double](0.0, 1.0)
      val point3 = DenseVector[Double](3.0, 0.0)

      euclideanDistance(point1, point1) should equal (1.0)
      euclideanDistance(point1, point2) should equal (1.0 / (1.0 + Math.sqrt(2.0)))
      euclideanDistance(point1, point3) should equal (1.0 / (1.0 + Math.sqrt(4.0)))
      euclideanDistance(point2, point3) should equal (1.0 / (1.0 + Math.sqrt(10.0)))
    }
  }
  describe("calculating Cosine distance score") {
    it("should equal 1 minus the cosine of the angle between the vectors") {
      val point1 = DenseVector(1.0, 0.0)
      val point2 = DenseVector(0.0, 1.0)
      val point3 = DenseVector(3.0, 0.0)

      val point4 = DenseVector(2.0, 3.0)
      val point5 = DenseVector(1.0, 1.5)
      val point6 = DenseVector(6.0, 9.0)

      cosineDistance(point1, point1) should equal (1.0)
      cosineDistance(point1, point2) should equal (0.0)
      cosineDistance(point2, point1) should equal (0.0)
      cosineDistance(point1, point3) should equal (1.0)
      cosineDistance(point4, point5) should equal (1.0)
      cosineDistance(point4, point6) should equal (1.0)

      val point7 = DenseVector(-1.0, 0.0)
      val point8 = DenseVector(0.0, -1.0)

      cosineDistance(point1, point7) should equal (1.0)
      cosineDistance(point1, point8) should equal (0.0)
      cosineDistance(point7, point8) should equal (0.0)

      val point9 = DenseVector(0.0, 0.0)
      cosineDistance(point9, point1).isNaN should be (true)
    }
  }
} 
Example 159
Source File: SparkHdfsLR.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession


object SparkHdfsLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkHdfsLR")
      .getOrCreate()

    val inputPath = args(0)
    val lines = spark.read.textFile(inputPath).rdd

    val points = lines.map(parsePoint).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    spark.stop()
  }
}
// scalastyle:on println 
Example 160
Source File: LocalLR.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{DenseVector, Vector}


object LocalLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- data) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient +=  p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
}
// scalastyle:on println 
Example 161
Source File: SparkKMeans.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import breeze.linalg.{squaredDistance, DenseVector, Vector}

import org.apache.spark.sql.SparkSession


object SparkKMeans {

  def parseVector(line: String): Vector[Double] = {
    DenseVector(line.split(' ').map(_.toDouble))
  }

  def closestPoint(p: Vector[Double], centers: Array[Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 0 until centers.length) {
      val tempDist = squaredDistance(p, centers(i))
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    if (args.length < 3) {
      System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkKMeans")
      .getOrCreate()

    val lines = spark.read.textFile(args(0)).rdd
    val data = lines.map(parseVector _).cache()
    val K = args(1).toInt
    val convergeDist = args(2).toDouble

    val kPoints = data.takeSample(withReplacement = false, K, 42)
    var tempDist = 1.0

    while(tempDist > convergeDist) {
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val pointStats = closest.reduceByKey{case ((p1, c1), (p2, c2)) => (p1 + p2, c1 + c2)}

      val newPoints = pointStats.map {pair =>
        (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()

      tempDist = 0.0
      for (i <- 0 until K) {
        tempDist += squaredDistance(kPoints(i), newPoints(i))
      }

      for (newP <- newPoints) {
        kPoints(newP._1) = newP._2
      }
      println("Finished iteration (delta = " + tempDist + ")")
    }

    println("Final centers:")
    kPoints.foreach(println)
    spark.stop()
  }
}
// scalastyle:on println 
Example 162
Source File: LocalFileLR.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import breeze.linalg.{DenseVector, Vector}


object LocalFileLR {
  val D = 10   // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val nums = line.split(' ').map(_.toDouble)
    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
    val points = lines.map(parsePoint _)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      var gradient = DenseVector.zeros[Double](D)
      for (p <- points) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient
    }

    println("Final w: " + w)
  }
}
// scalastyle:on println 
Example 163
Source File: SparkLR.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession


object SparkLR {
  val N = 10000  // Number of data points
  val D = 10   // Number of dimensions
  val R = 0.7  // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkLR")
      .getOrCreate()

    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = spark.sparkContext.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    spark.stop()
  }
}
// scalastyle:on println 
Example 164
Source File: LocalKMeans.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println 
Example 165
Source File: PosteriorLandmarkingInteractor.scala    From scalismo-ui   with GNU General Public License v3.0 5 votes vote down vote up
package scalismo.ui.control.interactor.landmark.complex.posterior

import breeze.linalg.DenseVector
import scalismo.geometry._
import scalismo.statisticalmodel.MultivariateNormalDistribution
import scalismo.ui.control.interactor.landmark.complex.ComplexLandmarkingInteractor
import scalismo.ui.control.interactor.landmark.complex.ComplexLandmarkingInteractor.Delegate
import scalismo.ui.model._

trait PosteriorLandmarkingInteractor extends ComplexLandmarkingInteractor[PosteriorLandmarkingInteractor] {

  private lazy val nodeVisibility = frame.sceneControl.nodeVisibility

  def previewNode: TriangleMeshNode

  def sourceGpNode: TransformationNode[DiscreteLowRankGpPointTransformation]

  def previewGpNode: TransformationNode[DiscreteLowRankGpPointTransformation]

  def targetUncertaintyGroup: GroupNode

  def targetGroupNode: GroupNode

  def inversePoseTransform: PointTransformation

  override protected def initialDelegate: Delegate[PosteriorLandmarkingInteractor] = {
    PosteriorReadyForCreating.enter()
  }

  def updatePreview(modelLm: LandmarkNode, targetLm: LandmarkNode, mousePosition: Point3D): Unit = {

    targetUncertaintyGroup.genericTransformations.foreach(_.remove())
    targetUncertaintyGroup.genericTransformations.add((_: Point[_3D]) => mousePosition, "mousePosition")

    val lmPointAndId = {
      previewNode.source.pointSet.findClosestPoint(modelLm.source.point)
    }

    val uncertaintyMean = DenseVector(0.0, 0.0, 0.0)
    val uncertaintyCovModelLm = modelLm.uncertainty.value.toMultivariateNormalDistribution.cov
    val uncertaintyCovTargetLm = targetLm.uncertainty.value.toMultivariateNormalDistribution.cov
    val lmUncertainty = MultivariateNormalDistribution(uncertaintyMean, uncertaintyCovModelLm + uncertaintyCovTargetLm)

    // Here, we need to (inverse) transform the mouse position in order to feed a non-rotated deformation vector to the regression
    val coeffs = sourceGpNode.transformation.gp.coefficients(
      IndexedSeq((lmPointAndId.point, inversePoseTransform(mousePosition) - lmPointAndId.point, lmUncertainty))
    )
    previewGpNode.transformation = sourceGpNode.transformation.copy(coeffs)
  }

  def showPreview(): Unit = {
    nodeVisibility.setVisibility(previewNode, frame.perspective.viewports, show = true)
  }

  def hidePreview(): Unit = {
    nodeVisibility.setVisibility(previewNode, frame.perspective.viewports, show = false)
  }

  def initialize(): Unit = {
    previewNode.pickable.value = false
    hidePreview()
  }
} 
Example 166
Source File: LowRankGpPointTransformation.scala    From scalismo-ui   with GNU General Public License v3.0 5 votes vote down vote up
package scalismo.ui.model

import breeze.linalg.DenseVector
import scalismo.common.{DiscreteDomain, NearestNeighborInterpolator}
import scalismo.geometry.{_3D, EuclideanVector, Point}
import scalismo.statisticalmodel.{DiscreteLowRankGaussianProcess, LowRankGaussianProcess}

// This used to be a case class, but since it is extended by the discrete version, it can no longer be.
// Therefore, the copy methods have to be defined manually.
class LowRankGpPointTransformation protected (val gp: LowRankGaussianProcess[_3D, EuclideanVector[_3D]],
                                              val coefficients: DenseVector[Double])
    extends PointTransformation {

  private lazy val vectorField = gp.instance(coefficients)

  override def apply(point: Point[_3D]): Point[_3D] = {
    point + vectorField(point)
  }

  def copy(coefficients: DenseVector[Double]): LowRankGpPointTransformation =
    new LowRankGpPointTransformation(gp, coefficients)
}

object LowRankGpPointTransformation {
  def apply(gp: LowRankGaussianProcess[_3D, EuclideanVector[_3D]],
            coefficients: DenseVector[Double]): LowRankGpPointTransformation =
    new LowRankGpPointTransformation(gp, coefficients)

  def apply(gp: LowRankGaussianProcess[_3D, EuclideanVector[_3D]]): LowRankGpPointTransformation =
    apply(gp, DenseVector.zeros[Double](gp.rank))
}

class DiscreteLowRankGpPointTransformation private (
  val dgp: DiscreteLowRankGaussianProcess[_3D, DiscreteDomain[_3D], EuclideanVector[_3D]],
  gp: LowRankGaussianProcess[_3D, EuclideanVector[_3D]],
  coefficients: DenseVector[Double]
) extends LowRankGpPointTransformation(gp, coefficients) {

  protected def this(dgp: DiscreteLowRankGaussianProcess[_3D, DiscreteDomain[_3D], EuclideanVector[_3D]],
                     coefficients: DenseVector[Double]) = {
    this(dgp, dgp.interpolate(NearestNeighborInterpolator[_3D, EuclideanVector[_3D]]()), coefficients)
  }

  // no need to re-interpolate if the gp didn't change
  override def copy(coefficients: DenseVector[Double]): DiscreteLowRankGpPointTransformation =
    new DiscreteLowRankGpPointTransformation(dgp, gp, coefficients)
}

object DiscreteLowRankGpPointTransformation {
  def apply(
    dgp: DiscreteLowRankGaussianProcess[_3D, DiscreteDomain[_3D], EuclideanVector[_3D]]
  ): DiscreteLowRankGpPointTransformation = apply(dgp, DenseVector.zeros[Double](dgp.rank))

  def apply(dgp: DiscreteLowRankGaussianProcess[_3D, DiscreteDomain[_3D], EuclideanVector[_3D]],
            coefficients: DenseVector[Double]): DiscreteLowRankGpPointTransformation =
    new DiscreteLowRankGpPointTransformation(dgp, coefficients)
} 
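
A usage sketch; gp is assumed to be a low-rank Gaussian process obtained elsewhere (for instance from a statistical shape model):

import breeze.linalg.DenseVector
import scalismo.geometry.{_3D, EuclideanVector, Point}
import scalismo.statisticalmodel.LowRankGaussianProcess

def warpOrigin(gp: LowRankGaussianProcess[_3D, EuclideanVector[_3D]]): Point[_3D] = {
  val meanTransform = LowRankGpPointTransformation(gp)        // zero coefficients
  val sampled = meanTransform.copy(DenseVector.rand(gp.rank)) // a random instance
  sampled(Point(0.0, 0.0, 0.0))                               // apply the warp to a point
}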
Example 167
Source File: QuadraticRenyiEntropy.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.prototype

import breeze.linalg.DenseVector
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import io.github.mandar2812.dynaml.kernels.DensityKernel



  override def entropy(data: List[DenseVector[Double]]): Double = {
    val dim = data.head.length
    val root_two: breeze.linalg.Vector[Double] = DenseVector.fill(dim, sqrt(2))
    val product = for(i <- data.view; j <- data.view) yield (i, j)
    -1*log_e(product.map((couple) => {
      val point1: DenseVector[Double] = couple._1 / sqrt(2.0)
      val point2: DenseVector[Double] = couple._2 / sqrt(2.0)
      density.eval(point1 - point2)
    }).sum)
  }

  override def entropy[K](data: RDD[(K, LabeledPoint)]): Double = {
    val dim = data.first()._2.features.size
    -1*log_e(data.cartesian(data).map((couple) =>{
      val point1: DenseVector[Double] = DenseVector(couple._1._2.features.toArray) / sqrt(2.0)
      val point2: DenseVector[Double] = DenseVector(couple._2._2.features.toArray) / sqrt(2.0)
      density.eval(point1 - point2)
    }).reduce((a,b) => a + b))
  }

  def entropyDifference(entropy: Double,
                        data: List[DenseVector[Double]],
                        add: DenseVector[Double],
                        remove: DenseVector[Double]): Double = {
    val dim = data.head.length
    val expEntropy = math.exp(-1.0*entropy)

    val product1 = for(i <- data.view) yield (remove, i)
    val subtractEnt = 2*product1.map((couple) => {
      density.eval((couple._1 - couple._2) / sqrt(2.0))
    }).sum - density.eval(DenseVector.zeros(dim))

    val product2 = for(i <- data.view) yield (add, i)
    val addEnt = 2*product2.map((couple) => {
      density.eval((couple._1 - couple._2) / sqrt(2.0))
    }).sum - 2*density.eval((add - remove) / sqrt(2.0)) +
      density.eval(DenseVector.zeros(dim))

    -1.0*log_e(expEntropy + addEnt - subtractEnt) - entropy
  }
} 
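
For orientation: the double sum in entropy is the Parzen-window estimator behind the quadratic Rényi entropy. Up to the kernel's normalization constant and an additive log N² term, neither of which affects comparisons between candidate subsets of equal size, it computes (a sketch of the identity, not code from the project):

$$\hat H_2 \;=\; -\log \sum_{i=1}^{N}\sum_{j=1}^{N} \kappa\!\Big(\frac{x_i - x_j}{\sqrt{2}}\Big) \;\approx\; -\log \int \hat p(x)^2 \, dx$$

Dividing each pairwise difference by √2 plays the role of the bandwidth widening (σ → √2 σ) that arises when the Parzen density estimate is convolved with itself.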
Example 168
Source File: Metrics.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.evaluation

import breeze.linalg.DenseVector
import org.apache.spark.rdd.RDD


trait Metrics[P] {

  protected val scoresAndLabels: List[(P, P)]

  protected var name = "Value"

  def print(): Unit

  def generatePlots(): Unit = {}

  def kpi(): DenseVector[P]

  def setName(n: String): this.type = {
    name = n
    this
  }
}

object Metrics{
  def apply(task: String)
           (scoresAndLabels: List[(Double, Double)],
            length: Int, logFlag: Boolean = false)
  : Metrics[Double] = task match {
    case "regression" => new RegressionMetrics(scoresAndLabels, length)
    case "classification" => new BinaryClassificationMetrics(scoresAndLabels, length, logFlag)
  }
}

object MetricsSpark {
  def apply(task: String)
           (scoresAndLabels: RDD[(Double, Double)],
            length: Long,
            minmax: (Double, Double))
  : Metrics[Double] = task match {
    case "regression" => new RegressionMetricsSpark(scoresAndLabels, length)
    case "classification" => new BinaryClassificationMetricsSpark(scoresAndLabels, length, minmax)
  }
} 
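
A sketch of the factory in use for a small regression evaluation:

val scoresAndLabels = List((1.1, 1.0), (1.9, 2.0), (3.2, 3.0))
val metrics = Metrics("regression")(scoresAndLabels, scoresAndLabels.length)

metrics.setName("toy fit").print()   // prints the regression KPIs
val kpis = metrics.kpi()             // the same KPIs as a DenseVector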
Example 169
Source File: RegressionMetricsSpark.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.evaluation

import breeze.linalg.DenseVector
import io.github.mandar2812.dynaml.graphics.charts.Highcharts._
import org.apache.log4j.{Priority, Logger}
import org.apache.spark.Accumulator
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD

import scalax.chart.module.ChartFactories.{XYBarChart, XYLineChart, XYAreaChart}


    histogram(residuals, numBins = 20)
    title("Histogram of Regression Residuals")
  }

}

object RegressionMetricsSpark {

  def computeKPIs(scoresAndLabels: RDD[(Double, Double)], size: Long)
  : (Double, Double, Double, Double) = {
    val mean: Accumulator[Double] = scoresAndLabels.context.accumulator(0.0, "mean")

    val err:DenseVector[Double] = scoresAndLabels.map((sc) => {
      val diff = sc._1 - sc._2
      mean += sc._2
      val difflog = math.pow(math.log(1 + math.abs(sc._1)) - math.log(math.abs(sc._2) + 1),
        2)
      DenseVector(math.abs(diff), math.pow(diff, 2.0), difflog)
    }).reduce((a,b) => a+b)

    val SS_res = err(1)

    val mu: Broadcast[Double] = scoresAndLabels.context.broadcast(mean.value/size.toDouble)

    val SS_tot = scoresAndLabels.map((sc) => math.pow(sc._2 - mu.value, 2.0)).sum()

    val rmse = math.sqrt(SS_res/size.toDouble)
    val mae = err(0)/size.toDouble
    // Guard against SS_tot == 0 directly: comparing with Double.NaN via != is always true.
    val rsq = if (SS_tot != 0.0) 1 - (SS_res / SS_tot) else 0.0
    val rmsle = err(2)/size.toDouble
    (mae, rmse, rsq, rmsle)
  } 
  
} 
Example 170
Source File: VectorIIDProbit.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.probability

import breeze.linalg.{DenseMatrix, DenseVector, diag}
import breeze.stats.distributions.Gaussian


  override def hessian(y: DenseVector[Double],
                       f: DenseVector[Double]): DenseMatrix[Double] = {
    diag(DenseVector((y.toArray zip f.toArray).map((couple) => {
      val n = standardGaussian.pdf(couple._2)
      val product = couple._1*couple._2
      val l = h(product)
      -1.0*(n*n)/(l*l) - product*n/l
    })))
  }

  override def gaussianExpectation(normalDistParams: (DenseVector[Double],
    DenseVector[Double])): DenseVector[Double] = {
    DenseVector((normalDistParams._1.toArray zip normalDistParams._2.toArray).map((couple) => {
      val gamma = math.sqrt(1.0 + couple._2)
      standardGaussian.pdf(couple._1/gamma)
    }))
  }
} 
Example 171
Source File: StudentsTRV.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.probability

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.stats.distributions.{ContinuousDistr, Moments, StudentsT}
import io.github.mandar2812.dynaml.algebra.{PartitionedPSDMatrix, PartitionedVector}
import io.github.mandar2812.dynaml.analysis.{PartitionedVectorField, VectorField}
import io.github.mandar2812.dynaml.probability.distributions._
import spire.implicits._
import spire.algebra.Field

abstract class AbstractStudentsTRandVar[
T, V, Distr <: ContinuousDistr[T] with Moments[T, V] with HasErrorBars[T]](mu: Double) extends
  ContinuousRVWithDistr[T, Distr]


case class StudentsTRV(mu: Double, mean: Double, sigma: Double) extends
  AbstractStudentsTRandVar[Double, Double, UnivariateStudentsT](mu) {

  override val underlyingDist = UnivariateStudentsT(mu, mean, sigma)

}

case class MultStudentsTRV(
  mu: Double, mean: DenseVector[Double],
  covariance : DenseMatrix[Double])(
  implicit ev: Field[DenseVector[Double]])
  extends AbstractStudentsTRandVar[DenseVector[Double], DenseMatrix[Double], MultivariateStudentsT](mu) {

  override val underlyingDist: MultivariateStudentsT = MultivariateStudentsT(mu, mean, covariance)
}

object MultStudentsTRV {

  def apply(num_dim: Int)(mu: Double, mean: DenseVector[Double], covariance: DenseMatrix[Double]) = {
    assert(
      num_dim == mean.length,
      "Number of dimensions of vector space must match the number of elements of mean")

    implicit val ev = VectorField(num_dim)

    new MultStudentsTRV(mu, mean, covariance)
  }
}

case class MultStudentsTPRV(
  mu: Double,
  mean: PartitionedVector,
  covariance: PartitionedPSDMatrix)(
  implicit ev: Field[PartitionedVector])
  extends AbstractStudentsTRandVar[PartitionedVector, PartitionedPSDMatrix, BlockedMultivariateStudentsT](mu) {

  override val underlyingDist: BlockedMultivariateStudentsT = BlockedMultivariateStudentsT(mu, mean, covariance)
}

object MultStudentsTPRV {

  def apply(num_dim: Long, nE: Int)(mu: Double, mean: PartitionedVector, covariance: PartitionedPSDMatrix) = {
    assert(
      num_dim == mean.rows,
      "Number of dimensions of vector space must match the number of elements of mean")

    implicit val ev = PartitionedVectorField(num_dim, nE)

    new MultStudentsTPRV(mu, mean, covariance)
  }

}

case class MatrixTRV(
  mu: Double, m: DenseMatrix[Double],
  u: DenseMatrix[Double],
  v: DenseMatrix[Double]) extends
  AbstractStudentsTRandVar[DenseMatrix[Double], (DenseMatrix[Double], DenseMatrix[Double]), MatrixT](mu) {

  override val underlyingDist = MatrixT(mu, m, v, u)
} 
Example 172
Source File: VectorIIDSigmoid.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.probability

import breeze.linalg.{DenseMatrix, DenseVector, diag}
import breeze.numerics.sigmoid


  override def hessian(y: DenseVector[Double],
                       f: DenseVector[Double]): DenseMatrix[Double] = {
    diag(DenseVector((y.toArray zip f.toArray).map((couple) => {
      val pi = sigmoid(couple._1*couple._2)
      -1.0*pi*(1.0 - pi)
    })))
  }

  override def gaussianExpectation(normalDistParams: (DenseVector[Double],
    DenseVector[Double])): DenseVector[Double] = {
    DenseVector((normalDistParams._1.toArray zip normalDistParams._2.toArray).map((couple) => {
      val gamma = math.sqrt(1.0 + (math.Pi*couple._2/8.0))
      sigmoid(couple._1/gamma)
    }))
  }
} 
Example 173
Source File: BlockedMultivariateStudentsT.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.probability.distributions

import breeze.linalg.DenseVector
import breeze.numerics._

import math.Pi
import breeze.stats.distributions._
import io.github.mandar2812.dynaml.algebra._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixOps._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixSolvers._
import io.github.mandar2812.dynaml.probability.RandomVariable
import spire.implicits._

import scala.runtime.ScalaRunTime


case class BlockedMultivariateStudentsT(
  mu: Double,
  mean: PartitionedVector,
  covariance: PartitionedPSDMatrix)(implicit rand: RandBasis = Rand) extends
  AbstractContinuousDistr[PartitionedVector] with
  Moments[PartitionedVector, PartitionedPSDMatrix] with
  HasErrorBars[PartitionedVector] {

  require(mu > 2.0, "Degrees of freedom must be greater than 2.0, for a multivariate t distribution to be defined")

  private val chisq = new ChiSquared(mu)

  def draw() = {
    val w = math.sqrt(mu/chisq.draw())
    val nE: Int = if(mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt
    val z: PartitionedVector = PartitionedVector.rand(mean.rows, nE, RandomVariable(new StudentsT(mu)))*w
    val m: PartitionedVector = root * z
    m + mean
  }

  private lazy val root: LowerTriPartitionedMatrix = bcholesky(covariance)

  override def toString() =  ScalaRunTime._toString(this)

  override def unnormalizedLogPdf(t: PartitionedVector) = {
    val centered: PartitionedVector = t - mean
    val z: PartitionedVector = root \ centered
    val slv: PartitionedVector = root.t \ z

    -0.5*(mu+mean.rows)*log(1.0 + ((slv dot centered) / mu))

  }

  override lazy val logNormalizer = {
    // determinant of the cholesky decomp is the sqrt of the determinant of the cov matrix
    // this is the log det of the cholesky decomp
    val det = bsum(blog(bdiag(root)))
    ((mean.rows/2) * (log(mu) + log(Pi))) + 0.5*det + lgamma(mu/2.0) - lgamma((mu+mean.rows)/2.0)
  }

  def variance = new PartitionedPSDMatrix(
    covariance._underlyingdata.map(c => (c._1, c._2*(mu/(mu-2.0)))),
    covariance.rows, covariance.cols, covariance.rowBlocks, covariance.colBlocks)

  def mode: PartitionedVector = mean

  //TODO: Check and correct calculation of entropy for Mult Students T
  lazy val entropy = {
    bsum(blog(bdiag(root))) + (mean.rows/2.0)*log(mu*Pi) + lbeta(mean.rows/2.0, mu/2.0) - lgamma(mean.rows/2.0) +
      (digamma((mu+mean.rows)/2.0) - digamma(mu/2.0))*(mu+mean.rows)/2.0
  }

  override def confidenceInterval(s: Double) = {

    val signFlag = if(s < 0) -1.0 else 1.0
    val nE: Int = if(mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt
    val ones = PartitionedVector.ones(mean.rows, nE)
    val multiplier = signFlag*s

    val bar: PartitionedVector = root*(ones*(multiplier*math.sqrt(mu/(mu-2.0))))

    (mean - bar, mean + bar)

  }
} 
Example 174
Source File: MixtureDistribution.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.probability.distributions

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{ContinuousDistr, Moments, Multinomial}
import spire.algebra.VectorSpace


class MixtureWithConfBars[I, V](
  distributions: Seq[ContinuousDistr[I] with Moments[I, V] with HasErrorBars[I]],
  probabilities: Multinomial[DenseVector[Double], Int])(
  implicit vI: VectorSpace[I, Double]) extends
  MixtureDistribution[I](distributions, probabilities) with
  HasErrorBars[I] {

  private val weightsArr = probabilities.params.toArray

  override def confidenceInterval(s: Double) =
    distributions.zip(weightsArr).map(c => {
      val (lower, upper) = c._1.confidenceInterval(s)

      (vI.timesr(lower, c._2), vI.timesr(upper, c._2))
    }).reduce((a,b) =>
      (vI.plus(a._1, b._1), vI.plus(a._2, b._2))
    )


  def mean = distributions.zip(weightsArr)
    .map(c => vI.timesr(c._1.mean, c._2))
    .reduce((a,b) => vI.plus(a,b))

}

object MixtureWithConfBars {

  def apply[I, V](
    distributions: Seq[ContinuousDistr[I] with Moments[I, V] with HasErrorBars[I]],
    weights: DenseVector[Double])(
    implicit vI: VectorSpace[I, Double]): MixtureWithConfBars[I, V] =
    new MixtureWithConfBars(distributions, new Multinomial[DenseVector[Double], Int](weights))
} 
Example 175
Source File: SparkBlockedVector.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.algebra

import breeze.linalg.{DenseVector, NumericOps}
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import scala.collection.immutable.NumericRange


object SparkBlockedVector {  // companion object; its declaration was dropped in extraction, name assumed from the source file

  def vertcat(vectors: SparkBlockedVector*): SparkBlockedVector = {
    //sanity check
    assert(vectors.map(_.colBlocks).distinct.length == 1,
      "In case of vertical concatenation of matrices their columns sizes must be equal")

    val sizes = vectors.map(_.rowBlocks)
    new SparkBlockedVector(vectors.zipWithIndex.map(couple => {
      val offset = sizes.slice(0, couple._2).sum
      couple._1._data.map(c => (c._1+offset, c._2))
    }).reduceLeft((a,b) => a.union(b)))
  }

} 
Example 176
Source File: MinMaxAccumulator.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.utils

import breeze.linalg.DenseVector
import org.apache.spark.AccumulatorParam

object MinMaxAccumulator extends AccumulatorParam[DenseVector[Double]] {
  def zero(initialValue: DenseVector[Double]): DenseVector[Double] = {
    DenseVector(Double.MaxValue, Double.MinValue)
  }

  def addInPlace(v1: DenseVector[Double], v2: DenseVector[Double]): DenseVector[Double] = {
    v1(0) = math.min(v1(0), v2(0))
    v1(1) = math.max(v1(1), v2(1))
    v1
  }
} 
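
A usage sketch against the legacy Spark accumulator API this object targets; note that zero ignores its argument and returns the (min, max) identity element (Double.MaxValue, Double.MinValue):

import breeze.linalg.DenseVector
import org.apache.spark.SparkContext

def minMax(sc: SparkContext, xs: Seq[Double]): DenseVector[Double] = {
  val acc = sc.accumulator(DenseVector(Double.MaxValue, Double.MinValue))(MinMaxAccumulator)
  sc.parallelize(xs).foreach(x => acc += DenseVector(x, x))
  acc.value   // index 0 holds the minimum, index 1 the maximum
}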
Example 177
Source File: PCAScaler.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.utils

import breeze.linalg.eig.Eig
import breeze.linalg.{DenseMatrix, DenseVector, eig}
import io.github.mandar2812.dynaml.pipes.{ReversibleScaler, Scaler}


case class PCAScaler(
  center: DenseVector[Double],
  eigenvalues: DenseVector[Double],
  eigenvectors: DenseMatrix[Double]) extends
  ReversibleScaler[DenseVector[Double]] { self =>

  override val i = Scaler((data: DenseVector[Double]) => (eigenvectors*data)+center)

  override def run(data: DenseVector[Double]) = eigenvectors.t*(data-center)

  def apply(r: Range): CompressedPCAScaler = CompressedPCAScaler(
    r,
    self.center, 
    self.eigenvalues, 
    self.eigenvectors)
}

case class CompressedPCAScaler(
  r: Range,
  center: DenseVector[Double],
  eigenvalues: DenseVector[Double],
  eigenvectors: DenseMatrix[Double]
) extends Scaler[DenseVector[Double]] {

  override def run(data: DenseVector[Double]) = {
    val projections = eigenvectors.t*(data-center)
    projections(r)
  }
} 
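
A round-trip sketch with hand-picked inputs; in practice center, eigenvalues and eigenvectors would come from an eigendecomposition of the data covariance, and the inverse in i is exact only when the eigenvector matrix is orthonormal:

import breeze.linalg.{DenseMatrix, DenseVector}

val scaler = PCAScaler(
  center       = DenseVector(1.0, 2.0),
  eigenvalues  = DenseVector(3.0, 0.5),
  eigenvectors = DenseMatrix.eye[Double](2)   // identity basis, for illustration
)

val z     = scaler.run(DenseVector(2.0, 2.5))   // project: DenseVector(1.0, 0.5)
val xBack = scaler.i.run(z)                     // reconstruct: DenseVector(2.0, 2.5)
val top1  = scaler(0 until 1).run(DenseVector(2.0, 2.5))   // leading component only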
Example 178
Source File: MetaModel.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.ensemble

import breeze.linalg.DenseVector
import io.github.mandar2812.dynaml.modelpipe.ModelPipe
import io.github.mandar2812.dynaml.models.Model
import io.github.mandar2812.dynaml.models.gp.GPRegression
import io.github.mandar2812.dynaml.models.neuralnets.FeedForwardNetwork


abstract class MetaModel[
D, D1,
BaseModel <: Model[D1, DenseVector[Double], Double],
Pipe <: ModelPipe[D, D1, DenseVector[Double], Double, BaseModel]
](num: Long, data: D, networks: Pipe*)
  extends Model[D, DenseVector[Double], Double] {

  override protected val g = data

  val baseNetworks: List[BaseModel] =
    networks.toList.map(_(g))

} 
Example 179
Source File: NeuralLayer.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseMatrix, DenseVector}
import io.github.mandar2812.dynaml.pipes.{DataPipe, MetaPipe}


class NeuralLayerFactory[P, I, J](
  metaLocalField: MetaPipe[P, I, J],
  val activationFunc: Activation[J]) extends
  DataPipe[P, NeuralLayer[P, I, J]] {

  override def run(params: P) = NeuralLayer(metaLocalField, activationFunc)(params)
}

class Vec2VecLayerFactory(act: Activation[DenseVector[Double]])(inDim: Int, outDim: Int)
  extends NeuralLayerFactory[
    (DenseMatrix[Double], DenseVector[Double]),
    DenseVector[Double], DenseVector[Double]](
    MetaPipe((p: (DenseMatrix[Double], DenseVector[Double])) => (x: DenseVector[Double]) => p._1*x + p._2),
    act) {
  override def run(params: (DenseMatrix[Double], DenseVector[Double])) = {
    require(
      params._1.cols == inDim && params._1.rows == outDim && params._2.length == outDim,
      "Weight matrix and bias vector sizes must be consistent for a Vector to Vector layer")
    super.run(params)
  }
} 
Example 180
Source File: FeedForwardNetwork.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.DenseVector
import com.tinkerpop.blueprints.Graph
import com.tinkerpop.frames.FramedGraph
import io.github.mandar2812.dynaml.graph.FFNeuralGraph
import io.github.mandar2812.dynaml.optimization.BackPropagation
import io.github.mandar2812.dynaml.pipes.DataPipe



  def test(d: D): Stream[(DenseVector[Double], DenseVector[Double])] = {

    val (procInputs, _) =
      dataAsStream(d)
        .map(c =>
          (c._1.toArray.toList.map(i => List(i)), c._2.toArray.toList.map(i => List(i))))
        .reduce((c1,c2) =>
          (c1._1.zip(c2._1).map(c => c._1++c._2), c1._2.zip(c2._2).map(c => c._1++c._2)))

    val predictedOutputBuffer = params.predictBatch(procInputs)

    //dataAsStream(d).map(rec => (feedForward(rec._1), rec._2))
    dataAsStream(d).map(_._2).zipWithIndex.map(c =>
      (DenseVector.tabulate[Double](outputDimensions)(dim =>
        predictedOutputBuffer(dim)(c._2)),
        c._1)
    )

  }
} 
Example 181
Source File: CommitteeNetwork.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.DenseVector
import io.github.mandar2812.dynaml.graph.FFNeuralGraph
import io.github.mandar2812.dynaml.models.LinearModel
import io.github.mandar2812.dynaml.optimization.{BackPropagation, CommitteeModelSolver, RegularizedOptimizer}
import io.github.mandar2812.dynaml.pipes.DataPipe


  def test(d: D): Stream[(DenseVector[Double], DenseVector[Double])] = {
    val (procInputs, _) =
      dataAsStream(d)
        .map(c =>
          (c._1.toArray.toList.map(i => List(i)), c._2.toArray.toList.map(i => List(i))))
        .reduce((c1,c2) =>
          (c1._1.zip(c2._1).map(c => c._1++c._2), c1._2.zip(c2._2).map(c => c._1++c._2)))


    val committeepredictions = baseNetworks.map(network => {
      network.predictBatch(procInputs)
    })

    dataAsStream(d).map(_._2).zipWithIndex.map(c => {
      val votes = DenseVector.tabulate[Double](baseNetworks.length)(Ndim =>
        committeepredictions(Ndim)(0)(c._2))

      val prediction: Double = params dot votes

      (DenseVector(prediction), c._1)
    })
  }
} 
Example 182
Source File: LSSVMCommittee.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.svm

import breeze.linalg.DenseVector
import io.github.mandar2812.dynaml.modelpipe.DLSSVMPipe
import io.github.mandar2812.dynaml.models.ensemble.CommitteeModel
import io.github.mandar2812.dynaml.optimization._
import org.apache.spark.rdd.RDD


class LSSVMCommittee(num: Long,
                     data: RDD[(DenseVector[Double], Double)],
                     pipes: DLSSVMPipe[RDD[(DenseVector[Double], Double)]]*) extends
  CommitteeModel[RDD[(DenseVector[Double], Double)],
    Stream[(DenseVector[Double], Double)],
    DLSSVM, DLSSVMPipe[RDD[(DenseVector[Double], Double)]]] (num, data, pipes:_*){

  override protected val optimizer: RegularizedOptimizer[
    DenseVector[Double],
    DenseVector[Double], Double,
    RDD[(DenseVector[Double], Double)]] = new RDDCommitteeSolver

  var modelTuners: List[ModelTuner[DLSSVM, DLSSVM]] =
    baseNetworks.map(m => new GridSearch[DLSSVM](m).setGridSize(10).setStepSize(0.1))

  override def learn(): Unit = {
    //First tune and learn the base SVM models
    (baseNetworks zip modelTuners).foreach(modelCouple => {
      val (_, conf) = modelCouple._2.optimize(modelCouple._1.getState, Map())
      modelCouple._1.setState(conf)
      modelCouple._1.learn()
    })
    //Now learn the committee weights
    val fMap = featureMap
    params = optimizer.optimize(num_points,
      g.map(patternCouple => (fMap(patternCouple._1), patternCouple._2)),
      initParams())
  }

} 
Example 183
Source File: StudentTProcessMixture.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.stp

import breeze.linalg.DenseVector
import io.github.mandar2812.dynaml.algebra.{PartitionedPSDMatrix, PartitionedVector}
import io.github.mandar2812.dynaml.analysis.InnerProductPV
import io.github.mandar2812.dynaml.models.GenContinuousMixtureModel
import io.github.mandar2812.dynaml.probability.MultStudentsTPRV
import io.github.mandar2812.dynaml.probability.distributions.BlockedMultivariateStudentsT
import spire.algebra.VectorSpace

import scala.reflect.ClassTag


class StudentTProcessMixture[T, I: ClassTag](
  override val component_processes: Seq[AbstractSTPRegressionModel[T, I]],
  override val weights: DenseVector[Double]) extends
  GenContinuousMixtureModel[
    T, I, Double, PartitionedVector,
    PartitionedPSDMatrix, BlockedMultivariateStudentsT,
    MultStudentsTPRV, AbstractSTPRegressionModel[T, I]](
    component_processes, weights) {

  protected val blockSize: Int = component_processes.head._blockSize

  override protected def toStream(y: PartitionedVector): Stream[Double] = y.toStream

  override protected def getVectorSpace(num_dim: Int): VectorSpace[PartitionedVector, Double] =
    InnerProductPV(num_dim, blockSize)
} 
Example 184
Source File: StudentTRegression.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.stp

import breeze.linalg.DenseVector
import io.github.mandar2812.dynaml.evaluation.RegressionMetrics
import io.github.mandar2812.dynaml.kernels.{DiracKernel, LocalScalarKernel}
import io.github.mandar2812.dynaml.pipes.{DataPipe, StreamDataPipe}


  override def energy(h: Map[String, Double],
                      options: Map[String, String]): Double = validationSet.length match {
    case 0 => super.energy(h, options)
    case _ =>
      // Calculate regression metrics on validation set
      // Return some function of kpi as energy
      setState(h)
      val resultsToScores = DataPipe(
        (res: Seq[(DenseVector[Double], Double, Double, Double, Double)]) =>
          res.map(i => (i._3, i._2)).toStream)

      (resultsToScores >
        processTargets >
        scoresToEnergy) run
        this.test(validationSet)
  }

} 
Example 185
Source File: MVTMixture.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.stp

import breeze.linalg.{DenseMatrix, DenseVector}
import io.github.mandar2812.dynaml.analysis.MatrixVectorSpace
import io.github.mandar2812.dynaml.models.GenContinuousMixtureModel
import io.github.mandar2812.dynaml.probability.MatrixTRV
import io.github.mandar2812.dynaml.probability.distributions.MatrixT
import spire.algebra.VectorSpace

import scala.reflect.ClassTag


class MVTMixture[T, I: ClassTag](
  override val component_processes: Seq[MVStudentsTModel[T, I]],
  override val weights: DenseVector[Double]) extends
  GenContinuousMixtureModel[
    T, I, DenseVector[Double], DenseMatrix[Double],
    (DenseMatrix[Double], DenseMatrix[Double]), MatrixT,
    MatrixTRV, MVStudentsTModel[T, I]](component_processes, weights) {

  val num_outputs: Int = component_processes.head.num_outputs

  override protected def toStream(y: DenseMatrix[Double]): Stream[DenseVector[Double]] =
    (0 until y.rows).toStream.map(index => y(index,::).t)

  override protected def getVectorSpace(num_dim: Int): VectorSpace[DenseMatrix[Double], Double] =
    MatrixVectorSpace(num_dim, num_outputs)
} 
Example 186
Source File: SparkGLM.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.lm

import breeze.linalg.{DenseMatrix, DenseVector}
import io.github.mandar2812.dynaml.optimization.{RegularizedLSSolver, RegularizedOptimizer}
import org.apache.spark.rdd.RDD


class SparkGLM(
  data: RDD[(DenseVector[Double], Double)], numPoints: Long,
  map: (DenseVector[Double]) => DenseVector[Double] = identity[DenseVector[Double]])
  extends GenericGLM[RDD[(DenseVector[Double], Double)],
    (DenseMatrix[Double], DenseVector[Double])](data, numPoints, map) {

  override protected val optimizer: RegularizedOptimizer[
    DenseVector[Double], DenseVector[Double],
    Double, (DenseMatrix[Double], DenseVector[Double])] = new RegularizedLSSolver

  // Accumulate the normal equations (phi^T phi, phi^T y) in a single
  // distributed pass; a constant 1.0 is appended to each feature vector to
  // act as the intercept.
  override def prepareData(d: RDD[(DenseVector[Double], Double)]) = {

    val phi = featureMap
    val mapFunc = (xy: (DenseVector[Double], Double)) => {
      val phiX = DenseVector(phi(xy._1).toArray ++ Array(1.0))
      val phiY = phiX*xy._2
      (phiX*phiX.t, phiY)
    }

    d.mapPartitions((partition) => {
      Iterator(partition.map(mapFunc).reduce((a,b) => (a._1+b._1, a._2+b._2)))
    }).reduce((a,b) => (a._1+b._1, a._2+b._2))
  }
} 
Example 187
Source File: SparkLogisticGLM.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.lm

//Breeze Imports
import breeze.linalg.DenseVector
import breeze.numerics.sigmoid
import breeze.stats.distributions.Gaussian
import io.github.mandar2812.dynaml.optimization.ProbitGradient
import org.apache.spark.mllib.linalg.Vectors
//DynaML Imports
import io.github.mandar2812.dynaml.optimization.{
GradientDescentSpark, LogisticGradient,
RegularizedOptimizer, SquaredL2Updater}
//Spark Imports
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD


class SparkProbitGLM(
  data: RDD[(DenseVector[Double], Double)], numPoints: Long,
  map: (DenseVector[Double]) => DenseVector[Double] =
  identity[DenseVector[Double]]) extends SparkLogisticGLM(data, numPoints, map) {

  private val standardGaussian = new Gaussian(0, 1.0)

  override val h: (Double) => Double = (x: Double) => standardGaussian.cdf(x)

  override protected val optimizer: RegularizedOptimizer[
    DenseVector[Double], DenseVector[Double],
    Double, RDD[LabeledPoint]] = new GradientDescentSpark(new ProbitGradient, new SquaredL2Updater)

} 
Example 188
Source File: LogisticGLM.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.lm

import breeze.linalg.DenseVector
import breeze.numerics._
import breeze.stats.distributions.Gaussian
import io.github.mandar2812.dynaml.optimization._


class ProbitGLM(data: Stream[(DenseVector[Double], Double)],
                numPoints: Int,
                map: (DenseVector[Double]) => DenseVector[Double] =
                identity[DenseVector[Double]])
  extends LogisticGLM(data, numPoints, map) {

  private val standardGaussian = new Gaussian(0, 1.0)

  override val h = (x: Double) =>
    standardGaussian.cdf(x)

  override protected val optimizer =
    new GradientDescent(
      new ProbitGradient,
      new SquaredL2Updater)

} 
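
A short, hypothetical training sketch for the probit model above (the toy data set and the {0, 1} label encoding are assumptions, not part of the original source), using the learn()/predict() interface of GeneralizedLinearModel.

// Hypothetical usage sketch for ProbitGLM.
import breeze.linalg.DenseVector

val toyData: Stream[(DenseVector[Double], Double)] = Stream(
  (DenseVector(0.1, 0.2), 0.0),
  (DenseVector(0.2, 0.9), 1.0),
  (DenseVector(0.8, 0.3), 0.0),
  (DenseVector(0.9, 0.8), 1.0))

val model = new ProbitGLM(toyData, toyData.length)
model.learn()

// The link h is the standard Gaussian CDF, so the output behaves like P(y = 1 | x).
val probability = model.predict(DenseVector(0.5, 0.5))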
Example 189
Source File: RegularizedGLM.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.lm

import breeze.linalg.{DenseMatrix, DenseVector}
import io.github.mandar2812.dynaml.optimization.{GloballyOptimizable,
RegularizedLSSolver, RegularizedOptimizer}


class RegularizedGLM(data: Stream[(DenseVector[Double], Double)],
                     numPoints: Int,
                     map: (DenseVector[Double]) => DenseVector[Double] =
                     identity[DenseVector[Double]])
  extends GeneralizedLinearModel[(DenseMatrix[Double], DenseVector[Double])](data, numPoints, map)
    with GloballyOptimizable {

  override val task = "regression"

  override protected val optimizer: RegularizedOptimizer[DenseVector[Double],
    DenseVector[Double], Double, (DenseMatrix[Double], DenseVector[Double])] =
    new RegularizedLSSolver


  override def prepareData(d: Stream[(DenseVector[Double], Double)]) = {
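    // Build the design matrix with an appended bias column, then return the
    // normal-equation pair (X^T X, X^T y) consumed by RegularizedLSSolver.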
    val designMatrix = DenseMatrix.vertcat[Double](
      d.map(point => DenseVector(featureMap(point._1).toArray ++ Array(1.0)).toDenseMatrix):_*
    )

    val responseVector = DenseVector.vertcat(
      d.map(p => DenseVector(p._2)):_*
    )

    (designMatrix.t*designMatrix, designMatrix.t*responseVector)
  }
} 
Example 190
Source File: GPNarModel.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.gp

import breeze.linalg.DenseVector
import io.github.mandar2812.dynaml.kernels.LocalScalarKernel
import io.github.mandar2812.dynaml.pipes.DataPipe

import scala.annotation.tailrec
import scala.collection.mutable.{MutableList => ML}


class GPNarModel(order: Int,
                 cov: LocalScalarKernel[DenseVector[Double]],
                 nL: LocalScalarKernel[DenseVector[Double]],
                 trainingdata: Seq[(DenseVector[Double], Double)],
                 meanFunc: DataPipe[DenseVector[Double], Double] = DataPipe(_ => 0.0)) extends
GPRegression(cov, nL, trainingdata, meanFunc) {

  val modelOrder = order

  def modelPredictedOutput(n: Int)(input: DenseVector[Double]):
  Seq[(Double, Double, Double)] = {
    assert(modelOrder == input.length, "Model order must be equal to dimension of input")
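
    // Multi-step-ahead forecasting: each predicted mean is appended to the lag
    // vector and fed back as the next input, collecting (mean, lower, upper)
    // error bars at two standard deviations.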

    @tailrec
    def predictAheadRec(num: Int, features: DenseVector[Double],
                        predictions: ML[(Double, Double, Double)]):
    Seq[(Double, Double, Double)] =
      num match {
        case 0 => predictions.toSeq
        case _ =>
          val pred: (DenseVector[Double], Double, Double, Double) =
            predictionWithErrorBars[Seq[DenseVector[Double]]](Seq(features), 2).head
          val newFeatures = DenseVector(features(1 until modelOrder).toArray ++ Array(pred._2))

          predictAheadRec(num-1, newFeatures, predictions.+=:((pred._2, pred._3, pred._4)))
      }

    predictAheadRec(n, input, ML())
  }
} 
Example 191
Source File: MOGPRegressionModel.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.gp

import breeze.linalg.{DenseMatrix, DenseVector}
import io.github.mandar2812.dynaml.kernels.LocalScalarKernel
import io.github.mandar2812.dynaml.pipes.{DataPipe, DataPipe2}
import io.github.mandar2812.dynaml.probability.distributions.MatrixNormal
import org.apache.log4j.Logger
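
// Excerpt: this override sits inside the MOGPRegressionModel class body, whose
// declaration is elided in this listing. dataAsSeq flattens each multi-output
// pattern (x, y) into single-output pairs ((x, i), y(i)).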


  override def dataAsSeq(data: Stream[(I, DenseVector[Double])]): Seq[((I, Int), Double)] =
    data.map((patternAndLabel) =>
      patternAndLabel._2.mapPairs((i, label) =>
        ((patternAndLabel._1, i), label)
      ).toArray.toSeq).reduceLeft((s1, s2) => s1 ++ s2)
}


class KroneckerMOGPModel[I](
  covFunc: LocalScalarKernel[I], noiseCovFunc: LocalScalarKernel[I], coRegCov: LocalScalarKernel[Int],
  data: Stream[(I, DenseVector[Double])], num: Int, numOutputs: Int,
  meanFunc: DataPipe[(I, Int), Double] = DataPipe((_: (I, Int)) => 0.0)) extends
  MOGPRegressionModel[I](covFunc :* coRegCov, noiseCovFunc :* coRegCov, data, num, numOutputs, meanFunc) {

  val (covFPipe, noiseCovPipe, coRegCovPipe) = (covFunc.asPipe, noiseCovFunc.asPipe, coRegCov.asPipe)

  override def energy(h: Map[String, Double], options: Map[String, String]): Double = {
    setState(h)

    val (features, targets) = data.unzip

    val covMatrix: DenseMatrix[Double] =
      covFunc
        .buildKernelMatrix(features, features.length)
        .getKernelMatrix()

    val noiseMatrix: DenseMatrix[Double] =
      noiseCovFunc
        .buildKernelMatrix(features, features.length)
        .getKernelMatrix()

    val colCovMatrix = coRegCov
      .buildKernelMatrix(0 until noutputs, noutputs)
      .getKernelMatrix()

    val meanMat: DenseMatrix[Double] = DenseMatrix.vertcat(
      features.map(instance =>
        DenseVector.tabulate[Double](noutputs)(o => mean((instance, o))).asDenseMatrix):_*
    )

    val mvn = MatrixNormal(meanMat, covMatrix+noiseMatrix, colCovMatrix)

    -mvn.logPdf(DenseMatrix.vertcat(targets.map(_.asDenseMatrix):_*))
  }
} 
Example 192
Source File: GPNarXModel.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.gp

import breeze.linalg.DenseVector
import io.github.mandar2812.dynaml.kernels.LocalScalarKernel
import io.github.mandar2812.dynaml.pipes.DataPipe


class GPNarXModel(
  order: Int,
  ex: Int,
  cov: LocalScalarKernel[DenseVector[Double]],
  nL: LocalScalarKernel[DenseVector[Double]],
  trainingdata: Seq[(DenseVector[Double], Double)],
  meanFunc: DataPipe[DenseVector[Double], Double] = DataPipe(_ => 0.0)) extends
GPRegression(cov, nL, trainingdata, meanFunc) {

  val modelOrder = order

  val exogenousInputs = ex

} 
Example 193
Source File: GPBasisFuncRegressionModel.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.models.gp

import breeze.linalg.{DenseMatrix, DenseVector, cholesky, trace, inv}
import breeze.numerics.{log, sqrt}
import io.github.mandar2812.dynaml.algebra._
import io.github.mandar2812.dynaml.analysis._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixOps._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixSolvers._
import io.github.mandar2812.dynaml.kernels._
import io.github.mandar2812.dynaml.models.{ContinuousProcessModel, SecondOrderProcessModel}
import io.github.mandar2812.dynaml.optimization.GloballyOptWithGrad
import io.github.mandar2812.dynaml.pipes.{DataPipe, DataPipe2}
import io.github.mandar2812.dynaml.probability.{MultGaussianPRV, MultGaussianRV}
import org.apache.log4j.Logger

import scala.reflect.ClassTag


abstract class GPBasisFuncRegressionModel[T, I: ClassTag](
  cov: LocalScalarKernel[I], n: LocalScalarKernel[I],
  data: T, num: Int, basisFunc: DataPipe[I, DenseVector[Double]],
  basis_param_prior: MultGaussianRV) extends AbstractGPRegressionModel[T, I](
  cov, n, data, num) {

  val MultGaussianRV(b, covB) = basis_param_prior

  implicit val vf = VectorField(b.length)

  private lazy val lowB = cholesky(covB)

  private lazy val covBsolveb = lowB.t \ (lowB \ b)

  private lazy val h: PartitionedMatrix = PartitionedMatrix.horzcat(_blockSize)(trainingData.map(basisFunc(_)):_*)

  override val mean: DataPipe[I, Double] = basisFunc > DataPipe((h: DenseVector[Double]) => h.t * b)

  private val basisFeatureMap: DataPipe[I, DenseVector[Double]] =
    basisFunc > DataPipe((x: DenseVector[Double]) => lowB*x)

  val feature_map_cov = CovarianceFunction(basisFeatureMap)
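
  // Uncertainty in the basis coefficients (b ~ N(b0, covB)) is folded into the
  // process covariance through an explicit feature map built from the Cholesky
  // factor of covB (cf. GPML Sec. 2.7, regression with explicit basis functions).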

  override protected def getTrainKernelMatrix[U <: Seq[I]] = {
    SVMKernel.buildPartitionedKernelMatrix(trainingData,
      trainingData.length, _blockSize, _blockSize,
      (x: I, y: I) => {covariance.evaluate(x, y) + feature_map_cov.evaluate(x, y) + noiseModel.evaluate(x, y)}
    )
  }

  override protected def getCrossKernelMatrix[U <: Seq[I]](test: U) =
    SVMKernel.crossPartitonedKernelMatrix(
      trainingData, test, _blockSize, _blockSize,
      (x: I, y: I) => {covariance.evaluate(x, y) + feature_map_cov.evaluate(x, y)}
    )

  override protected def getTestKernelMatrix[U <: Seq[I]](test: U) =
    SVMKernel.buildPartitionedKernelMatrix(
      test, test.length.toLong,
      _blockSize, _blockSize,
      (x: I, y: I) => {covariance.evaluate(x, y) + feature_map_cov.evaluate(x, y)}
    )


} 
Example 194
Source File: GLMPipe.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.modelpipe

import breeze.linalg.{DenseMatrix, DenseVector}
import io.github.mandar2812.dynaml.algebra.PartitionedPSDMatrix
import io.github.mandar2812.dynaml.kernels.LocalScalarKernel
import io.github.mandar2812.dynaml.models.lm.{GeneralizedLeastSquaresModel, GeneralizedLinearModel, GenericGLM, SparkGLM}
import io.github.mandar2812.dynaml.pipes.{DataPipe, DataPipe2, DataPipe3}
import org.apache.spark.rdd.RDD


object SparkGLMPipe2 extends DataPipe2[
  RDD[(DenseVector[Double], Double)],
  DataPipe[DenseVector[Double], DenseVector[Double]],
  GenericGLM[
    RDD[(DenseVector[Double], Double)],
    (DenseMatrix[Double], DenseVector[Double])]] {


  override def run(
    data1: RDD[(DenseVector[Double], Double)],
    data2: DataPipe[DenseVector[Double], DenseVector[Double]]) = {

    val length = data1.count()
    new SparkGLM(data1, length, data2.run)
  }
} 
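
A hypothetical one-liner showing the pipe in action (trainingData is an assumed RDD[(DenseVector[Double], Double)]; the identity feature map mirrors the default used by the GLM classes in the examples above).

// Hypothetical usage sketch for SparkGLMPipe2.
val glm = SparkGLMPipe2.run(
  trainingData,
  DataPipe(identity[DenseVector[Double]] _))
glm.learn()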
Example 195
Source File: DLSSVMPipe.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.modelpipe

import io.github.mandar2812.dynaml.pipes.DataPipe
import breeze.linalg.DenseVector
import io.github.mandar2812.dynaml.kernels.LocalScalarKernel
import io.github.mandar2812.dynaml.models.svm.DLSSVM


class DLSSVMPipe[Source](pre: DataPipe[Source, Stream[(DenseVector[Double], Double)]],
                         cov: LocalScalarKernel[DenseVector[Double]],
                         task: String = "regression") extends
  ModelPipe[Source, Stream[(DenseVector[Double], Double)],
    DenseVector[Double], Double, DLSSVM] {

  override val preProcess = pre

  override def run(data: Source) = {
    val training = preProcess(data)
    new DLSSVM(training, training.length, cov, task)
  }
} 
Example 196
Source File: MixturePipe.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.modelpipe

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.stats.distributions.{ContinuousDistr, Moments}
import io.github.mandar2812.dynaml.algebra.{PartitionedPSDMatrix, PartitionedVector}
import io.github.mandar2812.dynaml.models.gp.AbstractGPRegressionModel
import io.github.mandar2812.dynaml.models.stp.{AbstractSTPRegressionModel, MVStudentsTModel}
import io.github.mandar2812.dynaml.models.{
ContinuousProcessModel, GenContinuousMixtureModel,
SecondOrderProcessModel, StochasticProcessMixtureModel}
import io.github.mandar2812.dynaml.optimization.GloballyOptimizable
import io.github.mandar2812.dynaml.pipes.DataPipe2
import io.github.mandar2812.dynaml.probability.{ContinuousRVWithDistr, MatrixTRV, MultGaussianPRV, MultStudentsTPRV}
import io.github.mandar2812.dynaml.probability.distributions.{
BlockedMultiVariateGaussian, BlockedMultivariateStudentsT,
HasErrorBars, MatrixT}

import scala.reflect.ClassTag


abstract class MixturePipe[
T, I: ClassTag, Y, YDomain, YDomainVar,
BaseDistr <: ContinuousDistr[YDomain]
  with Moments[YDomain, YDomainVar]
  with HasErrorBars[YDomain],
W1 <: ContinuousRVWithDistr[YDomain, BaseDistr],
BaseProcess <: ContinuousProcessModel[T, I, Y, W1]
  with SecondOrderProcessModel[T, I, Y, Double, DenseMatrix[Double], W1]
  with GloballyOptimizable] extends
  DataPipe2[Seq[BaseProcess], DenseVector[Double],
    GenContinuousMixtureModel[
      T, I, Y, YDomain, YDomainVar,
      BaseDistr, W1, BaseProcess]]


class GPMixturePipe[T, I: ClassTag] extends
  MixturePipe[T, I, Double, PartitionedVector, PartitionedPSDMatrix,
    BlockedMultiVariateGaussian, MultGaussianPRV,
    AbstractGPRegressionModel[T, I]] {

  override def run(
    models: Seq[AbstractGPRegressionModel[T, I]],
    weights: DenseVector[Double]) =
    StochasticProcessMixtureModel(models, weights)
}

class StudentTMixturePipe[T, I: ClassTag] extends
  MixturePipe[T, I, Double, PartitionedVector, PartitionedPSDMatrix,
    BlockedMultivariateStudentsT, MultStudentsTPRV,
    AbstractSTPRegressionModel[T, I]] {

  override def run(
    models: Seq[AbstractSTPRegressionModel[T, I]],
    weights: DenseVector[Double]) =
    StochasticProcessMixtureModel(models, weights)
}

class MVStudentsTMixturePipe[T, I: ClassTag] extends
  MixturePipe[
    T, I, DenseVector[Double], DenseMatrix[Double],
    (DenseMatrix[Double], DenseMatrix[Double]),
    MatrixT, MatrixTRV,
    MVStudentsTModel[T, I]] {

  override def run(
    models: Seq[MVStudentsTModel[T, I]],
    weights: DenseVector[Double]) =
    StochasticProcessMixtureModel(models, weights)
} 
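
A minimal sketch of the GP variant in use (gp1 and gp2 are assumed, already-trained GPRegression models, cf. Examples 190 and 192; the weights are chosen to sum to one).

// Hypothetical usage sketch for GPMixturePipe.
import breeze.linalg.DenseVector

val mixturePipe = new GPMixturePipe[
  Seq[(DenseVector[Double], Double)], DenseVector[Double]]

val gpMixture = mixturePipe.run(Seq(gp1, gp2), DenseVector(0.6, 0.4))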
Example 197
Source File: RegularizedLSSolver.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.optimization

import breeze.linalg.{DenseMatrix, DenseVector}


class RegularizedLSSolver extends RegularizedOptimizer[
  DenseVector[Double], DenseVector[Double],
  Double, (DenseMatrix[Double], DenseVector[Double])] {

  override def optimize(nPoints: Long,
                        ParamOutEdges: (DenseMatrix[Double], DenseVector[Double]),
                        initialP: DenseVector[Double]): DenseVector[Double] = {

    val (designMatrix,labels) = ParamOutEdges
    // Diagonal ridge penalty: regParam on every weight, with a fixed unit
    // entry on the final (intercept) component.
    val smoother = DenseMatrix.tabulate[Double](initialP.length, initialP.length)((i,j) => {
      if(i != j) 0.0 else if(i < initialP.length-1) regParam else 1.0
    })
    //Solve the regularised normal equations: (X^T X + smoother) * w = X^T y
    val A = designMatrix + smoother
    val b = labels

    A\b
  }
} 
Example 198
Source File: LaplacePosteriorMode.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.optimization

import breeze.linalg.{DenseMatrix, DenseVector, cholesky, inv}
import breeze.numerics.sqrt
import io.github.mandar2812.dynaml.DynaMLPipe._
import io.github.mandar2812.dynaml.pipes.DataPipe
import io.github.mandar2812.dynaml.probability.Likelihood
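
// Excerpt: this override sits inside the Laplace-approximation optimizer
// class, whose declaration is elided in this listing; likelihood and
// numIterations are members of the enclosing class.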


  override def optimize(nPoints: Long,
                        ParamOutEdges: (DenseMatrix[Double], DenseVector[Double]),
                        initialP: DenseVector[Double]): DenseVector[Double] =
    LaplacePosteriorMode.run(
      nPoints, ParamOutEdges,
      this.likelihood, initialP,
      this.numIterations,
      identityPipe[(DenseMatrix[Double], DenseVector[Double])])
}

object LaplacePosteriorMode {

  def run[T](nPoints: Long, data: T,
             likelihood: Likelihood[
               DenseVector[Double], DenseVector[Double], DenseMatrix[Double],
               (DenseVector[Double], DenseVector[Double])],
             initialP: DenseVector[Double], numIterations: Int,
             transform: DataPipe[T, (DenseMatrix[Double], DenseVector[Double])]): DenseVector[Double] = {

    val (kMat, y) = transform(data)
    var mode = initialP

    var b = DenseVector.zeros[Double](y.length)
    var a = DenseVector.zeros[Double](y.length)

    val id = DenseMatrix.eye[Double](y.length)
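
    // Newton iteration for the posterior mode (Rasmussen & Williams, GPML,
    // Algorithm 3.1): wMat is the negative Hessian of the log-likelihood at
    // the current mode estimate.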

    (1 to numIterations).foreach{ iter =>
      val wMat = likelihood.hessian(y, mode) * -1.0
      val wMatsq = sqrt(wMat)

      val L = cholesky(id + wMatsq*kMat*wMatsq)
      b = wMat*mode + likelihood.gradient(y, mode)
      val buff1 = wMatsq*kMat*b
      val buff2 = inv(L)*buff1

      a = b - inv(wMatsq*L.t)*buff2
      mode = kMat*a
    }

    mode

  }
} 
Example 199
Source File: QuasiNewtonOptimizer.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.optimization

import breeze.linalg.{DenseMatrix, DenseVector, inv}
import io.github.mandar2812.dynaml.pipes.DataPipe
import org.apache.log4j.Logger
import spire.implicits._
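
// Excerpt: this override sits inside the quasi-Newton optimizer class, whose
// declaration is elided in this listing; regParam, numIterations, updater,
// gradient and stepSize are members of the enclosing class.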


  override def optimize(
    nPoints: Long,
    ParamOutEdges: Stream[(DenseVector[Double], Double)],
    initialP: DenseVector[Double]): DenseVector[Double] =
    QuasiNewtonOptimizer.run(
      nPoints, this.regParam, this.numIterations,
      updater, gradient, this.stepSize, initialP,
      ParamOutEdges, DataPipe(identity[Stream[(DenseVector[Double], Double)]] _)
    )
}

object QuasiNewtonOptimizer {

  private val logger = Logger.getLogger(this.getClass)

  def run[T](
    nPoints: Long, regParam: Double, numIterations: Int,
    updater: HessianUpdater, gradient: Gradient, stepSize: Double,
    initial: DenseVector[Double], POutEdges: T,
    transform: DataPipe[T, Stream[(DenseVector[Double], Double)]],
    logging: Boolean = true, logging_rate: Int = 100): DenseVector[Double] = {

    var oldW: DenseVector[Double] = initial

    var newW = oldW
    val hessian = transform(POutEdges)
      .map(_._1)
      .map(x => DenseVector(x.toArray ++ Array(1.0)))
      .map(x => x*x.t)
      .reduce((x: DenseMatrix[Double],
               y: DenseMatrix[Double]) =>
        x + y)

    var regInvHessian = inv(hessian + DenseMatrix.eye[Double](initial.length)*regParam)
    var oldCumGradient = DenseVector.zeros[Double](initial.length)

    println("Performing Quasi-Newton Optimization")
    cfor(1)(iter => iter < numIterations, iter => iter + 1)( iter => {
      val cumGradient: DenseVector[Double] = DenseVector.zeros(initial.length)
      var cumLoss: Double = 0.0
      transform(POutEdges).foreach(ed => {
        val x = DenseVector(ed._1.toArray ++ Array(1.0))
        val y = ed._2
        cumLoss += gradient.compute(x, y, oldW, cumGradient)
      })

      if(logging && iter % logging_rate == 0) RegularizedOptimizer.prettyPrint(iter, cumLoss/nPoints.toDouble)

      //Find the search direction p = inv(H)*grad(J)
      //perform update x_new = x + step*p
      val searchDirection = regInvHessian*cumGradient*(-1.0)
      newW = updater.compute(oldW, searchDirection, stepSize, iter, regParam)._1

      regInvHessian = updater.hessianUpdate(regInvHessian, newW-oldW, cumGradient-oldCumGradient)
      oldW = newW
      oldCumGradient = cumGradient
    })
    newW
  }
} 
Example 200
Source File: CommitteeModelSolver.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.optimization

import breeze.linalg.{DenseMatrix, DenseVector, inv}
import org.apache.spark.rdd.RDD


class RDDCommitteeSolver extends RegularizedOptimizer[
  DenseVector[Double], DenseVector[Double],
  Double, RDD[(DenseVector[Double], Double)]] {

  override def optimize(nPoints: Long,
                        ParamOutEdges: RDD[(DenseVector[Double], Double)],
                        initialP: DenseVector[Double]): DenseVector[Double] = {
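
    // Least-squares optimal committee weights: estimate the empirical error
    // covariance C of the member predictors, then return
    // w = C^{-1} 1 / (1^T C^{-1} 1), so the weights sum to one.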

    val sumMat = ParamOutEdges.map(couple => {
      val diff = couple._1 - DenseVector.fill[Double](couple._1.length)(couple._2)
      diff * diff.t
    }).reduce((mat1, mat2) => mat1+mat2)

    sumMat :/= nPoints.toDouble
    val ones = DenseVector.ones[Double](initialP.length)
    val invMat = inv(sumMat + DenseMatrix.eye[Double](initialP.length)*regParam)
    val ans: DenseVector[Double] = invMat*ones
    val Z: Double = ones dot ans
    ans/Z
  }
}