scala.math.min Scala Examples

The following examples show how to use scala.math.min. You can go to the original project or source file by following the links above each example.
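Before the project examples, a minimal sketch of the function itself (values chosen purely for illustration):

import scala.math.{max, min}

val smallerInt    = min(3, 7)                // 3: smaller of two Ints
val smallerDouble = min(2.5, -1.0)           // -1.0: overloads also exist for Double, Float and Long
val clamped       = max(0.0, min(1.0, 1.7))  // 1.0: the clamp-to-range idiom that recurs in the examples below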
Example 1
Source File: MaxAbsScalerModel.scala    From mleap   with Apache License 2.0
package ml.combust.mleap.core.feature

import ml.combust.mleap.core.Model
import ml.combust.mleap.core.annotation.SparkCode
import ml.combust.mleap.core.types.{StructType, TensorType}
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}

import scala.math.{max, min}


@SparkCode(uri = "https://github.com/apache/spark/blob/v2.0.0/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala")
case class MaxAbsScalerModel(maxAbs: Vector) extends Model {
  def apply(vector: Vector): Vector = {
    val maxAbsUnzero = Vectors.dense(maxAbs.toArray.map(x => if (x == 0) 1 else x))

    vector match {
      case DenseVector(values) =>
        val vs = values.clone()
        val size = vs.length
        var i = 0

        while (i < size) {
          if (!values(i).isNaN) {
            val rescale = max(-1.0, min(1.0, values(i) / maxAbsUnzero(i)))
            vs(i) = rescale
          }
          i += 1
        }
        Vectors.dense(vs)
      case SparseVector(size, indices, values) =>
        val vs = values.clone()
        val nnz = vs.length
        var i = 0
        while (i < nnz) {
          val raw = max(-1.0, min(1.0, values(i) / maxAbsUnzero(indices(i))))

          vs(i) = raw
          i += 1
        }
        Vectors.sparse(size, indices, vs)
    }
  }

  override def inputSchema: StructType = StructType("input" -> TensorType.Double(maxAbs.size)).get

  override def outputSchema: StructType = StructType("output" -> TensorType.Double(maxAbs.size)).get

} 
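As a quick illustration of the clamping above, a hypothetical call (input values invented for this sketch, not part of the original file):

// Each component is divided by its max-abs value, then clamped to [-1, 1] via max(-1.0, min(1.0, _)).
val model = MaxAbsScalerModel(Vectors.dense(2.0, 4.0))
val scaled = model(Vectors.dense(1.0, -8.0))  // Vectors.dense(0.5, -1.0)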
Example 2
Source File: MinMaxScalerModel.scala    From mleap   with Apache License 2.0
package ml.combust.mleap.core.feature

import ml.combust.mleap.core.Model
import ml.combust.mleap.core.annotation.SparkCode
import ml.combust.mleap.core.types.{StructType, TensorType}
import org.apache.spark.ml.linalg.mleap.VectorUtil._
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}

import scala.math.{max, min}


@SparkCode(uri = "https://github.com/apache/spark/blob/v2.0.0/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala")
case class MinMaxScalerModel(originalMin: Vector,
                             originalMax: Vector,
                             minValue: Double = 0.0,
                             maxValue: Double = 1.0) extends Model {
  // Constructor and derived fields reconstructed from usage below so the excerpt compiles.
  val originalRange: Array[Double] = (originalMax.toBreeze - originalMin.toBreeze).toArray
  val minArray: Array[Double] = originalMin.toArray

  def apply(vector: Vector): Vector = {
    val scale = maxValue - minValue

    // 0 in sparse vector will probably be rescaled to non-zero
    val values = vector.copy.toArray
    val size = values.length
    var i = 0
    while (i < size) {
      if (!values(i).isNaN) {
        val raw = if (originalRange(i) != 0) (values(i) - minArray(i)) / originalRange(i) else 0.5
        values(i) = raw * scale + minValue
      }
      i += 1
    }
    Vectors.dense(values)
  }

  override def inputSchema: StructType = StructType("input" -> TensorType.Double(originalRange.length)).get

  override def outputSchema: StructType = StructType("output" -> TensorType.Double(originalRange.length)).get

} 
Example 3
Source File: WeightedLevenshtein.scala    From spark-nlp   with Apache License 2.0
package com.johnsnowlabs.nlp.annotators.spell.context

import com.github.liblevenshtein.transducer.{Candidate, ITransducer}
import com.johnsnowlabs.nlp.annotators.spell.context.parser.RegexParser

import scala.collection.mutable
import scala.io.Codec
import scala.math.min

trait WeightedLevenshtein {

  def levenshteinDist(s11: String, s22: String)(cost:(String, String) => Float): Float = {

    // cope with start of string
    val s1 = s"^${s11}_"
    val s2 = s"^${s22}_"

    val s1_ = s"_^${s11}_"
    val s2_ = s"_^${s22}_"

    val dist = Array.tabulate(s2.length + 1, s1.length + 1) { (j, i) => if (j == 0) i * 1.0f else if (i == 0) j * 1.0f else 0.0f }

    for (j <- 1 to s2.length; i <- 1 to s1.length)
      dist(j)(i) = if (s2(j - 1) == s1(i - 1)) dist(j - 1)(i - 1)
      else {
        minimum(dist(j - 1)(i) + cost(s2_.substring(j - 1, j + 1), s1(i - 1) + "Ɛ"),   //insert in s1
          dist(j)(i - 1) + cost(s2(j - 1) + "Ɛ", s1_.substring(i - 1, i + 1)),         //insert in s2
          dist(j - 1)(i - 1) + cost(s2(j - 1).toString, s1(i - 1).toString))
      }

    dist(s2.length)(s1.length)
  }

  
  // Weighted Levenshtein distance: pair-specific edit costs come from `weights`; unknown pairs cost 1.0f (0 when the strings are equal).
  def wLevenshteinDist(s1: String, s2: String, weights: Map[String, Map[String, Float]]): Float =
    levenshteinDist(s1, s2)(genCost(weights))

  def loadWeights(filename: String): Map[String, Map[String, Float]] = {
    // store per-pair edit weights; each input line is pipe-separated: source|target|cost
    val vocabIdxs = mutable.HashMap[String, mutable.Map[String, Float]]()

    implicit val codec: Codec = Codec.UTF8

    scala.io.Source.fromFile(filename).getLines.foreach { case line =>
      val lineFields = line.split("\\|")
      val dist = vocabIdxs.getOrElse(lineFields(0), mutable.Map[String, Float]()).updated(lineFields(1), lineFields(2).toFloat)
      vocabIdxs.update(lineFields(0), dist)
    }
    vocabIdxs.toMap.mapValues(_.toMap)
  }


  private def genCost(weights: Map[String, Map[String, Float]])(a:String, b:String): Float = {
    if (weights.contains(a) && weights(a).contains(b))
      weights(a)(b)
    else if (a == b) {
      0.0f
    }
    else
      1.0f
  }

  private def minimum(i1: Float, i2: Float, i3: Float) = min(min(i1, i2), i3)


  def learnDist(s1: String, s2: String): Seq[(String, String)] = {
    val acc: Seq[(String, String)] = Seq.empty
    val dist = Array.tabulate(s2.length + 1, s1.length + 1) { (j, i) => if (j == 0) i * 1.0f else if (i == 0) j * 1.0f else 0.0f }

    for (j <- 1 to s2.length; i <- 1 to s1.length)
      dist(j)(i) = if (s2(j - 1) == s1(i - 1)) dist(j - 1)(i - 1)
      else minimum(
        dist(j - 1)(i) + 1.0f,
        dist(j)(i - 1) + 1.0f,
        dist(j - 1)(i - 1) + 1.0f)

    backTrack(dist, s2, s1, s2.length, s1.length, acc)
  }

  def backTrack(dist: Array[Array[Float]], s2:String, s1:String,
                j:Int, i:Int, acc:Seq[(String, String)]): Seq[(String, String)]= {

    if (s2(j-1) == s1(i-1)) {
      if (j == 1 && i == 1)
        acc
      else
        backTrack(dist, s2, s1, j - 1, i - 1, acc)
    }
    else {
      val pSteps = Map(dist(j - 1)(i) -> ("", s2(j - 1).toString, j - 1, i),
        dist(j)(i - 1) -> (s1(i - 1).toString, "", j, i - 1),
        dist(j - 1)(i - 1) -> (s1(i - 1).toString, s2(j - 1).toString, j - 1, i - 1))

      val best = pSteps.minBy(_._1)._2
      backTrack(dist, s2, s1, best._3, best._4, acc :+ (best._1, best._2))
    }
  }

} 
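A minimal sketch of using the trait above (the demo object and inputs are illustrative, not from the original file):

object LevenshteinDemo extends WeightedLevenshtein {
  // With an empty weight map every edit costs 1.0f, so this reduces to the classic Levenshtein distance.
  val distance: Float = wLevenshteinDist("kitten", "sitting", Map.empty)  // 3.0f
}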
Example 4
Source File: Statistics.scala    From Clustering4Ever   with Apache License 2.0
package org.clustering4ever.stats

// Enclosing object restored so the excerpt compiles (name assumed from the file name); the original file defines additional members.
object Statistics {

	// Draws an element from the distribution with probability proportional to its weight.
	final def obtainMedianFollowingWeightedDistribution[V](distribution: Seq[(V, Double)]): V = {
		val p = scala.util.Random.nextDouble * distribution.foldLeft(0D)((agg, e) => agg + e._2)
		@annotation.tailrec
		def go(accum: Double, i: Int): Int = {
			if(accum < p) go(accum + distribution(i)._2, i + 1)
			else i
		}
		val cpt = go(0D, 0)
		if(cpt == 0) distribution.head._1 else distribution(cpt - 1)._1
	}
} 
Example 5
Source File: SkewReplication.scala    From spark-skewjoin   with Apache License 2.0
package com.tresata.spark.skewjoin

import scala.math.{ min, max }
import org.slf4j.LoggerFactory

trait SkewReplication extends Serializable {
  def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int)
}

case class DefaultSkewReplication(replicationFactor: Double = 1e-2) extends SkewReplication {
  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = (
    max(min((rightCount * replicationFactor).toInt, numPartitions), 1),
    max(min((leftCount * replicationFactor).toInt, numPartitions), 1)
  )
}

private case class RightReplication(skewReplication: SkewReplication) extends SkewReplication {
  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = {
    val (left, right) = skewReplication.getReplications(leftCount, rightCount, numPartitions)
    (1, max(min(left * right, numPartitions), 1))
    //(1, right)
  }
}

private case class LeftReplication(skewReplication: SkewReplication) extends SkewReplication {
  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = {
    val (left, right) = skewReplication.getReplications(leftCount, rightCount, numPartitions)
    (max(min(left * right, numPartitions), 1), 1)
    //(left, 1)
  }
}

private object LoggingSkewReplication {
  private val log = LoggerFactory.getLogger(getClass)
}

case class LoggingSkewReplication(skewReplication: SkewReplication) extends SkewReplication {
  import LoggingSkewReplication._
  private var maxLeftReplication = 0
  private var maxRightReplication = 0

  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = {
    val (left, right) = skewReplication.getReplications(leftCount, rightCount, numPartitions)
    if (left > maxLeftReplication) {
      log.info("new max left replication {}", left)
      maxLeftReplication = left
    }
    if (right > maxRightReplication) {
      log.info("new max right replication {}", right)
      maxRightReplication = right
    }
    (left, right)
  }
} 
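To make the clamping in DefaultSkewReplication concrete, a hypothetical call (counts invented for this sketch):

// rightCount * 0.01 = 10000 is capped at numPartitions; leftCount * 0.01 truncates to 0 and is floored at 1.
val (leftRep, rightRep) =
  DefaultSkewReplication().getReplications(leftCount = 50L, rightCount = 1000000L, numPartitions = 200)
// leftRep = 200, rightRep = 1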
Example 6
Source File: Utils.scala    From streamDM   with Apache License 2.0
package org.apache.spark.streamdm.classifiers.trees

import scala.math.min

object Utils {
  
  // Renders the array as e.g. "{1,2,3}": elements joined with `split` and wrapped in `head`/`tail`.
  def arraytoString[T](pre: Array[T], split: String = ",", head: String = "{", tail: String = "}"): String = {
    val sb = new StringBuffer(head)
    for (i <- 0 until pre.length) {
      sb.append(pre(i))
      if (i < pre.length - 1)
        sb.append(split)
    }
    sb.append(tail).toString()
  }
} 
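A one-line illustration of the helper above (values chosen for this sketch):

val joined = Utils.arraytoString(Array(1, 2, 3))  // "{1,2,3}"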
Example 7
Source File: TruncatableSeekableStream.scala    From spark-bam   with Apache License 2.0
package org.hammerlab.bam.check.seqdoop

import java.io.EOFException

import hammerlab.path._
import htsjdk.samtools.seekablestream.SeekableStream
import org.hammerlab.channel.SeekableByteChannel

import scala.math.min

case class TruncatableSeekableStream(channel: SeekableByteChannel,
                                     source: Path)
  extends SeekableStream {

  var limit = channel.size

  def clear(): Unit =
    limit = channel.size

  override def length(): Long = limit

  override def seek(position: Long): Unit =
    channel.seek(
      min(
        limit,
        position
      )
    )

  override def getSource: String = source.toString()

  override def position(): Long = channel.position()
  override def eof(): Boolean = channel.position() == length()

  def remaining: Long = length() - position()

  override def read(): Int =
    if (position() < length())
      channel.read()
    else
      -1

  override def read(b: Array[Byte],
                    off: Int,
                    len: Int): Int = {
    if (len > remaining) {
      channel.read(b, off, remaining.toInt)
      throw new EOFException(
        s"Attempting to read $len bytes from offset $off when channel is at ${position()} with length ${length()} (only $remaining bytes available)"
      )
    }
    channel.read(b, off, len)
  }

  override def close(): Unit = channel.close()
}