scala.collection.mutable.HashMap Scala Examples

The following examples show how to use scala.collection.mutable.HashMap. Each example is taken from an open-source project; the source file, project name, and license are listed above it.
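Before the project examples, here is a minimal standalone sketch (written for this page, not taken from any of the projects below) of the mutable HashMap operations that recur throughout them: inserting with += and put, reading with get and getOrElse, and accumulating counts.

import scala.collection.mutable.HashMap

object HashMapBasics {
  def main(args: Array[String]): Unit = {
    val counts = new HashMap[String, Int]()

    counts += ("spark" -> 1)   // insert with the operator form
    counts.put("scala", 2)     // insert with the method form; returns the previous value as an Option

    val spark: Option[Int] = counts.get("spark")    // Some(1)
    val kafka: Int = counts.getOrElse("kafka", 0)   // 0, because the key is absent

    // The counting pattern used by several of the examples below.
    Seq("spark", "scala", "spark").foreach { w =>
      counts.put(w, counts.getOrElse(w, 0) + 1)
    }

    counts.foreach { case (k, v) => println(s"$k -> $v") }
  }
}

The examples that follow apply these same operations inside Spark jobs and accumulators, Akka actors, UI code, and test fixtures.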
Example 1
Source File: IOCommon.scala    From Swallow   with Apache License 2.0
package com.intel.hibench.sparkbench.common

import java.io.{File, FileInputStream, IOException, InputStreamReader}
import java.util.Properties

import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.io.{NullWritable, Text}
import org.apache.hadoop.mapred.SequenceFileOutputFormat
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkContext, SparkException}

import scala.collection.JavaConversions._
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag

class IOCommon(val sc:SparkContext) {
   def load[T:ClassTag:TypeTag](filename:String, force_format:Option[String]=None) = {
     val input_format = force_format.getOrElse(
       IOCommon.getProperty("sparkbench.inputformat").getOrElse("Text"))

     input_format match {
       case "Text" =>
         sc.textFile(filename)

       case "Sequence" =>
         sc.sequenceFile[NullWritable, Text](filename).map(_._2.toString)

       case _ => throw new UnsupportedOperationException(s"Unknown inpout format: $input_format")
     }
   }

   def save(filename:String, data:RDD[_], prefix:String) = {
     val output_format = IOCommon.getProperty(prefix).getOrElse("Text")
     val output_format_codec =
       loadClassByName[CompressionCodec](IOCommon.getProperty(prefix + ".codec"))

     output_format match {
       case "Text" =>
         if (output_format_codec.isEmpty)  data.saveAsTextFile(filename)
         else data.saveAsTextFile(filename, output_format_codec.get)

       case "Sequence" =>
         val sequence_data = data.map(x => (NullWritable.get(), new Text(x.toString)))
         if (output_format_codec.isEmpty) {
           sequence_data.saveAsHadoopFile[SequenceFileOutputFormat[NullWritable, Text]](filename)
         } else {
           sequence_data.saveAsHadoopFile[SequenceFileOutputFormat[NullWritable, Text]](filename,
             output_format_codec.get)
         }

       case _ => throw new UnsupportedOperationException(s"Unknown output format: $output_format")
     }
   }

   def save(filename:String, data:RDD[_]):Unit = save(filename, data, "sparkbench.outputformat")

   private def loadClassByName[T](name:Option[String]) = {
     if (!name.isEmpty) Some(Class.forName(name.get)
       .newInstance.asInstanceOf[T].getClass) else None
   }

   private def callMethod[T, R](obj:T, method_name:String) =
     obj.getClass.getMethod(method_name).invoke(obj).asInstanceOf[R]
 }

object IOCommon {
   private val sparkbench_conf: HashMap[String, String] =
     getPropertiesFromFile(System.getenv("SPARKBENCH_PROPERTIES_FILES"))

   def getPropertiesFromFile(filenames: String): HashMap[String, String] = {
     val result = new HashMap[String, String]
     filenames.split(',').filter(_.stripMargin.length > 0).foreach { filename =>
       val file = new File(filename)
       require(file.exists, s"Properties file $file does not exist")
       require(file.isFile, s"Properties file $file is not a normal file")

       val inReader = new InputStreamReader(new FileInputStream(file), "UTF-8")
       try {
         val properties = new Properties()
         properties.load(inReader)
         result ++= properties.stringPropertyNames()
           .map(k => (k, properties(k).trim)).toMap
       } catch {
         case e: IOException =>
           val message = s"Failed when loading Sparkbench properties file $file"
           throw new SparkException(message, e)
       } finally {
         inReader.close()
       }
     }
     result.filter{case (key, value) => value.toLowerCase != "none"}
   }

   def getProperty(key:String):Option[String] = sparkbench_conf.get(key)

   def dumpProperties(): Unit = sparkbench_conf
       .foreach{case (key, value)=> println(s"$key\t\t$value")}
 } 
Example 2
Source File: UniqueTermAccumulator.scala    From sparkpipe-core   with Apache License 2.0
package software.uncharted.sparkpipe.ops.core.dataframe.text.util

import org.apache.spark.sql.Row
import org.apache.spark.util.AccumulatorV2
import scala.collection.mutable.HashMap


private[text] class UniqueTermAccumulator(
  private var result: HashMap[String, Int],
  private var touched: Boolean = false
) extends AccumulatorV2[Seq[String], HashMap[String, Int]] {
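  // Counts how many times each term was added; Spark combines per-partition maps via merge().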

  def this() {
    this(new HashMap[String, Int]())
  }

  override def add(in: Seq[String]): Unit = {
    in.foreach(w => {
      result.put(w, result.getOrElse(w, 0) + 1)
    })
    touched = true
  }

  override def copy(): AccumulatorV2[Seq[String], HashMap[String, Int]] = {
    val clone = new HashMap[String, Int]()
    result.foreach(kv => clone.put(kv._1, kv._2))
    new UniqueTermAccumulator(clone, false)
  }

  override def isZero(): Boolean = {
    !touched
  }

  override def merge(other: AccumulatorV2[Seq[String], HashMap[String, Int]]): Unit = {
    other.value.foreach(t => {
      result.put(t._1, result.getOrElse(t._1, 0) + t._2)
    })
  }

  override def reset(): Unit = {
    result.clear
    touched = false
  }

  override def value: HashMap[String, Int] = {
    result
  }
} 
Example 3
Source File: Mapper.scala    From Scalaprof   with GNU General Public License v2.0
package edu.neu.coe.scala.mapreduce

import akka.actor.{ Actor, ActorLogging, ActorRef }
import scala.collection.mutable.HashMap
import scala.util._


class Mapper_Forgiving[K1,V1,K2,V2](f: (K1,V1)=>(K2,V2)) extends Mapper[K1,V1,K2,V2](f) {
  
  override def prepareReply(v2k2ts: Seq[Try[(K2,V2)]]) = {
      val v2sK2m = HashMap[K2,Seq[V2]]() // mutable
      val xs = scala.collection.mutable.ListBuffer[Throwable]() // mutable buffer, so appended errors are kept
      for (v2k2t <- v2k2ts; v2k2e = Master.sequence(v2k2t))
        v2k2e match {
          case Right((k2,v2)) => v2sK2m put(k2, v2+:(v2sK2m get(k2) getOrElse(Nil)))
          case Left(x) => xs += x
      }
      (v2sK2m.toMap, xs.toSeq)
  }
}

case class Incoming[K, V](m: Seq[(K,V)]) {
  override def toString = s"Incoming: with ${m.size} elements"
}

object Incoming {
  def sequence[K,V](vs: Seq[V]): Incoming[K,V] = Incoming((vs zip Stream.continually(null.asInstanceOf[K])).map{_.swap})
  def map[K, V](vKm: Map[K,V]): Incoming[K,V] = Incoming(vKm.toSeq)
}

object Mapper {
} 
Example 4
Source File: LocalKMeans.scala    From drizzle-spark   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println 
Example 5
Source File: JsonUtils.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.io.Writer

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization


  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val result = new HashMap[String, HashMap[Int, Long]]()
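    // Nested layout: topic -> (partition -> offset).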
    partitionOffsets.foreach { case (tp, off) =>
        val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
        parts += tp.partition -> off
        result += tp.topic -> parts
    }
    Serialization.write(result)
  }
} 
Example 6
Source File: DStreamCheckpointData.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.streaming.dstream

import java.io.{IOException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.hadoop.fs.{FileSystem, Path}

import org.apache.spark.internal.Logging
import org.apache.spark.streaming.Time
import org.apache.spark.util.Utils

private[streaming]
class DStreamCheckpointData[T: ClassTag](dstream: DStream[T])
  extends Serializable with Logging {
  protected val data = new HashMap[Time, AnyRef]()

  // Mapping of the batch time to the checkpointed RDD file of that time
  @transient private var timeToCheckpointFile = new HashMap[Time, String]
  // Mapping of the batch time to the time of the oldest checkpointed RDD
  // in that batch's checkpoint data
  @transient private var timeToOldestCheckpointFileTime = new HashMap[Time, Time]

  @transient private var fileSystem: FileSystem = null
  protected[streaming] def currentCheckpointFiles = data.asInstanceOf[HashMap[Time, String]]

  
  def restore() {
    // Create RDDs from the checkpoint data
    currentCheckpointFiles.foreach {
      case(time, file) =>
        logInfo("Restoring checkpointed RDD for time " + time + " from file '" + file + "'")
        dstream.generatedRDDs += ((time, dstream.context.sparkContext.checkpointFile[T](file)))
    }
  }

  override def toString: String = {
    "[\n" + currentCheckpointFiles.size + " checkpoint files \n" +
      currentCheckpointFiles.mkString("\n") + "\n]"
  }

  @throws(classOf[IOException])
  private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
    logDebug(this.getClass().getSimpleName + ".writeObject used")
    if (dstream.context.graph != null) {
      dstream.context.graph.synchronized {
        if (dstream.context.graph.checkpointInProgress) {
          oos.defaultWriteObject()
        } else {
          val msg = "Object of " + this.getClass.getName + " is being serialized " +
            " possibly as a part of closure of an RDD operation. This is because " +
            " the DStream object is being referred to from within the closure. " +
            " Please rewrite the RDD operation inside this DStream to avoid this. " +
            " This has been enforced to avoid bloating of Spark tasks " +
            " with unnecessary objects."
          throw new java.io.NotSerializableException(msg)
        }
      }
    } else {
      throw new java.io.NotSerializableException(
        "Graph is unexpectedly null when DStream is being serialized.")
    }
  }

  @throws(classOf[IOException])
  private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException {
    logDebug(this.getClass().getSimpleName + ".readObject used")
    ois.defaultReadObject()
    timeToOldestCheckpointFileTime = new HashMap[Time, Time]
    timeToCheckpointFile = new HashMap[Time, String]
  }
} 
Example 7
Source File: MasterWebUI.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.deploy.master.ui

import scala.collection.mutable.HashMap

import org.eclipse.jetty.servlet.ServletContextHandler

import org.apache.spark.deploy.master.Master
import org.apache.spark.internal.Logging
import org.apache.spark.ui.{SparkUI, WebUI}
import org.apache.spark.ui.JettyUtils._


  def initialize() {
    val masterPage = new MasterPage(this)
    attachPage(new ApplicationPage(this))
    attachPage(masterPage)
    attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static"))
    attachHandler(createRedirectHandler(
      "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST")))
    attachHandler(createRedirectHandler(
      "/driver/kill", "/", masterPage.handleDriverKillRequest, httpMethods = Set("POST")))
  }

  def addProxyTargets(id: String, target: String): Unit = {
    var endTarget = target.stripSuffix("/")
    val handler = createProxyHandler("/proxy/" + id, endTarget)
    attachHandler(handler)
    proxyHandlers(id) = handler
  }

  def removeProxyTargets(id: String): Unit = {
    proxyHandlers.remove(id).foreach(detachHandler)
  }
}

private[master] object MasterWebUI {
  private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
} 
Example 8
Source File: PoolTable.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.ui.jobs

import java.net.URLEncoder

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils


private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    val activeStages = poolToActiveStages.get(p.name) match {
      case Some(stages) => stages.size
      case None => 0
    }
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8"))
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
} 
Example 9
Source File: ConfigReader.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}


  def substitute(input: String): String = substitute(input, Set())

  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input != null) {
      ConfigReader.REF_RE.replaceAllIn(input, { m =>
        val prefix = m.group(1)
        val name = m.group(2)
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val replacement = bindings.get(prefix)
          .flatMap(_.get(name))
          .map { v => substitute(v, usedRefs + ref) }
          .getOrElse(m.matched)
        Regex.quoteReplacement(replacement)
      })
    } else {
      input
    }
  }

} 
Example 10
Source File: StageInfo.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.RDDInfo


  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskMetrics: TaskMetrics = null,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details,
      taskMetrics,
      taskLocalityPreferences)
  }
} 
Example 11
Source File: GroupedCountEvaluator.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.partial

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.spark.util.collection.OpenHashMap


private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  private var outputsMerged = 0
  private val sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap
    }
  }
} 
Example 12
Source File: MasterWebUISuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.deploy.master.ui

import java.io.DataOutputStream
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets
import java.util.Date

import scala.collection.mutable.HashMap

import org.mockito.Mockito.{mock, times, verify, when}
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver}
import org.apache.spark.deploy.DeployTestUtils._
import org.apache.spark.deploy.master._
import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv}


class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll {

  val conf = new SparkConf
  val securityMgr = new SecurityManager(conf)
  val rpcEnv = mock(classOf[RpcEnv])
  val master = mock(classOf[Master])
  val masterEndpointRef = mock(classOf[RpcEndpointRef])
  when(master.securityMgr).thenReturn(securityMgr)
  when(master.conf).thenReturn(conf)
  when(master.rpcEnv).thenReturn(rpcEnv)
  when(master.self).thenReturn(masterEndpointRef)
  val masterWebUI = new MasterWebUI(master, 0)

  override def beforeAll() {
    super.beforeAll()
    masterWebUI.bind()
  }

  override def afterAll() {
    masterWebUI.stop()
    super.afterAll()
  }

  test("kill application") {
    val appDesc = createAppDesc()
    // use new start date so it isn't filtered by UI
    val activeApp = new ApplicationInfo(
      new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue)

    when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp)))

    val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/"
    val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify the master was called to remove the active app
    verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED)
  }

  test("kill driver") {
    val activeDriverId = "driver-0"
    val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/"
    val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify that master was asked to kill driver with the correct id
    verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId))
  }

  private def convPostDataToString(data: Map[String, String]): String = {
    (for ((name, value) <- data) yield s"$name=$value").mkString("&")
  }

  
  private def sendHttpRequest(
      url: String,
      method: String,
      body: String = ""): HttpURLConnection = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod(method)
    if (body.nonEmpty) {
      conn.setDoOutput(true)
      conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded")
      conn.setRequestProperty("Content-Length", Integer.toString(body.length))
      val out = new DataOutputStream(conn.getOutputStream)
      out.write(body.getBytes(StandardCharsets.UTF_8))
      out.close()
    }
    conn
  }
} 
Example 13
Source File: CMaxTableSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.CMaxTable
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.Table

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class CMaxTableSpec extends TorchSpec {
    "A CMaxTable Module" should "generate correct output and grad" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)
    val module = new CMaxTable[Double]()

    val input1 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input = new Table()
    input(1.toDouble) = input1
    input(2.toDouble) = input2

    val start = System.nanoTime()
    val output = module.forward(input)
    val gradInput = module.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.CMaxTable()\n" +
      "output = module:forward(input)\n" +
      "gradInput = module:backward(input,gradOutput)"


    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be(output)
    luaOutput2 should be (gradInput)

    println("Test case : CMaxTable, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")

  }
} 
Example 14
Source File: L1HingeEmbeddingCriterionSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.L1HingeEmbeddingCriterion
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.Table

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class L1HingeEmbeddingCriterionSpec extends TorchSpec {
    "A L1HingeEmbeddingCriterion" should "generate correct output and grad with y == 1 " in {
    torchCheck()
    val seed = 2
    RNG.setSeed(seed)
    val module = new L1HingeEmbeddingCriterion[Double](0.6)

    val input1 = Tensor[Double](2).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](2).apply1(e => Random.nextDouble())
    val input = new Table()
    input(1.0) = input1
    input(2.0) = input2

    val target = Tensor[Double](1)
    target(Array(1)) = 1.0

    val start = System.nanoTime()
    val output = module.forward(input, target)
    val gradInput = module.backward(input, target)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.L1HingeEmbeddingCriterion(0.6)\n" +
      "output = module:forward(input, 1)\n" +
      "gradInput = module:backward(input, 1)\n"

    val (luaTime, torchResult) = TH.run(code, Map("input" -> input), Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Double]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be(output)
    luaOutput2 should be (gradInput)

    println("Test case : L1HingeEmbeddingCriterion, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }

  "A L1HingeEmbeddingCriterion" should "generate correct output and grad with y == -1 " in {
    torchCheck()
    val seed = 2
    RNG.setSeed(seed)
    val module = new L1HingeEmbeddingCriterion[Double](0.6)

    val input1 = Tensor[Double](2).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](2).apply1(e => Random.nextDouble())
    val input = new Table()
    input(1.0) = input1
    input(2.0) = input2

    val target = Tensor[Double](1)
    target(Array(1)) = -1.0

    val start = System.nanoTime()
    val output = module.forward(input, target)
    val gradInput = module.backward(input, target)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.L1HingeEmbeddingCriterion(0.6)\n" +
      "output = module:forward(input, -1.0)\n" +
      "gradInput = module:backward(input, -1.0)\n"

    val (luaTime, torchResult) = TH.run(code, Map("input" -> input), Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Double]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be(output)
    luaOutput2 should be (gradInput)

    println("Test case : L1HingeEmbeddingCriterion, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }
} 
Example 15
Source File: CDivTableSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.CDivTable
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.Table

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class CDivTableSpec extends TorchSpec {
    "A CDivTable Module" should "generate correct output and grad" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)
    val module = new CDivTable[Double]()

    val input1 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input = new Table()
    input(1.toDouble) = input1
    input(2.toDouble) = input2

    val start = System.nanoTime()
    val output = module.forward(input)
    val gradInput = module.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.CDivTable()\n" +
      "output = module:forward(input)\n" +
      "gradInput = module:backward(input,gradOutput)"


    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be (output)
    luaOutput2 should be (gradInput)

    println("Test case : CDivTable, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }
} 
Example 16
Source File: CMulTableSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.Table
import com.intel.analytics.bigdl.nn.CMulTable

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class CMulTableSpec extends TorchSpec {
    "A CMulTable Module" should "generate correct output and grad" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)
    val module = new CMulTable[Double]()

    val input1 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input = new Table()
    input(1.toDouble) = input1
    input(2.toDouble) = input2

    val start = System.nanoTime()
    val output = module.forward(input)
    val gradInput = module.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.CMulTable()\n" +
      "output = module:forward(input)\n" +
      "gradInput = module:backward(input,gradOutput)"


    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be (output)
    luaOutput2 should be (gradInput)

    println("Test case : CMinTable, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")

  }
} 
Example 17
Source File: CosineDistanceSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.CosineDistance
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.Table

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class CosineDistanceSpec extends TorchSpec {
    "A CosineDistance " should "generate correct output and grad" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)

    val input1 = Tensor[Double](3).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](3).apply1(e => Random.nextDouble())
    val gradOutput = Tensor[Double](1).apply1(e => Random.nextDouble())

    val input = new Table()
    input(1.0) = input1
    input(2.0) = input2

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.CosineDistance()\n" +
      "output = module:forward(input)\n" +
      "gradInput = module:backward(input,gradOutput)\n"

    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))

    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    val module = new CosineDistance[Double]()
    val start = System.nanoTime()
    val output = module.forward(input)
    val gradInput = module.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    output should be(luaOutput1)
    luaOutput2 should be (gradInput)

    println("Test case : CosineDistance, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }
} 
Example 18
Source File: CosineEmbeddingCriterionSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.CosineEmbeddingCriterion
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.{RandomGenerator, Table}

import scala.collection.mutable.HashMap

import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class CosineEmbeddingCriterionSpec extends TorchSpec {
    "A CosineEmbeddingCriterion Module" should "generate correct output and grad" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)
    val module = new CosineEmbeddingCriterion[Double](0.2)

    val input1 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 2))
    val input2 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 1))
    val input = new Table()
    input(1.0) = input1
    input(2.0) = input2

    val target = new Table()
    val target1 = Tensor[Double](Storage(Array(-0.5)))
    target(1.toDouble) = target1

    val start = System.nanoTime()
    val output = module.forward(input, target)
    val gradInput = module.backward(input, target)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.CosineEmbeddingCriterion(0.2)\n" +
      "_idx = module._idx\n" +
      "_outputs = module._outputs\n" +
      "buffer = module.buffer\n" +
      "output = module:forward(input, -0.5)\n" +
      "gradInput = module:backward(input, -0.5)\n"


    val (luaTime, torchResult) = TH.run(code, Map("input" -> input),
      Array("output", "gradInput", "_idx", "buffer", "_outputs"))
    val luaOutput1 = torchResult("output").asInstanceOf[Double]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be(output)
    luaOutput2 should be (gradInput)

    println("Test case : CrossEntropyCriterion, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }
} 
Example 19
Source File: CosineDistanceCriterionSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.CosineDistanceCriterion
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.{RandomGenerator, Table}

import scala.collection.mutable.HashMap

import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class CosineDistanceCriterionSpec extends TorchSpec {
  "A CosineDistanceCriterionSpec Module" should "generate correct output and grad" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)
    val module = CosineDistanceCriterion[Double](false)

    val input1 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 2))
    val input2 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 1))
    val input = new Table()
    input(1.0) = input1
    input(2.0) = input2

    val target = new Table()
    val target1 = Tensor[Double](Storage(Array(1.0)))
    target(1.toDouble) = target1

    val start = System.nanoTime()
    val output = module.forward(input1, input2)
    val gradInput = module.backward(input1, input2)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.CosineEmbeddingCriterion(0.0)\n" +
      "_idx = module._idx\n" +
      "_outputs = module._outputs\n" +
      "buffer = module.buffer\n" +
      "output = module:forward(input, 1.0)\n" +
      "gradInput = module:backward(input, 1.0)\n"


    val (luaTime, torchResult) = TH.run(code, Map("input" -> input),
      Array("output", "gradInput", "_idx", "buffer", "_outputs"))
    val luaOutput1 = torchResult("output").asInstanceOf[Double]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be(output)
    luaOutput2[Tensor[Double]](1) should be (gradInput.squeeze())

    println("Test case : CrossEntropyCriterion, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }
} 
Example 20
Source File: NarrowTableSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.NarrowTable
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.{T, Table}

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class NarrowTableSpec extends TorchSpec {
    "A NarrowTable Module " should "generate correct output and grad" in {
    torchCheck()
    val module = new NarrowTable[Double](1, 2)

    val input = T()
    input(1.0) = Tensor[Double](2, 3).apply1(e => Random.nextDouble())
    input(2.0) = Tensor[Double](2, 1).apply1(e => Random.nextDouble())
    input(3.0) = Tensor[Double](2, 2).apply1(e => Random.nextDouble())

    val gradOutput = T()
    gradOutput(1.0) = Tensor[Double](5, 3).apply1(e => Random.nextDouble())
    gradOutput(2.0) = Tensor[Double](2, 5).apply1(e => Random.nextDouble())

    val code = "module = nn.NarrowTable(1, 2)\n" +
      "local i = 0\n" +
      "while i < 10 do\n" +
      "output = module:forward(input)\n" +
      "gradInput = module:backward(input, gradOutput)\n" +
      "i = i + 1\n" +
      "end"

    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))

    val luaOutput1 = torchResult("output").asInstanceOf[Table]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    val start = System.nanoTime()
    var i = 0
    var output = T()
    var gradInput = T()
    while (i < 10) {
      output = module.forward(input)
      gradInput = module.backward(input, gradOutput)
      i += 1
    }
    val end = System.nanoTime()
    val scalaTime = end - start

    luaOutput1 should be (output)
    luaOutput2 should be (gradInput)

    println("Test case : NarrowTable, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }

  "A NarrowTable Module with negative length" should "generate correct output and grad" in {
    torchCheck()
    val module = new NarrowTable[Double](2, -2)

    val input = T()
    input(1.0) = Tensor[Double](2, 3).apply1(e => Random.nextDouble())
    input(2.0) = Tensor[Double](2, 1).apply1(e => Random.nextDouble())
    input(3.0) = Tensor[Double](2, 2).apply1(e => Random.nextDouble())
    input(4.0) = Tensor[Double](2, 2).apply1(e => Random.nextDouble())

    val gradOutput = T()
    gradOutput(1.0) = Tensor[Double](5, 3).apply1(e => Random.nextDouble())
    gradOutput(2.0) = Tensor[Double](2, 5).apply1(e => Random.nextDouble())

    val start = System.nanoTime()
    var i = 0
    var output = T()
    var gradInput = T()
    output = module.forward(input)
    gradInput = module.backward(input, gradOutput)
    i += 1
    val end = System.nanoTime()
    val scalaTime = end - start

    val gradInput1 = gradInput[Tensor[Double]](2.0)
    val gradInput2 = gradInput[Tensor[Double]](3.0)
    val expectedGradInput1 = gradOutput[Tensor[Double]](1.0)
    val expectedGradInput2 = gradOutput[Tensor[Double]](2.0)

    val output1 = output[Tensor[Double]](1.0)
    val output2 = output[Tensor[Double]](2.0)
    val expectedOutput1 = input[Tensor[Double]](2.0)
    val expectedOutput2 = input[Tensor[Double]](3.0)

    output1 should be (expectedOutput1)
    output2 should be (expectedOutput2)

    gradInput1 should be (expectedGradInput1)
    gradInput2 should be (expectedGradInput2)
  }
} 
Example 21
Source File: MarginRankingCriterionSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch
import com.intel.analytics.bigdl.nn.MarginRankingCriterion
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.utils.Table

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class MarginRankingCriterionSpec extends TorchSpec {
    "A MarginRankingCriterion " should "generate correct output and grad with only value" in {
    torchCheck()
    val mse = new MarginRankingCriterion[Double]()

    val input1 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](5).apply1(e => Random.nextDouble())

    val input = new Table()
    input(1.toDouble) = input1
    input(2.toDouble) = input2

    val target = new Table()
    val target1 = Tensor[Double](Storage(Array(-1.0)))
    target(1.toDouble) = target1

    val start = System.nanoTime()
    val output = mse.forward(input, target)
    val gradInput = mse.backward(input, target)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "mse = nn.MarginRankingCriterion()\n" +
      "output = mse:forward(input,-1)\n" +
      "gradInput = mse:backward(input,-1)"

    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "target" -> target),
      Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Double]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be (output)
    gradInput should equal (luaOutput2)

    println("Test case : MarginRankingCriterion, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }

  "A MarginRankingCriterion " should "generate correct output and grad with Tensor target" in {
    torchCheck()
    val mse = new MarginRankingCriterion[Double]()

    val input1 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](5).apply1(e => Random.nextDouble())

    val input = new Table()
    input(1.toDouble) = input1
    input(2.toDouble) = input2

    val target = new Table()
    val target1 = Tensor[Double](5).apply1(e => Random.nextDouble())
    target(1.toDouble) = target1

    val start = System.nanoTime()
    val output = mse.forward(input, target)
    val gradInput = mse.backward(input, target)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "mse = nn.MarginRankingCriterion()\n" +
      "output = mse:forward(input, target)\n" +
      "gradInput = mse:backward(input, target)"

    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "target" -> target1),
      Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Double]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be (output)
    gradInput should equal (luaOutput2)

    println("Test case : MarginRankingCriterion, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }
} 
Example 22
Source File: MaskedSelectSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.MaskedSelect
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.Table

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class MaskedSelectSpec extends TorchSpec {
    "A MaskedSelect Module " should "generate correct output and grad" in {
    torchCheck()
    val module = new MaskedSelect[Double]()
    val input1 = Tensor[Double](2, 2).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](2, 2)
    input2(Array(1, 1)) = 1
    input2(Array(1, 2)) = 0
    input2(Array(2, 1)) = 0
    input2(Array(2, 2)) = 1

    val input = new Table()
    input(1.0) = input1
    input(2.0) = input2

    val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble())

    val code = "module = nn.MaskedSelect()\n" +
      "mask = torch.ByteTensor({{1, 0}, {0, 1}})\n" +
      "output = module:forward({input1, mask})\n" +
      "gradInput = module:backward({input1, mask}, gradOutput)\n" +
      "gradInput[2] = gradInput[2]:double()"

    val (luaTime, torchResult) = TH.run(code, Map("input1" -> input1, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))

    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    val start = System.nanoTime()
    val output = module.forward(input)
    val gradInput = module.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    output should be (luaOutput1)
    gradInput should equal (luaOutput2)

    println("Test case : MaskedSelect, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }

} 
Example 23
Source File: CMinTableSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.CMinTable
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.Table

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class CMinTableSpec extends TorchSpec {
    "A CMaxTable Module" should "generate correct output and grad" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)
    val module = new CMinTable[Double]()

    val input1 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input = new Table()
    input(1.toDouble) = input1
    input(2.toDouble) = input2

    val start = System.nanoTime()
    val output = module.forward(input)
    val gradInput = module.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.CMinTable()\n" +
      "output = module:forward(input)\n" +
      "gradInput = module:backward(input,gradOutput)\n"

    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be (output)
    luaOutput2 should be (gradInput)

    println("Test case : CMinTable, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }
} 
Example 24
Source File: MixtureTableSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.MixtureTable
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.Table

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class MixtureTableSpec extends TorchSpec {
    "A MixtureTable " should "generate correct output and grad with table expertInput" in {
    torchCheck()
    val mse = new MixtureTable[Double]

    val expertInput = Tensor[Double](5, 3, 6).apply1(e => Random.nextDouble())
    val expertTable = new Table()
    expertTable(1.0) = Tensor[Double](5, 6).apply1(e => Random.nextDouble())
    expertTable(2.0) = Tensor[Double](5, 6).apply1(e => Random.nextDouble())
    expertTable(3.0) = Tensor[Double](5, 6).apply1(e => Random.nextDouble())

    val input1 = Tensor[Double](5, 3).apply1(e => Random.nextDouble())
    val gradOutput = Tensor[Double](5, 6).apply1(e => Random.nextDouble())
    val input = new Table()
    input(1.0) = input1
    input(2.0) = expertTable

    val start = System.nanoTime()
    val output = mse.forward(input)
    val gradInput = mse.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "mse = nn.MixtureTable()\n" +
      "input = {input1, expertTable}\n" +
      "output = mse:forward(input)\n" +
      "gradInput = mse:backward(input,gradOutput)"

    val (luaTime, torchResult) = TH.run(code, Map("input1" -> input1, "expertTable" -> expertTable,
      "gradOutput" -> gradOutput), Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    output should be (luaOutput1)
    luaOutput2 should be (gradInput)

    println("Test case : MixtureTable, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }

  "A MixtureTable " should "generate correct output and grad with tensor expertInput" in {
    torchCheck()
    val mse = new MixtureTable[Double]

    val expertInput = Tensor[Double](5, 3, 6).apply1(e => Random.nextDouble())
    val input1 = Tensor[Double](5, 3).apply1(e => Random.nextDouble())
    val gradOutput = Tensor[Double](5, 6).apply1(e => Random.nextDouble())
    val input = new Table()
    input(1.0) = input1
    input(2.0) = expertInput

    val code = "mse = nn.MixtureTable()\n" +
      "output = mse:forward(input)\n" +
      "gradInput = mse:backward(input,gradOutput)\n" +
      "size = mse.size\n" +
      "dim = mse.dim"

    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput),
      Array("output", "gradInput", "size", "dim"))
    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    val start = System.nanoTime()
    val output = mse.forward(input)
    val gradInput = mse.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    output should be (luaOutput1)
    gradInput should be (luaOutput2)

    println("Test case : MixtureTable, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }
} 
Example 25
Source File: IndexSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.nn.Index
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.Table

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class IndexSpec extends TorchSpec {
    "A Index " should "generate correct output and grad with one dimension" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)

    val input1 = Tensor[Double](3).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](4)
    input2(Array(1)) = 1
    input2(Array(2)) = 2
    input2(Array(3)) = 2
    input2(Array(4)) = 3
    val gradOutput = Tensor[Double](4).apply1(e => Random.nextDouble())

    val input = new Table()
    input(1.toDouble) = input1
    input(2.toDouble) = input2

    val code = "torch.manualSeed(" + seed + ")\n" +
      "input = {input1, torch.LongTensor{1, 2, 2, 3}}\n" +
      "module = nn.Index(1)\n" +
      "output = module:forward(input)\n" +
      "gradInput = module:backward(input,gradOutput)\n"

    val (luaTime, torchResult) = TH.run(code, Map("input1" -> input1, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))

    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    val module = new Index[Double](1)
    val start = System.nanoTime()
    val output = module.forward(input)
    val gradInput = module.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    output should be(luaOutput1)
    luaOutput2 should be (gradInput)

    println("Test case : Index, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }

  "A Index " should "generate correct output and grad with two dimension" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)

    val input1 = Tensor[Double](3, 3).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](4)
    input2(Array(1)) = 1
    input2(Array(2)) = 2
    input2(Array(3)) = 3
    input2(Array(4)) = 1

    val gradOutput = Tensor[Double](3, 4).apply1(e => Random.nextDouble())

    val input = new Table()
    input(1.toDouble) = input1
    input(2.toDouble) = input2

    val code = "torch.manualSeed(" + seed + ")\n" +
      "input = {input1, torch.LongTensor{1, 2, 3, 1}}\n" +
      "module = nn.Index(2)\n" +
      "output = module:forward(input)\n" +
      "gradInput = module:backward(input,gradOutput)\n"

    val (luaTime, torchResult) = TH.run(code, Map("input1" -> input1, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))

    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    val module = new Index[Double](2)
    val start = System.nanoTime()
    val output = module.forward(input)
    val gradInput = module.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    output should be(luaOutput1)
    luaOutput2 should be (gradInput)

    println("Test case : Index, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }


} 
Example 26
Source File: CSubTableSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.integration.torch

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.Table
import com.intel.analytics.bigdl.nn.CSubTable

import scala.collection.mutable.HashMap
import scala.util.Random

@com.intel.analytics.bigdl.tags.Serial
class CSubTableSpec extends TorchSpec {
    "A CDivTable Module" should "generate correct output and grad" in {
    torchCheck()
    val seed = 100
    RNG.setSeed(seed)
    val module = new CSubTable[Double]()

    val input1 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input2 = Tensor[Double](5).apply1(e => Random.nextDouble())
    val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble())
    val input = new Table()
    input(1.toDouble) = input1
    input(2.toDouble) = input2

    val start = System.nanoTime()
    val output = module.forward(input)
    val gradInput = module.backward(input, gradOutput)
    val end = System.nanoTime()
    val scalaTime = end - start

    val code = "torch.manualSeed(" + seed + ")\n" +
      "module = nn.CSubTable()\n" +
      "output = module:forward(input)\n" +
      "gradInput = module:backward(input,gradOutput)"


    val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput),
      Array("output", "gradInput"))
    val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]]
    val luaOutput2 = torchResult("gradInput").asInstanceOf[Table]

    luaOutput1 should be(output)
    luaOutput2 should be (gradInput)

    println("Test case : CSubTable, Torch : " + luaTime +
      " s, Scala : " + scalaTime / 1e9 + " s")
  }
} 
Example 27
Source File: KeyBinder.scala    From slide-desktop   with GNU General Public License v2.0
package gui

import java.awt.event.{KeyEvent, KeyListener}
import scala.collection.mutable.HashMap

abstract class KeyBinder(val keyCodes: Int*) extends KeyListener {
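    // Tracks which of the bound key codes are currently pressed; onKeysDown() fires once all of them are down.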
    private val keyMap: HashMap[Int, Boolean] = new HashMap[Int, Boolean]

    override def keyTyped(e: KeyEvent): Unit = {}

    override def keyPressed(e: KeyEvent): Unit = {
        keyMap.put(e.getKeyCode, true)
        if (getKeysDown)
            onKeysDown()
    }

    override def keyReleased(e: KeyEvent): Unit =
        keyMap.remove(e.getKeyCode)

    private def getKeysDown: Boolean = {
        // All bound keys must currently be held down; avoids non-local returns from the closure
        val allDown = this.keyCodes.forall(key => keyMap.getOrElse(key, false))
        if (allDown)
            keyMap.clear()
        allDown
    }

    def onKeysDown(): Unit
} 
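
A minimal usage sketch for the KeyBinder above, assuming it is compiled in the gui package and attached to a Swing component; the key codes and the println callback are illustrative only:

import java.awt.event.KeyEvent
import javax.swing.JFrame

import gui.KeyBinder

object KeyBinderUsage {
    def main(args: Array[String]): Unit = {
        val frame = new JFrame("KeyBinder demo")
        // Fire onKeysDown only while CTRL and S are held together
        frame.addKeyListener(new KeyBinder(KeyEvent.VK_CONTROL, KeyEvent.VK_S) {
            override def onKeysDown(): Unit = println("Ctrl+S pressed")
        })
        frame.setSize(300, 200)
        frame.setVisible(true)
    }
}
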
Example 28
Source File: TopElementsAggregator.scala    From salt-core   with Apache License 2.0 5 votes vote down vote up
package software.uncharted.salt.core.analytic.collection

import software.uncharted.salt.core.analytic.Aggregator

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.{Map => MutableMap}
import scala.collection.mutable.PriorityQueue
import scala.reflect.ClassTag


class TopElementsAggregator[ET: ClassTag](elementLimit: Int)
extends Aggregator[Seq[ET], Map[ET, Int], List[(ET, Int)]] {

  def default(): Map[ET, Int] = {
    Map[ET, Int]()
  }

  override def add(current: Map[ET, Int], next: Option[Seq[ET]]): Map[ET, Int] = {
    if (next.isDefined) {
      // If our current map is mutable, add new data in directly.
      // If not, convert to a mutable map, and then add data in
      val sum = current match {
        case hm: MutableMap[ET, Int] => hm
        case _ => {
          // The current value isn't itself a mutable hashmap yet; convert to one.
          val hm = new HashMap[ET, Int]()
          hm ++= current
          hm
        }
      }
      next.get.foreach(t => sum.put(t, sum.getOrElse(t, 0) + 1))
      sum
    } else {
      current
    }
  }

  override def merge(left: Map[ET, Int], right: Map[ET, Int]): Map[ET, Int] = {
    // If either input map is mutable, merge the other into it.
    // If neither is, convert one to mutable, and add the other into it.
    val (to, from) = left match {
      case hm: MutableMap[ET, Int] => (hm, right)
      case _ =>
        right match {
          case hm: MutableMap[ET, Int] => (hm, left)
          case _ =>
            val hm = new HashMap[ET, Int]()
            hm ++= left
            (hm, right)
        }
    }
    from.foreach(t => {
      to.put(t._1, to.getOrElse(t._1, 0) + t._2)
    })
    to
  }

  override def finish(intermediate: Map[ET, Int]): List[(ET, Int)] = {
    val x = new PriorityQueue[(ET, Int)]()(Ordering.by(
      a => a._2
    ))
    intermediate.foreach(t => {
      x.enqueue(t)
    })
    var result = new ListBuffer[(ET, Int)]
    for (i <- 0 until Math.min(elementLimit, x.size)) {
      result.append(x.dequeue)
    }
    result.toList
  }
} 
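
A small sketch that exercises the aggregator directly, outside of Spark and the salt tiling pipeline (in practice the add/merge/finish calls are driven by the library):

import software.uncharted.salt.core.analytic.collection.TopElementsAggregator

object TopElementsDemo {
  def main(args: Array[String]): Unit = {
    val agg = new TopElementsAggregator[String](elementLimit = 2)
    // Two partial aggregations, as would be produced on separate partitions
    val partial1 = agg.add(agg.default(), Some(Seq("a", "b", "a")))
    val partial2 = agg.add(agg.default(), Some(Seq("b", "c", "b")))
    val merged = agg.merge(partial1, partial2)
    // Top two elements by count, e.g. List((b,3), (a,2))
    println(agg.finish(merged))
  }
}
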
Example 29
Source File: ConfManager.scala    From HadoopLearning   with MIT License 5 votes vote down vote up
package com.utils

import java.util.regex.Pattern

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.mutable.HashMap

/**
  * Describes the Spark Streaming configuration
  *
  * @author liumm
  * @since 2018-07-27 20:27
  */
object ConfManager {

  /**
    * Maximum number of records written per batch
    */
  val maxRecords = 1000

  /**
    * Builds the Kafka configuration
    *
    * @param streamConf stream configuration holding brokers, group id and topic pattern
    * @return consumer settings paired with the compiled topic pattern
    */
  def kafkaParam(streamConf: StreamConf): (Map[String, Object], Pattern) = {
    (getConsumerConfig(streamConf.brokers, streamConf.groupId), Pattern.compile(streamConf.topics))
  }

  def kafkaParamForMetadata(streamConf: StreamConf): Map[String, String] = {
    val kafkaParams = new HashMap[String, String]()
    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> streamConf.brokers)
    kafkaParams += ("metadata.broker.list" -> streamConf.brokers)
    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest")
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> streamConf.groupId)
    kafkaParams.toMap
  }

  /**
    * Builds the Kafka consumer configuration
    *
    * @return the Kafka consumer configuration
    */
  private def getConsumerConfig(brokers: String, groupId: String): Map[String, Object] = {
    val kafkaParams = new HashMap[String, Object]()

    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers)
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> groupId)
    kafkaParams += (ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
    kafkaParams += (ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])

    kafkaParams += (ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> new Integer(3 * 1024 * 1024))
    kafkaParams += (ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> new Integer(100))

    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest")
    // Disable Kafka's automatic offset commits
    kafkaParams += (ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean))

    kafkaParams.toMap
  }

  def newStreamConf() = {
    val conf = new StreamConf()
    conf.zkUrl = "hdp01:2181"
    conf.brokers = "hdp01:9092"
    conf.groupId = "liumm_group"
    conf.topics = "i57_.*"
    conf
  }

} 
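
A hedged usage sketch; it assumes the StreamConf class referenced above (with zkUrl, brokers, groupId and topics fields) is available on the classpath:

import com.utils.{ConfManager, StreamConf}

object ConfManagerUsage {
  def main(args: Array[String]): Unit = {
    val streamConf = ConfManager.newStreamConf()
    // Consumer settings plus the compiled topic pattern used for subscription
    val (consumerParams, topicPattern) = ConfManager.kafkaParam(streamConf)
    println(s"Topic pattern: $topicPattern, ${consumerParams.size} consumer settings")
    println(ConfManager.kafkaParamForMetadata(streamConf))
  }
}
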
Example 30
Source File: GlobalPerformer.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import akka.actor.SupervisorStrategy.Restart
import akka.actor.{Actor, ActorLogging, ActorRef, OneForOneStrategy, Props, Terminated}
import akka.routing._
import play.api.libs.json.JsObject

import scala.collection.mutable.HashMap
import scala.concurrent.duration._

protected class GlobalPerformer(val orchestrationID: String,
                                val orchestrationName: String,
                                val globalPerformers: List[JsObject],
                                val ensemblesSpec :  List[JsObject]) extends Actor with ActorLogging{

  val monitoring_actor = FEY_MONITOR.actorRef
  var global_metadata: Map[String, Performer] = Map.empty[String, Performer]

  override def receive: Receive = {

    case GlobalPerformer.PRINT_GLOBAL =>
      context.actorSelection(s"*") ! FeyGenericActor.PRINT_PATH

    case Terminated(actor) =>
      monitoring_actor  ! Monitor.TERMINATE(actor.path.toString, Utils.getTimestamp)
      log.error(s"DEAD Global Performers ${actor.path.name}")
      context.children.foreach{ child =>
        context.unwatch(child)
        context.stop(child)
      }
      throw new RestartGlobalPerformers(s"DEAD Global Performer ${actor.path.name}")

    case GetRoutees => //Discard

    case x => log.warning(s"Message $x not treated by Global Performers")
  }

  
  private def loadClazzFromJar(classPath: String, jarLocation: String, jarName: String):Class[FeyGenericActor] = {
    try {
      Utils.loadActorClassFromJar(jarLocation,classPath,jarName)
    }catch {
      case e: Exception =>
        log.error(e,s"Could not load class $classPath from jar $jarLocation. Please, check the Jar repository path as well the jar name")
        throw e
    }
  }

}

object GlobalPerformer{

  val activeGlobalPerformers:HashMap[String, Map[String, ActorRef]] = HashMap.empty[String, Map[String, ActorRef]]

  case object PRINT_GLOBAL
} 
Example 31
Source File: TokenAuthorizingInterceptor.scala    From meteorite-core   with Apache License 2.0 5 votes vote down vote up
package bi.meteorite.core.security.authorization

import java.lang.reflect.Method
import org.apache.cxf.security.SecurityContext

import java.util

import scala.collection.mutable.HashMap
import scala.collection.mutable.ListBuffer

import scala.collection.JavaConversions._
import TokenAuthorizingInterceptor._
import scala.collection.JavaConverters._

object TokenAuthorizingInterceptor {

  private def parseRolesMap(rolesMap: Map[String, String]): scala.collection.mutable.HashMap[String, List[String]] = {
    val map = new scala.collection.mutable.HashMap[String, List[String]]()
    for ((key, value) <- rolesMap) {
      map.put(key, value.split(" ").toList)
    }
    map
  }
}


class TokenAuthorizingInterceptor(uniqueId: Boolean) extends TokenAbstractAutorizingInInterceptor(uniqueId) {

  private val methodRolesMap = new HashMap[String, List[String]]()

  private var userRolesMap = new scala.collection.mutable.HashMap[String, List[String]]

  private var globalRoles =  new scala.collection.mutable.ListBuffer[String]

  private var checkConfiguredRolesOnly: Boolean = _

  def this() {
    this(true)
  }

  protected override def isUserInRole(sc: SecurityContext, roles: util.List[String], deny: Boolean): Boolean = {
    if (!checkConfiguredRolesOnly && !super.isUserInRole(sc, roles, deny)) {
      return false
    }
    if (userRolesMap.nonEmpty) {
      val userRoles = userRolesMap.get(sc.getUserPrincipal.getName)
      if (userRoles.isEmpty) {
        return false
      }
      for (role <- roles if userRoles.get.contains(role)) {
        return true
      }
      false
    } else {
      !checkConfiguredRolesOnly
    }
  }

  private def createMethodSig(method: Method): String = {
    val b = new StringBuilder(method.getReturnType.getName)
    b.append(' ').append(method.getName).append('(')
    for (cls <- method.getParameterTypes) {
      b.append(cls.getName)
    }
    b.append(')')
    b.toString
  }

  protected override def getExpectedRoles(method: Method): util.List[String] = {

    var roles = methodRolesMap.get(createMethodSig(method))

    if(roles.isEmpty) {
      roles = methodRolesMap.get(method.getName)
    }

    if(roles.isEmpty){
      globalRoles.toList
    }
    else{
      roles.get
    }

  }

  def setMethodRolesMap(rolesMap: java.util.Map[String, String]) =
    methodRolesMap.putAll(parseRolesMap(rolesMap.asScala.toMap))

  def setUserRolesMap(rolesMap: java.util.Map[String, String]) = userRolesMap = parseRolesMap(rolesMap.asScala.toMap)

  def setGlobalRoles(roles: String) = globalRoles = roles.split(" ").to[ListBuffer]

  def setCheckConfiguredRolesOnly(checkConfiguredRolesOnly: Boolean) = this.checkConfiguredRolesOnly =
    checkConfiguredRolesOnly

} 
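
A hypothetical wiring sketch for the interceptor; the method and role names are illustrative, and registering the interceptor with a CXF endpoint is left out:

import bi.meteorite.core.security.authorization.TokenAuthorizingInterceptor
import scala.collection.JavaConverters._

object InterceptorWiring {
  def main(args: Array[String]): Unit = {
    val interceptor = new TokenAuthorizingInterceptor()
    // Keys are method names (or signatures), values are space-separated role lists
    interceptor.setMethodRolesMap(Map("getUsers" -> "ROLE_ADMIN ROLE_USER").asJava)
    interceptor.setUserRolesMap(Map("admin" -> "ROLE_ADMIN").asJava)
    interceptor.setGlobalRoles("ROLE_VIEWER")
    interceptor.setCheckConfiguredRolesOnly(true)
  }
}
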
Example 32
Source File: KernelMatrix.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.nodes.learning

import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import breeze.linalg._

import org.apache.spark.rdd.RDD

import keystoneml.utils.{MatrixUtils, Stats}
import keystoneml.workflow.{Transformer, LabelEstimator}


class BlockKernelMatrix[T: ClassTag](
    val kernelGen: KernelTransformer[T],
    val data: RDD[T],
    val cacheKernel: Boolean)
  extends KernelMatrix {

  val colBlockCache = HashMap.empty[Seq[Int], RDD[DenseMatrix[Double]]]
  val diagBlockCache = HashMap.empty[Seq[Int], DenseMatrix[Double]]

  def apply(colIdxs: Seq[Int]): RDD[DenseMatrix[Double]] = {
    if (colBlockCache.contains(colIdxs)) {
      colBlockCache(colIdxs)
    } else {
      val (kBlock, diagBlock) = kernelGen.computeKernel(data, colIdxs)
      if (cacheKernel) {
        colBlockCache += (colIdxs -> kBlock)
        diagBlockCache += (colIdxs -> diagBlock)
      }
      kBlock
    }
  }

  def unpersist(colIdxs: Seq[Int]): Unit = {
    if (colBlockCache.contains(colIdxs) && !cacheKernel) {
      colBlockCache(colIdxs).unpersist(true)
    }
  }

  def diagBlock(idxs: Seq[Int]): DenseMatrix[Double] = {
    if (!diagBlockCache.contains(idxs)) {
      val (kBlock, diagBlock) = kernelGen.computeKernel(data, idxs)
      if (cacheKernel) {
        colBlockCache += (idxs -> kBlock)
        diagBlockCache += (idxs -> diagBlock)
      }
      diagBlock
    } else {
      diagBlockCache(idxs)
    }
  }
} 
Example 33
Source File: ParameterTest.scala    From maha   with Apache License 2.0 5 votes vote down vote up
// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.core.request

import com.yahoo.maha.core.request.ReportFormatType.CSVFormat
import com.yahoo.maha.core.{Engine, HiveEngine}
import org.json4s._
import org.json4s.jackson.JsonMethods
import org.scalatest.{FunSuite, Matchers}

import scala.collection.mutable.HashMap


class ParameterTest extends FunSuite with Matchers {

  test("SerializeParameters should serialize a map of parameters into a List") {
    val map_parameters = new HashMap[Parameter, ParameterValue[_]]
    map_parameters.put(Parameter.ReportFormat, ReportFormatValue(ReportFormatType.CSVFormat))
    map_parameters.put(Parameter.DryRun, DryRunValue(false))
    map_parameters.put(Parameter.GeneratedQuery, GeneratedQueryValue("GeneratedQuery"))
    map_parameters.put(Parameter.QueryEngine, QueryEngineValue(Engine.from("hive").get))
    map_parameters.put(Parameter.Debug, DebugValue(true))
    map_parameters.put(Parameter.RequestId, RequestIdValue("RequestId"))
    map_parameters.put(Parameter.UserId, UserIdValue("UserId"))
    map_parameters.put(Parameter.TimeZone, TimeZoneValue("TimeZone"))
    map_parameters.put(Parameter.Schema, SchemaValue("Schema"))
    map_parameters.put(Parameter.Distinct, DistinctValue(true))
    map_parameters.put(Parameter.JobName, JobNameValue("tools_1"))
    map_parameters.put(Parameter.RegistryName, RegistryNameValue("mahaRegistry"))
    map_parameters.put(Parameter.HostName, HostNameValue("127.0.0.1"))

    val result = Parameter.serializeParameters(map_parameters.toMap)
    result.length shouldBe map_parameters.size

    val newMap = result.toMap
    for((k,v) <- map_parameters) {
      newMap.get(k.entryName).get match{
        case JString(x) => v.value match {
          case CSVFormat => x shouldBe "csv"
          case HiveEngine => x shouldBe "Hive"
          case _ => x shouldBe v.value
        }
        case JBool(x) => x shouldBe v.value
        case _ => fail
      }
    }

  }

  test("DeserializeParameters should deserialize a JSON into a Map of parameter values") {
    val inputJson=
      """
        |{
        | "Report-Format": "csv",
        | "Dry-Run": false,
        | "Generated-Query": "Generated-Query",
        | "Query-Engine": "oracle",
        | "debug": true,
        | "Request-Id": "Request-Id",
        | "User-Id": "User-Id",
        | "TimeZone": "TimeZone",
        | "Schema": "Schema",
        | "Distinct": true,
        | "Job-Name": "Job-Name",
        | "RegistryName": "mahaRegistry",
        | "HostName": "127.0.0.1"
        |}
        |""".stripMargin
    val result = Parameter.deserializeParameters(JsonMethods.parse(inputJson))

    result.getOrElse() match{
      case m: Map[Parameter, ParameterValue[_]] => {
        m.size shouldBe 13
        m.get(Parameter.ReportFormat).get shouldBe ReportFormatValue(ReportFormatType.CSVFormat)
        m.get(Parameter.DryRun).get shouldBe DryRunValue(false)
        m.get(Parameter.GeneratedQuery).get shouldBe GeneratedQueryValue("Generated-Query")
        m.get(Parameter.QueryEngine).get shouldBe QueryEngineValue(Engine.from("oracle").get)
        m.get(Parameter.Debug).get shouldBe DebugValue(true)
        m.get(Parameter.RequestId).get shouldBe RequestIdValue("Request-Id")
        m.get(Parameter.UserId).get shouldBe UserIdValue("User-Id")
        m.get(Parameter.TimeZone).get shouldBe TimeZoneValue("TimeZone")
        m.get(Parameter.Schema).get shouldBe SchemaValue("Schema")
        m.get(Parameter.Distinct).get shouldBe DistinctValue(true)
        m.get(Parameter.JobName).get shouldBe JobNameValue("Job-Name")
        m.get(Parameter.RegistryName).get shouldBe RegistryNameValue("mahaRegistry")
        m.get(Parameter.HostName).get shouldBe HostNameValue("127.0.0.1")
      }
      case _ => fail
    }

  }

} 
Example 34
Source File: depgraph.scala    From sbt-blockade   with Apache License 2.0 5 votes vote down vote up
//: ----------------------------------------------------------------------------
//: Copyright 2015 Johannes Rudolph
//:
//: Distributed under the Apache 2.0 License, please see the NOTICE
//: file in the root of the project for further details.
//: ----------------------------------------------------------------------------
package verizon.build

object depgraph {

  import java.io.File
  import sbt._
  import scala.collection.mutable.{HashMap, MultiMap, Set}
  import scala.language.reflectiveCalls

  object SbtUpdateReport {

    type OrganizationArtifactReport = {
      def modules: Seq[ModuleReport]
    }

    def fromConfigurationReport(report: ConfigurationReport, rootInfo: sbt.ModuleID): ModuleGraph = {
      implicit def id(sbtId: sbt.ModuleID): ModuleId = ModuleId(sbtId.organization, sbtId.name, sbtId.revision)

      def moduleEdges(orgArt: OrganizationArtifactReport): Seq[(Module, Seq[Edge])] = {
        val chosenVersion = orgArt.modules.find(!_.evicted).map(_.module.revision)
        orgArt.modules.map(moduleEdge(chosenVersion))
      }

      def moduleEdge(chosenVersion: Option[String])(report: ModuleReport): (Module, Seq[Edge]) = {
        val evictedByVersion = if (report.evicted) chosenVersion else None
        val jarFile = report.artifacts.find(_._1.`type` == "jar").orElse(report.artifacts.find(_._1.extension == "jar")).map(_._2)
        (Module(
          id = report.module,
          license = report.licenses.headOption.map(_._1),
          evictedByVersion = evictedByVersion,
          jarFile = jarFile,
          error = report.problem
        ), report.callers.map(caller ⇒ Edge(caller.caller, report.module)))
      }

      val (nodes, edges) = report.details.flatMap(moduleEdges).unzip
      val root = Module(rootInfo)

      ModuleGraph(root +: nodes, edges.flatten)
    }
  }

  type Edge = (ModuleId, ModuleId)

  def Edge(from: ModuleId, to: ModuleId): Edge = from -> to

  case class ModuleId(organisation: String,
                      name: String,
                      version: String) {
    def idString: String = organisation + ":" + name + ":" + version
  }

  case class Module(id: ModuleId,
                    license: Option[String] = None,
                    extraInfo: String = "",
                    evictedByVersion: Option[String] = None,
                    jarFile: Option[File] = None,
                    error: Option[String] = None) {
    def hadError: Boolean = error.isDefined

    def isUsed: Boolean = !isEvicted

    def isEvicted: Boolean = evictedByVersion.isDefined
  }

  case class ModuleGraph(nodes: Seq[Module], edges: Seq[Edge]) {
    lazy val modules: Map[ModuleId, Module] =
      nodes.map(n ⇒ (n.id, n)).toMap

    def module(id: ModuleId): Module = modules(id)

    lazy val dependencyMap: Map[ModuleId, Seq[Module]] =
      createMap(identity)

    lazy val reverseDependencyMap: Map[ModuleId, Seq[Module]] =
      createMap { case (a, b) ⇒ (b, a) }

    def createMap(bindingFor: ((ModuleId, ModuleId)) ⇒ (ModuleId, ModuleId)): Map[ModuleId, Seq[Module]] = {
      val m = new HashMap[ModuleId, Set[Module]] with MultiMap[ModuleId, Module]
      edges.foreach { entry ⇒
        val (f, t) = bindingFor(entry)
        m.addBinding(f, module(t))
      }
      m.toMap.mapValues(_.toSeq.sortBy(_.id.idString)).withDefaultValue(Nil)
    }

    def roots: Seq[Module] =
      nodes.filter(n ⇒ !edges.exists(_._2 == n.id)).sortBy(_.id.idString)

    def isEmpty: Boolean = nodes.isEmpty
  }

} 
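
A small sketch that builds a ModuleGraph by hand and queries it (no sbt ConfigurationReport is needed for this, although the depgraph object itself compiles against sbt); the coordinates are made up:

import verizon.build.depgraph._

object DepGraphDemo {
  def main(args: Array[String]): Unit = {
    val app = Module(ModuleId("com.example", "app", "1.0"))
    val lib = Module(ModuleId("org.typelevel", "cats-core", "2.9.0"))
    val graph = ModuleGraph(Seq(app, lib), Seq(Edge(app.id, lib.id)))
    println(graph.roots.map(_.id.idString))             // e.g. List(com.example:app:1.0)
    println(graph.dependencyMap(app.id).map(_.id.name)) // direct dependencies of the root
  }
}
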
Example 35
Source File: Checksum.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package org.schedoscope.dsl.transformations

import java.security.MessageDigest

import org.apache.hadoop.fs.{FileStatus, Path}
import org.schedoscope.Schedoscope
import org.schedoscope.scheduler.driver.FilesystemDriver._

import scala.Array.canBuildFrom
import scala.collection.mutable.HashMap

object Checksum {
  private def md5 = MessageDigest.getInstance("MD5")

  private def listFiles(path: String): Array[FileStatus] = {
    val files = fileSystem(path, Schedoscope.settings.hadoopConf).globStatus(new Path(path))
    if (files != null)
      files
    else Array()
  }

  private def fileChecksum(path: String) =
    if (path == null)
      "null-checksum"
    else if (path.endsWith(".jar"))
      path
    else try {
      val cs = fileSystem(path, Schedoscope.settings.hadoopConf).getFileChecksum(new Path(path))
      if (cs == null)
        path
      else
        cs.toString()
    } catch {
      case _: Throwable => path
    }

  def fileChecksums(paths: List[String], recursive: Boolean): List[String] =
    paths.flatMap(path => {
      if (fileSystem(path, Schedoscope.settings.hadoopConf).isFile(new Path(path)))
        List(fileChecksum(path))
      else if (recursive)
        fileChecksums(listFiles(path + "/*").map(f => f.getPath.toString()).toList, recursive)
      else
        List()
    }).sorted

  val resourceHashCache = new HashMap[List[String], List[String]]()

  def resourceHashes(resources: List[String]): List[String] = synchronized {
    resourceHashCache.getOrElseUpdate(resources, fileChecksums(resources, true))
  }

  val defaultDigest = "0"

  def digest(stringsToDigest: String*): String =
    if (stringsToDigest.isEmpty)
      defaultDigest
    else
      md5.digest(stringsToDigest.sorted.mkString.toCharArray().map(_.toByte)).map("%02X" format _).mkString

  object SchemaChecksum {
    val checksumProperty = "schema.checksum"
  }

  object TransformationChecksum {
    val checksumProperty = "transformation.checksum"
    val timestampProperty = "transformation.timestamp"
  }

} 
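
A minimal sketch of the pure digest helper; the file-checksum methods need a reachable Hadoop FileSystem, so they are left out here:

import org.schedoscope.dsl.transformations.Checksum

object ChecksumDemo {
  def main(args: Array[String]): Unit = {
    println(Checksum.digest())                           // "0", the default digest
    println(Checksum.digest("part-00000", "part-00001")) // MD5 over the sorted, concatenated inputs
  }
}
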
Example 36
Source File: BackOffSupervision.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package org.schedoscope.scheduler.utils

import akka.actor.{ActorRef, ActorSystem}
import org.slf4j.LoggerFactory

import scala.collection.mutable.HashMap
import scala.concurrent.duration.{FiniteDuration, _}


  def manageActorLifecycle(managedActor: ActorRef, backOffSlotTime: FiniteDuration = null, backOffMinimumDelay: FiniteDuration = null): FiniteDuration = {
    val managedActorName = managedActor.path.toStringWithoutAddress

    if (actorBackOffWaitTime.contains(managedActorName)) {
      val newBackOff = actorBackOffWaitTime(managedActorName).nextBackOff
      actorBackOffWaitTime.put(managedActorName, newBackOff)
      log.warn(s"$managerName: Set new back-off waiting " +
        s"time to value ${newBackOff.backOffWaitTime} for rebooted actor ${managedActorName}; " +
        s"(retries=${newBackOff.retries}, resets=${newBackOff.resets}, total-retries=${newBackOff.totalRetries})")

      //schedule tick response based on backoff
      newBackOff.backOffWaitTime
    } else {
      val backOff = ExponentialBackOff(backOffSlotTime = backOffSlotTime, constantDelay = backOffMinimumDelay)
      log.debug(s"$managerName: Set initial back-off waiting " +
        s"time to value ${backOff.backOffWaitTime} for booted actor ${managedActorName}; " +
        s"(retries=${backOff.retries}, resets=${backOff.resets}, total-retries=${backOff.totalRetries})")
      actorBackOffWaitTime.put(managedActorName, backOff)

      //schedule immediate tick response
      0 millis
    }
  }

} 
Example 37
Source File: Database.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package org.schedoscope.test

import java.sql.{Connection, ResultSet, Statement}

import org.schedoscope.dsl.{FieldLike, View}
import org.schedoscope.schema.ddl.HiveQl

import scala.collection.mutable.{HashMap, ListBuffer}

class Database(conn: Connection, url: String) {

  def selectForViewByQuery(v: View, query: String, orderByField: Option[FieldLike[_]]): List[Map[String, Any]] = {
    val res = ListBuffer[Map[String, Any]]()
    var statement: Statement = null
    var rs: ResultSet = null

    try {
      statement = conn.createStatement()
      rs = statement.executeQuery(query)

      while (rs.next()) {
        val row = HashMap[String, Any]()
        v.fields.view.zipWithIndex.foreach(f => {
          row.put(f._1.n, ViewSerDe.deserializeField(f._1.t, rs.getString(f._2 + 1)))
        })
        res.append(row.toMap)
      }
    }
    finally {
      if (rs != null) try {
        rs.close()
      } catch {
        case _: Throwable =>
      }

      if (statement != null) try {
        statement.close()
      } catch {
        case _: Throwable =>
      }
    }

    orderByField match {
      case Some(f) => res.sortBy {
        _ (f.n) match {
          case null => ""
          case other => other.toString
        }
      } toList
      case None => res.toList
    }
  }

  def selectView(v: View, orderByField: Option[FieldLike[_]]): List[Map[String, Any]] =
    selectForViewByQuery(v, HiveQl.selectAll(v), orderByField)

} 
Example 38
Source File: AETest.scala    From Scala-for-Machine-Learning-Second-Edition   with MIT License 5 votes vote down vote up
package org.scalaml.unsupervised.dl.autoencoder

import org.scalaml.{Logging, Resource}
import org.scalaml.Predef.DblVec
import org.scalaml.trading.GoogleFinancials.close
import org.scalaml.workflow.data.DataSource
import org.scalatest.{FlatSpec, Matchers}


final class AETest extends FlatSpec with Matchers with Logging with Resource {
  protected val name: String = "Auto-Encoder"

  it should s"$name single hidden layer" in {
    show( "Single hidden layer")

    val REL_PATH = "unsupervised/ae/"
    val ALPHA = 0.8
    val ETA = 0.05
    val NUM_EPOCHS = 2500
    val EPS = 1e-6
    val THRESHOLD = 0.25
    val LAMBDA = 0.18
    val BETA = 0.3

    val symbols = Array[String](
      "FXE", "FXA", "SPY", "GLD", "FXB", "FXF", "FXC", "FXY", "CYB"
    )

    val STUDIES = List[Array[String]](
      Array[String]("FXY", "FXC", "GLD", "FXA"),
      Array[String]("FXE", "FXF", "FXB", "CYB"),
      Array[String]("FXE", "FXC", "GLD", "FXA", "FXY", "FXB"),
      Array[String]("FXC", "FXY", "FXA"),
      Array[String]("CYB", "GLD", "FXY"),
      symbols
    )

    def index: Map[String, Int] =  {
      import scala.collection.mutable.HashMap
      symbols.zipWithIndex./:(HashMap[String, Int]())((mp, si)  => mp += ((si._1, si._2))).toMap
    }

    val path: String = getPath(REL_PATH).getOrElse(".")
    val prices = symbols.map(s => DataSource(s"$path$s.csv", true, true, 1))
      .map( _.flatMap(_.get(close))).filter(_.isSuccess).map(_.get)

    val config = AEConfig(ALPHA, ETA, LAMBDA, BETA, NUM_EPOCHS, EPS)
    val obs = symbols.flatMap( index.get(_)).map(prices(_).toArray)

    val xv = obs.tail.transpose.dropRight(1)

    val ae = AE(config, 8, xv.toVector)

    ae.model match {
      case Some(aeModel) =>
        if(aeModel.synapses.nonEmpty) {
          val inputSynapse = aeModel.synapses.head
          show(s"$name output synapse(0)(0) ${inputSynapse(0)(0)}")
          show(s"$name output synapse(0)(1) ${inputSynapse(0)(1)}")
          show(s"$name output synapse(1)(0) ${inputSynapse(1)(0)}")
          show(s"$name output synapse(1)(1) ${inputSynapse(1)(1)}")
        }
        else
          fail(s"$name Model weights with improper size")
      case None => fail(s"$name could not generate a model")
    }
  }
}

// ---------------------------------  EOF ---------------------------------------------------------------------------- 
Example 39
Source File: ParallelismTest.scala    From Scala-for-Machine-Learning-Second-Edition   with MIT License 5 votes vote down vote up
package org.scalaml.scalability.scala

import org.scalaml.Logging
import org.scalatest.{FlatSpec, Matchers}


final class ParallelismTest extends FlatSpec with Matchers with Logging {
  import scala.collection.mutable.HashMap
  import scala.collection.parallel.mutable.{ParArray, ParHashMap}
  import scala.util.Random

  protected[this] val name: String = "Scala parallel collections"

  final private val SZ = 100000
  final private val NUM_TASKS = 8
  final private val evalRange = Range(1, NUM_TASKS)
  final private val TIMES = 20

  // Arbitrary map function
  final val mapF = (x: Double) => Math.sin(x * 0.01) + Math.exp(-x)

  // Arbitrary filter function
  final val filterF = (x: Double) => x > 0.8

  // Arbitrary reduce function
  final val reduceF = (x: Double, y: Double) => (x + y) * x


  it should s"$name: arrays" in {
    show(s"Evaluation of arrays")

    // Generate random vector for both the non-parallel and parallel array
    val data = Array.fill(SZ)(Random.nextDouble)
    val pData = ParArray.fill(SZ)(Random.nextDouble)

    // Initialized and execute the benchmark for the parallel array
    val benchmark = new ParallelArray[Double](data, pData, TIMES)

    val ratios = new Array[Double](NUM_TASKS)
    evalRange.foreach(n => ratios.update(n, benchmark.map(mapF)(n)))
    val resultMap = ratios.tail
    resultMap.sum / resultMap.size < 1.0 should be(true)
    display(resultMap, "ParArray.map")

    evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n)))
    val resultfilter = ratios.tail
    resultfilter.sum / resultfilter.size < 1.0 should be(true)
    display(resultfilter, "ParArray.filter")
  }

  it should s"$name: maps" in {
    show("Evaluation of maps")

    val mapData = new HashMap[Int, Double]
    Range(0, SZ).foreach(n => mapData.put(n, Random.nextDouble))
    val parMapData = new ParHashMap[Int, Double]
    Range(0, SZ).foreach(n => parMapData.put(n, Random.nextDouble))

    // Initialized and execute the benchmark for the parallel map
    val benchmark = new ParallelMap[Double](mapData.toMap, parMapData, TIMES)

    val ratios = new Array[Double](NUM_TASKS)
    evalRange.foreach(n => ratios.update(n, benchmark.map(mapF)(n)))
    val resultMap = ratios.tail
    resultMap.sum / resultMap.size < 1.0 should be(true)
    display(resultMap, "ParMap.map")

    evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n)))
    val resultfilter = ratios.tail
    resultfilter.sum / resultfilter.size < 1.0 should be(true)
  }


  private def display(x: Array[Double], label: String): Unit = {
    import org.scalaml.plots.{Legend, LightPlotTheme, LinePlot}

    val labels = Legend(
      name,
      "Scala parallel collections",
      s"Scala parallel computation for $label",
      "Relative timing"
    )
    LinePlot.display(x.toVector, labels, new LightPlotTheme)
  }
}

// -------------------------------------------  EOF -------------------------------------------------- 
Example 40
Source File: OrderedClustering.scala    From nn_coref   with GNU General Public License v3.0 5 votes vote down vote up
package edu.berkeley.nlp.coref
import scala.collection.mutable.HashMap
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

class OrderedClustering(val clusters: Seq[Seq[Int]]) {
  // Elements must be consecutive integers from 0 up to n
  private val allIndicesSorted = clusters.foldLeft(new ArrayBuffer[Int])(_ ++ _).sorted; 
  require(allIndicesSorted.sameElements((0 until allIndicesSorted.size).toSeq), allIndicesSorted);
  private val mentionToClusterMap = new HashMap[Int,Seq[Int]];
  for (cluster <- clusters) {
    for (i <- cluster) {
      mentionToClusterMap.put(i, cluster);
    }
  }
  
  def getCluster(idx: Int) = mentionToClusterMap(idx);
  
  def isSingleton(idx: Int) = mentionToClusterMap(idx).size == 1;
  
  def startsCluster(idx: Int) = mentionToClusterMap(idx)(0) == idx;
  
  def areInSameCluster(idx1: Int, idx2: Int) = mentionToClusterMap(idx1).contains(idx2);
  
  def getImmediateAntecedent(idx: Int) = {
    val cluster = mentionToClusterMap(idx);
    val mentIdxInCluster = cluster.indexOf(idx);
    if (mentIdxInCluster == 0) {
      -1
    } else {
      cluster(mentIdxInCluster - 1);
    }
  }
  
  def getAllAntecedents(idx: Int) = {
    val cluster = mentionToClusterMap(idx);
    cluster.slice(0, cluster.indexOf(idx));
  }
  
  def getAllConsequents(idx: Int) = {
    val cluster = mentionToClusterMap(idx);
    cluster.slice(cluster.indexOf(idx) + 1, cluster.size);
  }
  
  
  // Needed for output printing
  def getClusterIdx(idx: Int) = {
    var clusterIdx = 0;
    for (i <- 0 until clusters.size) {
      if (clusters(i).sameElements(mentionToClusterMap(idx))) {
        clusterIdx = i;
      }
    }
    clusterIdx;
  }
  
  def getSubclustering(mentIdxsToKeep: Seq[Int]): OrderedClustering = {
    val oldIndicesToNewIndicesMap = new HashMap[Int,Int]();
    (0 until mentIdxsToKeep.size).map(i => oldIndicesToNewIndicesMap.put(mentIdxsToKeep(i), i));
    val filteredConvertedClusters = clusters.map(cluster => cluster.filter(mentIdxsToKeep.contains(_)).map(mentIdx => oldIndicesToNewIndicesMap(mentIdx)));
    val filteredConvertedClustersNoEmpties = filteredConvertedClusters.filter(cluster => !cluster.isEmpty); 
    new OrderedClustering(filteredConvertedClustersNoEmpties);
  }
}

object OrderedClustering {
  
  def createFromClusterIds(clusterIds: Seq[Int]) = {
    val mentIdAndClusterId = (0 until clusterIds.size).map(i => (i, clusterIds(i)));
    val clustersUnsorted = mentIdAndClusterId.groupBy(_._2).values;
    val finalClusters = clustersUnsorted.toSeq.sortBy(_.head).map(clusterWithClusterId => clusterWithClusterId.map(_._1));
    new OrderedClustering(finalClusters.toSeq);
  }
  
  def createFromBackpointers(backpointers: Seq[Int]) = {
    var nextClusterID = 0;
    val clusters = new ArrayBuffer[ArrayBuffer[Int]]();
    val mentionToCluster = new HashMap[Int,ArrayBuffer[Int]]();
    for (i <- 0 until backpointers.size) {
      if (backpointers(i) == i) {
        val cluster = ArrayBuffer(i);
        clusters += cluster;
        mentionToCluster.put(i, cluster); 
      } else {
        val cluster = mentionToCluster(backpointers(i));
        cluster += i;
        mentionToCluster.put(i, cluster);
      }
    }
    new OrderedClustering(clusters);
  }
} 
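
A short sketch of building a clustering from backpointers; the indices are illustrative (mention i points to its antecedent, or to itself when it starts a new cluster):

import edu.berkeley.nlp.coref.OrderedClustering

object OrderedClusteringDemo {
  def main(args: Array[String]): Unit = {
    val backpointers = Seq(0, 0, 2, 1, 2)
    val clustering = OrderedClustering.createFromBackpointers(backpointers)
    println(clustering.getCluster(3))        // ArrayBuffer(0, 1, 3)
    println(clustering.startsCluster(2))     // true
    println(clustering.getAllAntecedents(4)) // ArrayBuffer(2)
  }
}
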
Example 41
Source File: FeatureIndexer.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.ml


@SerialVersionUID(1L)
class HashedFeatureIndexer[Feature] private(
  val maxFeatureSize: Int,
  val hasher: (Feature => Int)) extends FeatureIndexer[Feature] {

  def size = maxFeatureSize

  def getIndex(key: Feature) = (math.abs(hasher(key)) % maxFeatureSize)
}

object HashedFeatureIndexer {
  def apply[Feature](
    maxFeatureSize: Int = (2 << 23),
    hasher: (Feature => Int) = {f: Feature => f.hashCode()}) = {

    val biggestPrimeBelow = primes.takeWhile(maxFeatureSize > _).last
    new HashedFeatureIndexer[Feature](biggestPrimeBelow, hasher)
  }

  private lazy val primes = 2 #:: sieve(3)

  private def sieve(n: Int): Stream[Int] =
    if (primes.takeWhile(p => p*p <= n).exists(n % _ == 0)) sieve(n + 2)
    else n #:: sieve(n + 2)
} 
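
A hedged sketch of hashing feature keys into a fixed index space (it assumes the jigg.ml.FeatureIndexer parent trait is on the classpath); the feature strings are made up:

import jigg.ml.HashedFeatureIndexer

object HashedFeatureIndexerDemo {
  def main(args: Array[String]): Unit = {
    val indexer = HashedFeatureIndexer[String](maxFeatureSize = 1 << 17)
    println(indexer.size)                  // the largest prime below 2^17
    println(indexer.getIndex("word=dog"))  // a stable index in [0, size)
    println(indexer.getIndex("pos=NN"))
  }
}
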
Example 42
Source File: OutputCategoryList.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg



import java.io.FileWriter

import scala.collection.mutable.ArrayBuffer
import scala.sys.process.Process
import scala.collection.mutable.HashMap

import lexicon._

import breeze.config.CommandLineParser

object OutputCategoryList {

  case class Params(
    bank: Opts.BankInfo,
    dict: Opts.DictParams
  )

  case class CategoryInfo(sentence: GoldSuperTaggedSentence, position: Int, num: Int = 1) {
    def increment(): CategoryInfo = this.copy(num = num + 1)
    def replace(_sentence: GoldSuperTaggedSentence, _p: Int) =
      CategoryInfo(_sentence, _p, num + 1)
  }

  def main(args:Array[String]) = {

    val params = CommandLineParser.readIn[Params](args)

    val dict = new JapaneseDictionary(params.dict.categoryDictinoary)
    val bank = CCGBank.select(params.bank, dict)

    val trainSentences: Array[GoldSuperTaggedSentence] = bank.trainSentences

    val stats = new HashMap[Category, CategoryInfo]

    trainSentences foreach { sentence =>
      (0 until sentence.size) foreach { i =>
        val cat = sentence.cat(i)
        stats.get(cat) match {
          case Some(info) =>
            if (sentence.size > info.sentence.size)
              stats += ((cat, info.replace(sentence, i)))
            else
              stats += ((cat, info.increment()))
          case None => stats += ((cat, CategoryInfo(sentence, i)))
          case _ =>
        }
      }
    }
    def highlight(sentence: Sentence, i: Int) = {
      val tokens = sentence.wordSeq
      // tokens.take(i).mkString("") + s"\\x1b[1;31m{${tokens(i)}}\\x1b[0m" + tokens.drop(i+1).mkString("")
      tokens.slice(i-5, i).mkString("") + s"${tokens(i)}" + tokens.slice(i+1, i+6).mkString("")
    }

    var fw = new FileWriter("./category.lst")
    stats.toSeq.sortBy(_._2.num).reverse.foreach {
      case (cat, CategoryInfo(sentence, i, num)) =>
        fw.write("%s\t%s\t%s\t%s\n"
          .format(num, cat, sentence.pos(i), highlight(sentence, i)))
    }
    fw.flush
    fw.close

    val noFeatureCategories = new HashMap[String, CategoryInfo]
    stats foreach { case (cat, CategoryInfo(sentence, i, numWithFeat)) =>
      val noFeature = cat.toStringNoFeature
      noFeatureCategories.get(noFeature) match {
        case Some(exist) =>
          val newNum = numWithFeat + exist.num
          val newInfo = exist.copy(num = newNum)
          noFeatureCategories += (noFeature -> newInfo)
        case None =>
          noFeatureCategories += (noFeature -> CategoryInfo(sentence, i, numWithFeat))
        case _ =>
      }
    }

    fw = new FileWriter("./category.nofeature.lst")
    noFeatureCategories.toSeq.sortBy(_._2.num).reverse.foreach {
      case (cat, CategoryInfo(sentence, i, num)) =>
        fw.write("%s\t%s\t%s\t%s\n"
          .format(num, cat, sentence.pos(i), highlight(sentence, i)))
    }
    fw.flush
    fw.close
  }
} 
Example 43
Source File: HeadFinder.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg.parser



import scala.collection.mutable.HashMap
import jigg.nlp.ccg.lexicon.{PoS, JapanesePoS, Category}
import jigg.nlp.ccg.lexicon.Direction._

trait HeadFinder extends Serializable {
  type NodeInfo = HeadFinder.NodeInfo
  def get(left:NodeInfo, right:NodeInfo): Direction
}
object HeadFinder {
  case class NodeInfo(pos:PoS, category:Category, headCategory:Category)
}

case class EnglishHeadFinder(children2dir: Map[(Int, Int), Direction]) extends HeadFinder {
  def get(left:NodeInfo, right:NodeInfo) =
    children2dir.get(left.category.id, right.category.id) match {
      case Some(dir) => dir
      case _ => Left
    }
}

object EnglishHeadFinder {
  import jigg.nlp.ccg.lexicon.{ParseTree, NodeLabel, BinaryTree, NonterminalLabel}
  def createFromParseTrees(trees: Seq[ParseTree[NodeLabel]]): EnglishHeadFinder = {
    val map = new HashMap[(Int, Int), Direction]
    trees.foreach { _.foreachTree { _ match {
      case BinaryTree(left, right, NonterminalLabel(dir, _, _)) =>
        map += (left.label.category.id, right.label.category.id) -> dir
      case _ =>
    }}}
    EnglishHeadFinder(map.toMap)
  }
}

object JapaneseHeadFinder extends HeadFinder {
  val Symbol = "記号"
  def get(left:NodeInfo, right:NodeInfo) = {
    val leftPos = left.pos.first.v
    val rightPos = right.pos.first.v
    if (rightPos == Symbol) Left else Right
  }
} 
Example 44
Source File: Rule.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg.parser



import jigg.nlp.ccg.lexicon.{Category, Derivation, Point, UnaryChildPoint, BinaryChildrenPoints, AppliedRule}

import scala.collection.mutable.{HashMap, HashSet}
import java.io.{ObjectOutputStream, ObjectInputStream}

trait Rule {
  def unify(left:Category, right:Category): Option[Array[(Category, String)]]
  def raise(child:Category): Option[Array[(Category, String)]]
  def headFinder:HeadFinder
}

// rules are restricted to CFG rules extracted from the training CCGBank
case class CFGRule(val binaryRules:Map[(Int,Int), Array[(Category, String)]], // category ids -> (category, ruleType)
                   val unaryRules:Map[Int, Array[(Category, String)]],
                   override val headFinder:HeadFinder) extends Rule {
  def unify(left:Category, right:Category):Option[Array[(Category, String)]] = binaryRules.get((left.id, right.id))
  def raise(child:Category):Option[Array[(Category, String)]] = unaryRules.get(child.id)
}

object CFGRule {
  def extractRulesFromDerivations(derivations: Array[Derivation], headFinder:HeadFinder): CFGRule = {
    val binaryRules = new HashMap[(Int, Int), HashSet[(Category, String)]]
    val unaryRules = new HashMap[Int, HashSet[(Category, String)]]

    derivations.foreach { deriv =>
      deriv.foreachPoint({ point:Point => deriv.get(point) match {
        case Some(AppliedRule(UnaryChildPoint(child), ruleType)) =>
          val parents = unaryRules.getOrElseUpdate(child.category.id, new HashSet[(Category, String)])
          parents += ((point.category, ruleType))
        case Some(AppliedRule(BinaryChildrenPoints(left, right), ruleType)) =>
          val parents = binaryRules.getOrElseUpdate((left.category.id, right.category.id), new HashSet[(Category, String)])
          parents += ((point.category, ruleType))
        case _ =>
      }})
    }
    new CFGRule(binaryRules.map { case (k, v) => k -> v.toArray }.toMap,
                unaryRules.map { case (k, v) => k -> v.toArray }.toMap,
                headFinder)
  }
} 
Example 45
Source File: SuperTaggerModel.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg



import tagger.{LF=>Feature, MaxEntMultiTagger, MaxEntMultiTaggerTrainer, FeatureExtractors}
import lexicon._
import jigg.ml._

import scala.collection.mutable.HashMap

case class SuperTaggerModel(
  dict: Dictionary,
  featureMap: HashMap[Feature, Int],
  weights: WeightVec,
  extractors: FeatureExtractors) { self =>

  def reduceFeatures(): SuperTaggerModel = {

    val buffer = weights.asInstanceOf[GrowableWeightVector[Float]].array // 0 1.0 2.0 0 0 1.0 ...
    val activeIdxs = buffer.zipWithIndex filter (_._1 != 0) map (_._2)  // 1 2 5
    println(s"# features reduced from ${buffer.size} to ${activeIdxs.size}")
    val idxMap = activeIdxs.zipWithIndex.toMap // {1->0, 2->1 5->2}

    val newFeatureMap = featureMap collect {
      case (f, oldIdx) if idxMap.isDefinedAt(oldIdx) => (f, idxMap(oldIdx))
    }
    val newWeights = new FixedWeightVector[Float](activeIdxs.map(buffer).toArray)

    this copy (featureMap = newFeatureMap, weights = newWeights)
  }

  def mkMultiTaggerTrainer(classifierTrainer: OnlineLogLinearTrainer[Int]) =
    new MaxEntMultiTaggerTrainer(mkIndexer(), extractors, classifierTrainer, dict)

  def mkMultiTagger() =
    new MaxEntMultiTagger(mkIndexer(), extractors, mkClassifier(), dict)

  def mkClassifier() = new LogLinearClassifier[Int] {
    override val weights = self.weights
  }

  private def mkIndexer() = new ExactFeatureIndexer(featureMap)
}

object SuperTaggerModel {

  def saveTo(path: String, model: SuperTaggerModel) = {
    System.err.println("Saving tagger model to " + path)
    val os = jigg.util.IOUtil.openBinOut(path)
    os.writeObject(model)
    os.close
  }

  def loadFrom(path: String): SuperTaggerModel = {
    jigg.util.LogUtil.track("Loading supertagger model ...") {
      val in = jigg.util.IOUtil.openBinIn(path)
      val model = in.readObject.asInstanceOf[SuperTaggerModel]
      in.close
      model
    }
  }
} 
Example 46
Source File: CategoryManager.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg.lexicon



import scala.collection.mutable.HashMap
import scala.collection.mutable.ArrayBuffer

class CategoryManager extends StringBaseNumberedManager[Category] with OptionReturner[Category] {
  override def createWithId(original:Category): Category = original match {
    case AtomicCategory(id, base, avm) => AtomicCategory(newId, base, avm)
    case ComplexCategory(id, left, right, slash) =>
      val leftWithId = assignID(left)
      val rightWithId = assignID(right)
      ComplexCategory(newId, leftWithId, rightWithId, slash)
  }
  override def getOrNone(str:String): Option[Category] = str2objIndex.get(str) match {
    case Some(i) => Some(objects(i))
    case None => canonicalMap.get(createCanonicalInstance(str))
  }

  override def createCanonicalInstance(str:String): Category = JapaneseCategoryParser.parse(str)

  // This is used when candidate shift category is empty
  // It sometimes happens if, for example, a PoS not registered in the dictionary is detected.
  val unkCategory = getOrCreate("UNK")
} 
Example 47
Source File: CategoryDictionary.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg.lexicon


import scala.collection.mutable.HashMap

@SerialVersionUID(1L)
sealed trait CategoryDictionary extends Serializable {
  type Key
  type UnkKey
  val categoryMap = new HashMap[Key, Array[Category]]
  val unkCategoryMap = new HashMap[UnkKey, Array[Category]]

  def key(word:Word, pos:PoS):Key
  def unkKey(pos:PoS):UnkKey

  def getCandidates(word:Word, pos:PoS):Array[Category] = categoryMap.get(key(word, pos)) match {
    case Some(categories) => categories
    case None => unkCategoryMap.get(unkKey(pos)) match {
      case Some(categories) => categories
      case None => Array[Category]()
    }
  }
  def registCandidates(word:Word, pos:PoS, candidates:Array[Category]) = key(word, pos) match {
    case k => categoryMap += (k -> (categoryMap.get(k) match {
      case Some(alreadyExist) => (candidates ++ alreadyExist).distinct
      case None => candidates.distinct
    }))
  }
  def registUnkCandiates(pos:PoS, candidates:Array[Category]) = unkKey(pos) match {
    case k => unkCategoryMap += (k -> (unkCategoryMap.get(k) match {
      case Some(alreadyExist) => (candidates ++ alreadyExist).distinct
      case None => candidates.distinct
    }))
  }
  def resetWithSentences(sentences: Seq[GoldSuperTaggedSentence], unkThreathold: Int) = {
    val counts = new HashMap[Key, Int]
    sentences foreach { sentence => (0 until sentence.size) foreach { i =>
      val k = key(sentence.base(i), sentence.pos(i))
      counts.getOrElseUpdate(k, 0)
      counts(k) += 1
    }}

    sentences foreach { sentence => (0 until sentence.size) foreach { i =>
      val k = key(sentence.base(i), sentence.pos(i))
      if (counts(k) >= unkThreathold) registCandidates(sentence.base(i), sentence.pos(i), Array(sentence.cat(i)))
      registUnkCandiates(sentence.pos(i), Array(sentence.cat(i)))
    }}
  }
}

class Word2CategoryDictionary extends CategoryDictionary {
  type Key = Int
  type UnkKey = Int
  override def key(word:Word, pos:PoS) = word.id
  override def unkKey(pos:PoS) = pos.id
}

class WordPoS2CategoryDictionary extends CategoryDictionary {
  type Key = (Int, Int)
  type UnkKey = Int
  override def key(word:Word, pos:PoS) = (word.id, pos.id)
  override def unkKey(pos:PoS) = pos.id
}

class WordSecondFineTag2CategoryDictionary extends CategoryDictionary {
  override type Key = (Int, Int)
  override type UnkKey = Int
  override def key(word:Word, pos:PoS) = (word.id, pos.second.id)
  override def unkKey(pos:PoS) = pos.second.id
}

class WordSecondWithConj2CategoryDictionary extends CategoryDictionary {
  override type Key = (Int, Int)
  override type UnkKey = Int
  override def key(word:Word, pos:PoS) = (word.id, pos.secondWithConj.id)
  override def unkKey(pos:PoS) = pos.secondWithConj.id
} 
Example 48
Source File: CategoryFeature.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg.lexicon


trait CategoryFeature {
  def kvs: Seq[(String, String)]
  def unify(lhs: CategoryFeature): Boolean = false // TODO: implement
}

@SerialVersionUID(-8236395926230742650L)
case class JPCategoryFeature(values: Seq[String]) extends CategoryFeature {
  import JPCategoryFeature._

  override def kvs = keys zip values
  override def toString = kvs.filter(_._2 != "").map { case (k, v) => k + "=" + v }.mkString(",")
}

object JPCategoryFeature {
  // This is a hard-coded mapping of feature structure of Japanese category.
  private val k2vals = Map(
    "mod" -> Array("adv", "adn", "nm"),
    "form" -> Array("attr", "base", "cont", "hyp", "imp",
      "beg", "stem", "ta", "te", "pre", "r", "neg", "s", "da"),
    "case" -> Array("ga", "o", "ni", "to", "nc", "caus"),
    "fin" -> Array("f", "t"))

  private val keys = k2vals.keys.toSeq
  private val v2keyIdx = {
    val key2idx = keys.zipWithIndex.toMap
    k2vals.flatMap { case (key, vals) =>
      vals.map { v => v -> key2idx(key) }
    }
  }
  val kvpair = """\w+=(\w+)""".r

  def createFromValues(values: Seq[String]) = values match {
    case Seq() => emptyFeature
    case _ =>
      val sortedValues = Array.fill(keys.size)("")
      values.filter(_!="").foreach { value =>
        val v = value match { case kvpair(v) => v; case v => v }

        if (v(0) != 'X')
          v2keyIdx(v) match { case i => sortedValues(i) = v }
      }
      JPCategoryFeature(sortedValues)
  }
  // We cache this because most categories don't have a feature
  private val emptyFeature = JPCategoryFeature(Array.fill(keys.size)(""))
}

case class EnCategoryFeature(values: Seq[String]) extends CategoryFeature {
  override def kvs = values.zipWithIndex.map { case (v, k) => (k.toString, v) }
  override def toString = values.mkString(",")
}

object EnCategoryFeature {
  def createFromValues(values: Seq[String]) = EnCategoryFeature(values.sortWith(_ < _))
} 
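
A brief sketch of the Japanese category-feature factory above; only values from the hard-coded feature map are used, since unrecognized values would not resolve to a key:

import jigg.nlp.ccg.lexicon.JPCategoryFeature

object CategoryFeatureDemo {
  def main(args: Array[String]): Unit = {
    val feature = JPCategoryFeature.createFromValues(Seq("ga", "base"))
    println(feature)      // prints the non-empty slots, e.g. form=base,case=ga
    println(feature.kvs)  // all keys, with unset values left as ""
  }
}
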
Example 49
Source File: LocalKMeans.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println 
Example 50
Source File: JsonUtils.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization


  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    implicit val ordering = new Ordering[TopicPartition] {
      override def compare(x: TopicPartition, y: TopicPartition): Int = {
        Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition))
      }
    }
    val partitions = partitionOffsets.keySet.toSeq.sorted  // sort for more determinism
    partitions.foreach { tp =>
        val off = partitionOffsets(tp)
        val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
        parts += tp.partition -> off
        result += tp.topic -> parts
    }
    Serialization.write(result)
  }
} 
Example 51
Source File: MasterWebUI.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.master.ui

import scala.collection.mutable.HashMap

import org.eclipse.jetty.servlet.ServletContextHandler

import org.apache.spark.deploy.master.Master
import org.apache.spark.internal.Logging
import org.apache.spark.ui.{SparkUI, WebUI}
import org.apache.spark.ui.JettyUtils._


  def initialize() {
    val masterPage = new MasterPage(this)
    attachPage(new ApplicationPage(this))
    attachPage(masterPage)
    attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static"))
    attachHandler(createRedirectHandler(
      "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST")))
    attachHandler(createRedirectHandler(
      "/driver/kill", "/", masterPage.handleDriverKillRequest, httpMethods = Set("POST")))
  }

  def addProxyTargets(id: String, target: String): Unit = {
    var endTarget = target.stripSuffix("/")
    val handler = createProxyHandler("/proxy/" + id, endTarget)
    attachHandler(handler)
    proxyHandlers(id) = handler
  }

  def removeProxyTargets(id: String): Unit = {
    proxyHandlers.remove(id).foreach(detachHandler)
  }
}

private[master] object MasterWebUI {
  private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
} 
Example 52
Source File: PoolTable.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.jobs

import java.net.URLEncoder

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils


private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    val activeStages = poolToActiveStages.get(p.name) match {
      case Some(stages) => stages.size
      case None => 0
    }
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8"))
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
} 
Example 53
Source File: ConfigReader.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}


  def substitute(input: String): String = substitute(input, Set())

  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input != null) {
      ConfigReader.REF_RE.replaceAllIn(input, { m =>
        val prefix = m.group(1)
        val name = m.group(2)
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val replacement = bindings.get(prefix)
          .flatMap(_.get(name))
          .map { v => substitute(v, usedRefs + ref) }
          .getOrElse(m.matched)
        Regex.quoteReplacement(replacement)
      })
    } else {
      input
    }
  }

} 
Example 54
Source File: StageInfo.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.RDDInfo


// fromStage lives in StageInfo's companion object; the excerpt omits both the
// @DeveloperApi StageInfo class itself and the object header, restored here.
private[spark] object StageInfo {

  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskMetrics: TaskMetrics = null,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details,
      taskMetrics,
      taskLocalityPreferences)
  }
} 
Example 55
Source File: GroupedCountEvaluator.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.spark.util.collection.OpenHashMap


private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  private var outputsMerged = 0
  private val sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap
    }
  }
} 
Example 56
Source File: MasterWebUISuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.master.ui

import java.io.DataOutputStream
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets
import java.util.Date

import scala.collection.mutable.HashMap

import org.mockito.Mockito.{mock, times, verify, when}
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver}
import org.apache.spark.deploy.DeployTestUtils._
import org.apache.spark.deploy.master._
import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv}


class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll {

  val conf = new SparkConf
  val securityMgr = new SecurityManager(conf)
  val rpcEnv = mock(classOf[RpcEnv])
  val master = mock(classOf[Master])
  val masterEndpointRef = mock(classOf[RpcEndpointRef])
  when(master.securityMgr).thenReturn(securityMgr)
  when(master.conf).thenReturn(conf)
  when(master.rpcEnv).thenReturn(rpcEnv)
  when(master.self).thenReturn(masterEndpointRef)
  val masterWebUI = new MasterWebUI(master, 0)

  override def beforeAll() {
    super.beforeAll()
    masterWebUI.bind()
  }

  override def afterAll() {
    masterWebUI.stop()
    super.afterAll()
  }

  test("kill application") {
    val appDesc = createAppDesc()
    // use new start date so it isn't filtered by UI
    val activeApp = new ApplicationInfo(
      new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue)

    when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp)))

    val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/"
    val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify the master was called to remove the active app
    verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED)
  }

  test("kill driver") {
    val activeDriverId = "driver-0"
    val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/"
    val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify that master was asked to kill driver with the correct id
    verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId))
  }

  private def convPostDataToString(data: Map[String, String]): String = {
    (for ((name, value) <- data) yield s"$name=$value").mkString("&")
  }

  
  private def sendHttpRequest(
      url: String,
      method: String,
      body: String = ""): HttpURLConnection = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod(method)
    if (body.nonEmpty) {
      conn.setDoOutput(true)
      conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded")
      conn.setRequestProperty("Content-Length", Integer.toString(body.length))
      val out = new DataOutputStream(conn.getOutputStream)
      out.write(body.getBytes(StandardCharsets.UTF_8))
      out.close()
    }
    conn
  }
} 
Example 57
Source File: exercise02.scala    From scala-for-the-Impatient   with MIT License 5 votes vote down vote up
import scala.collection.mutable.{ListBuffer, HashMap}
def mapStrIndex(str:String)={
  var indexMap = new HashMap[Char,ListBuffer[Int]]()
  var i = 0
  str.toCharArray.foreach {
    c =>
      indexMap.get(c) match {
        case Some(result) => result += i
        case None => indexMap += (c -> ListBuffer {
          i
        })
      }
      i += 1
  }
  indexMap

}
println(mapStrIndex("Mississippi")) 
Example 58
Source File: exercise01.scala    From scala-for-the-Impatient   with MIT License 5 votes vote down vote up
import scala.collection.SortedSet
import scala.collection.mutable.HashMap
def mapStrIndex(str:String)={
  var indexMap = new HashMap[Char,SortedSet[Int]]()
  var i = 0
  str.toCharArray.foreach {
    c =>
      indexMap.get(c) match {
        case Some(result) => indexMap(c) = result + i
        case None => indexMap += (c -> SortedSet {
          i
        })
      }
      i += 1
  }
  indexMap

}
println(mapStrIndex("Mississippi")) 
Example 59
Source File: BlockStoreShuffleFetcher.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.shuffle.hash

import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
import scala.util.{Failure, Success, Try}

import org.apache.spark._
import org.apache.spark.serializer.Serializer
import org.apache.spark.shuffle.FetchFailedException
import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockFetcherIterator, ShuffleBlockId}
import org.apache.spark.util.CompletionIterator

private[hash] object BlockStoreShuffleFetcher extends Logging {
  def fetch[T](
      shuffleId: Int,
      reduceId: Int,
      context: TaskContext,
      serializer: Serializer)
    : Iterator[T] =
  {
    logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId))
    val blockManager = SparkEnv.get.blockManager

    val startTime = System.currentTimeMillis
    val statuses = SparkEnv.get.mapOutputTracker.getServerStatuses(shuffleId, reduceId)
    logDebug("Fetching map output location for shuffle %d, reduce %d took %d ms".format(
      shuffleId, reduceId, System.currentTimeMillis - startTime))

    val splitsByAddress = new HashMap[BlockManagerId, ArrayBuffer[(Int, Long)]]
    for (((address, size), index) <- statuses.zipWithIndex) {
      splitsByAddress.getOrElseUpdate(address, ArrayBuffer()) += ((index, size))
    }

    val blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])] = splitsByAddress.toSeq.map {
      case (address, splits) =>
        (address, splits.map(s => (ShuffleBlockId(shuffleId, s._1, reduceId), s._2)))
    }

    def unpackBlock(blockPair: (BlockId, Try[Iterator[Any]])) : Iterator[T] = {
      val blockId = blockPair._1
      val blockOption = blockPair._2
      blockOption match {
        case Success(block) => {
          block.asInstanceOf[Iterator[T]]
        }
        case Failure(e) => {
          blockId match {
            case ShuffleBlockId(shufId, mapId, _) =>
              val address = statuses(mapId.toInt)._1
              throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, e)
            case _ =>
              throw new SparkException(
                "Failed to get block " + blockId + ", which is not a shuffle block", e)
          }
        }
      }
    }

    val blockFetcherItr = new ShuffleBlockFetcherIterator(
      context,
      SparkEnv.get.blockManager.shuffleClient,
      blockManager,
      blocksByAddress,
      serializer,
      SparkEnv.get.conf.getLong("spark.reducer.maxMbInFlight", 48) * 1024 * 1024)
    val itr = blockFetcherItr.flatMap(unpackBlock)

    val completionIter = CompletionIterator[T, Iterator[T]](itr, {
      context.taskMetrics.updateShuffleReadMetrics()
    })

    new InterruptibleIterator[T](context, completionIter) {
      val readMetrics = context.taskMetrics.createShuffleReadMetricsForDependency()
      override def next(): T = {
        readMetrics.incRecordsRead(1)
        delegate.next()
      }
    }
  }
} 
Example 60
Source File: ExecutorsTab.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.exec

import scala.collection.mutable.HashMap

import org.apache.spark.ExceptionFailure
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.scheduler._
import org.apache.spark.storage.StorageStatusListener
import org.apache.spark.ui.{SparkUI, SparkUITab}

private[ui] class ExecutorsTab(parent: SparkUI) extends SparkUITab(parent, "executors") {
  val listener = parent.executorsListener
  val sc = parent.sc
  val threadDumpEnabled =
    sc.isDefined && parent.conf.getBoolean("spark.ui.threadDumpsEnabled", true)

  attachPage(new ExecutorsPage(this, threadDumpEnabled))
  if (threadDumpEnabled) {
    attachPage(new ExecutorThreadDumpPage(this))
  }
}


@DeveloperApi
class ExecutorsListener(storageStatusListener: StorageStatusListener) extends SparkListener {
  val executorToTasksActive = HashMap[String, Int]()
  val executorToTasksComplete = HashMap[String, Int]()
  val executorToTasksFailed = HashMap[String, Int]()
  val executorToDuration = HashMap[String, Long]()
  val executorToInputBytes = HashMap[String, Long]()
  val executorToInputRecords = HashMap[String, Long]()
  val executorToOutputBytes = HashMap[String, Long]()
  val executorToOutputRecords = HashMap[String, Long]()
  val executorToShuffleRead = HashMap[String, Long]()
  val executorToShuffleWrite = HashMap[String, Long]()
  val executorToLogUrls = HashMap[String, Map[String, String]]()

  def storageStatusList = storageStatusListener.storageStatusList

  override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded) = synchronized {
    val eid = executorAdded.executorId
    executorToLogUrls(eid) = executorAdded.executorInfo.logUrlMap
  }

  override def onTaskStart(taskStart: SparkListenerTaskStart) = synchronized {
    val eid = taskStart.taskInfo.executorId
    executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 0) + 1
  }

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = synchronized {
    val info = taskEnd.taskInfo
    if (info != null) {
      val eid = info.executorId
      executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 1) - 1
      executorToDuration(eid) = executorToDuration.getOrElse(eid, 0L) + info.duration
      taskEnd.reason match {
        case e: ExceptionFailure =>
          executorToTasksFailed(eid) = executorToTasksFailed.getOrElse(eid, 0) + 1
        case _ =>
          executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1
      }

      // Update shuffle read/write
      val metrics = taskEnd.taskMetrics
      if (metrics != null) {
        metrics.inputMetrics.foreach { inputMetrics =>
          executorToInputBytes(eid) =
            executorToInputBytes.getOrElse(eid, 0L) + inputMetrics.bytesRead
          executorToInputRecords(eid) =
            executorToInputRecords.getOrElse(eid, 0L) + inputMetrics.recordsRead
        }
        metrics.outputMetrics.foreach { outputMetrics =>
          executorToOutputBytes(eid) =
            executorToOutputBytes.getOrElse(eid, 0L) + outputMetrics.bytesWritten
          executorToOutputRecords(eid) =
            executorToOutputRecords.getOrElse(eid, 0L) + outputMetrics.recordsWritten
        }
        metrics.shuffleReadMetrics.foreach { shuffleRead =>
          executorToShuffleRead(eid) =
            executorToShuffleRead.getOrElse(eid, 0L) + shuffleRead.remoteBytesRead
        }
        metrics.shuffleWriteMetrics.foreach { shuffleWrite =>
          executorToShuffleWrite(eid) =
            executorToShuffleWrite.getOrElse(eid, 0L) + shuffleWrite.shuffleBytesWritten
        }
      }
    }
  }

} 
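A note on the pattern above: every counter in ExecutorsListener is updated with the same getOrElse-then-assign idiom on a mutable HashMap. A standalone sketch of that idiom (the names are illustrative only, not part of the Spark source):

import scala.collection.mutable.HashMap

val bytesPerExecutor = HashMap[String, Long]()

def record(executorId: String, bytes: Long): Unit = {
  // read the current total (0L if the executor is unseen), add, and write back
  bytesPerExecutor(executorId) = bytesPerExecutor.getOrElse(executorId, 0L) + bytes
}

record("exec-1", 512L)
record("exec-1", 256L)
// bytesPerExecutor("exec-1") == 768L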
Example 61
Source File: UIData.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable.HashMap

private[jobs] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0
    var failedTasks : Int = 0
    var succeededTasks : Int = 0
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None,
    var completionTime: Option[Long] = None,
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN)
    // (truncated by the excerpt: the original JobUIData declaration continues
    // with per-job task and stage counters)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)
} 
Example 62
Source File: PoolTable.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.jobs

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils


private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.listener

  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    val activeStages = poolToActiveStages.get(p.name) match {
      case Some(stages) => stages.size
      case None => 0
    }
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), p.name)
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
} 
Example 63
Source File: StageInfo.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.storage.RDDInfo


// fromStage lives in StageInfo's companion object; the excerpt omits both the
// @DeveloperApi StageInfo class itself and the object header, restored here.
private[spark] object StageInfo {

  def fromStage(stage: Stage, numTasks: Option[Int] = None): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      stage.attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.details)
  }
} 
Example 64
Source File: GroupedSumEvaluator.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter


private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // StatCounter of values for each key

  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val sum = entry.getValue.sum
        result(entry.getKey) = new BoundedDouble(sum, 1.0, sum, sum)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val meanEstimate = counter.mean
        val meanVar = counter.sampleVariance / counter.count
        val countEstimate = (counter.count + 1 - p) / p
        val countVar = (counter.count + 1) * (1 - p) / (p * p)
        val sumEstimate = meanEstimate * countEstimate
        val sumVar = (meanEstimate * meanEstimate * countVar) +
                     (countEstimate * countEstimate * meanVar) +
                     (meanVar * countVar)
        val sumStdev = math.sqrt(sumVar)
        val confFactor = studentTCacher.get(counter.count)
        val low = sumEstimate - confFactor * sumStdev
        val high = sumEstimate + confFactor * sumStdev
        result(entry.getKey) = new BoundedDouble(sumEstimate, confidence, low, high)
      }
      result
    }
  }
} 
Example 65
Source File: GroupedCountEvaluator.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.commons.math3.distribution.NormalDistribution

import org.apache.spark.util.collection.OpenHashMap


private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T,Long], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new OpenHashMap[T,Long]()   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: OpenHashMap[T,Long]) {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        result(key) = new BoundedDouble(sum, 1.0, sum, sum)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      val confFactor = new NormalDistribution().
        inverseCumulativeProbability(1 - (1 - confidence) / 2)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        val mean = (sum + 1 - p) / p
        val variance = (sum + 1) * (1 - p) / (p * p)
        val stdev = math.sqrt(variance)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result(key) = new BoundedDouble(mean, confidence, low, high)
      }
      result
    }
  }
} 
Example 66
Source File: GroupedMeanEvaluator.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter


private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // StatCounter of values for each key

  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val mean = entry.getValue.mean
        result(entry.getKey) = new BoundedDouble(mean, 1.0, mean, mean)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val mean = counter.mean
        val stdev = math.sqrt(counter.sampleVariance / counter.count)
        val confFactor = studentTCacher.get(counter.count)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result(entry.getKey) = new BoundedDouble(mean, confidence, low, high)
      }
      result
    }
  }
} 
Example 67
Source File: filter_errorcode.scala    From scalabpe   with Apache License 2.0 5 votes vote down vote up
package scalabpe.plugin

import scala.collection.mutable.HashMap
import scala.xml.Node

import scalabpe.core.DummyActor
import scalabpe.core.HashMapStringAny
import scalabpe.core.Logging
import scalabpe.core.Request
import scalabpe.core.Response
import scalabpe.core.ResponseFilter
import scalabpe.core.Router

class ErrorCodeDefine(val resultCodeName: String, val resultMsgName: String);

class ErrorDescResponseFilter(val router: Router, val cfgNode: Node) extends ResponseFilter with Logging {

    val cfgs = new HashMap[Int, ErrorCodeDefine]()
    var localCacheServiceId = 0
    val dummyActor = new DummyActor()

    init

    def init() {

        var s = (cfgNode \ "@localCacheServiceId").toString
        if (s != "")
            localCacheServiceId = s.toInt

        val serviceNodes = (cfgNode \ "Service")
        for (p <- serviceNodes) {

            val serviceId = (p \ "@serviceId").toString.toInt
            val resultCodeName = (p \ "@resultCodeField").toString
            val resultMsgName = (p \ "@resultMsgField").toString

            cfgs.put(serviceId, new ErrorCodeDefine(resultCodeName, resultMsgName))

            // log.info("serviceId=%d,resultCodeName=%s,resultMsgName=%s".format(serviceId,resultCodeName,resultMsgName))
        }

        log.info("errorcode response filter created")
    }

    def filter(res: Response, req: Request): Unit = {

        // log.info("error response filter called, res={}",res.toString)

        val rd = cfgs.getOrElse(res.serviceId, null)
        if (rd == null) return

        if (rd.resultCodeName != "") {
            if (res.body.getOrElse(rd.resultCodeName, null) == null) {
                res.body.put(rd.resultCodeName, res.code)
            }
        }

        if (res.code == 0) return
        if (rd.resultMsgName == "") return
        if (res.body.getOrElse(rd.resultMsgName, null) != null) return

        val body = new HashMapStringAny()
        body.put("resultCode", res.code)

        val req = new Request(
            res.requestId + ":$",
            Router.DO_NOT_REPLY,
            res.sequence,
            res.encoding,
            localCacheServiceId,
            1,
            new HashMapStringAny(),
            body,
            dummyActor)

        val invokeResult = router.send(req)
        if (invokeResult == null) return

        val resultMsg = invokeResult.s("resultMsg", "")
        if (resultMsg != "")
            res.body.put(rd.resultMsgName, resultMsg)
    }

} 
Example 68
Source File: httpserverplugin_staticfile.scala    From scalabpe   with Apache License 2.0 5 votes vote down vote up
package scalabpe.plugin.http

import java.io.File
import java.net.URLEncoder
import java.text.SimpleDateFormat
import java.util.Calendar
import java.util.GregorianCalendar
import java.util.Locale
import java.util.TimeZone

import scala.collection.mutable.HashMap

import org.jboss.netty.handler.codec.http.HttpHeaders

import scalabpe.core.HashMapStringAny

class StaticFilePlugin extends HttpServerPlugin with HttpServerStaticFilePlugin {

    val ETAG_TAG = "etag"
    val EXPIRE_TAG = "expire"
    val ATTACHMENT = "attachment"
    val FILENAME = "filename"

    val HTTP_DATE_FORMAT = "EEE, dd MMM yyyy HH:mm:ss zzz";
    val HTTP_DATE_GMT_TIMEZONE = "GMT";

    val df_tl = new ThreadLocal[SimpleDateFormat]() {
        override def initialValue(): SimpleDateFormat = {
            val df = new SimpleDateFormat(HTTP_DATE_FORMAT, Locale.US)
            df.setTimeZone(TimeZone.getTimeZone(HTTP_DATE_GMT_TIMEZONE));
            df
        }
    }

    def generateStaticFile(serviceId: Int, msgId: Int, errorCode: Int, errorMessage: String, body: HashMapStringAny, pluginParam: String, headers: HashMap[String, String]): String = {

        if (body.ns(FILENAME) == "") {
            return null
        }

        val filename = body.ns(FILENAME)
        if (!new File(filename).exists()) {
            return null
        }
        if (body.ns(ETAG_TAG) != "") {
            headers.put("ETag", body.ns(ETAG_TAG))
        }

        if (body.ns(EXPIRE_TAG) != "") {
            body.i(EXPIRE_TAG) match {
                case 0 | -1 =>
                    headers.put(HttpHeaders.Names.CACHE_CONTROL, "no-cache")
                case n => // seconds
                    val time = new GregorianCalendar();
                    time.add(Calendar.SECOND, n);
                    headers.put(HttpHeaders.Names.EXPIRES, df_tl.get.format(time.getTime()));
                    headers.put(HttpHeaders.Names.CACHE_CONTROL, "max-age=" + n);
            }
        }

        val ext = parseExt(filename)
        if (ext != "")
            body.put("__file_ext__", ext)

        if (body.ns(ATTACHMENT, "1") == "1") {
            val filename = body.ns(FILENAME)
            val v = "attachment; filename=\"%s\"".format(URLEncoder.encode(parseFilename(filename), "UTF-8"))
            headers.put("Content-Disposition", v)
        }

        filename
    }

    def parseFilename(name: String): String = {
        val p = name.lastIndexOf("/")
        if (p < 0) return name
        name.substring(p + 1)
    }
    def parseExt(name: String): String = {
        val p = name.lastIndexOf(".")
        if (p < 0) return ""
        name.substring(p + 1).toLowerCase()
    }

} 
Example 69
Source File: format_flow.scala    From scalabpe   with Apache License 2.0 5 votes vote down vote up
package scalabpe

import java.io._
import scala.collection.mutable.HashMap
import scala.collection.mutable.ArrayBuffer
import scala.io.Source
import org.apache.commons.io.FileUtils
import scala.xml._
import scala.collection.mutable._
import scalabpe.core._
import org.apache.commons.lang.StringUtils
import Tools._



object FormatFlowTool {

    def help() {
        println(
"""
usage: scalabpe.FormatFlowTool [options] dirname
options:
    -h|--help               show this help message
""")
    }

    def parseArgs(args:Array[String]):HashMapStringAny = {
        val map = HashMapStringAny()
        var i = 0
        val files = ArrayBufferString()
        while(i < args.size) {
            args(i) match {
                case "-h" | "--help" => 
                    return null
                case s if s.startsWith("-") => 
                    println("invalid option "+s)
                    return null
                case _ => 
                    files += args(i)
                    i += 1
            }
        }
        map.put("files",files)
        map
    }

    def main(args:Array[String]) {

        var params = parseArgs(args)
        if( params == null ) {
            help()
            return
        }
        var files = params.nls("files")
        if( files.size == 0 ) {
            help()
            return
        }

        var dir = files(0)
        if( !new File(dir).exists() ) {
            val p1 = "compose_conf"+File.separator+dir
            if( new File(p1).exists ) {
                dir = p1
            } else {
                println("not a valid dir, dir="+dir)
                return
            }
        }

        processDir(dir,params)
    }

    def processDir(dir:String,params:HashMapStringAny) {
        val files = new File(dir).listFiles.filter(_.getName.endsWith(".flow"))
        for(f <- files ) {
            processFile(dir,f.getName,params)
        }
    }

    def processFile(dir:String,f:String,params:HashMapStringAny) {
        val lines = readAllLines(dir+File.separator+f)
        // TODO
    }

} 
Example 70
Source File: GraphMap.scala    From stellar-random-walk   with Apache License 2.0 5 votes vote down vote up
package au.csiro.data61.randomwalk.algorithm

import scala.collection.mutable
import scala.collection.mutable.{ArrayBuffer, HashMap}



// The excerpt omits the GraphMap object header and its backing state; both
// are restored below, with field types inferred from how reset() and
// getNeighbors() use them (a compact adjacency-array layout).
object GraphMap {

  private var indexCounter = 0
  private var offsetCounter = 0
  private val srcVertexMap = new HashMap[Int, Int]()      // vertex id -> index into offsets/lengths
  private val offsets = new ArrayBuffer[Int]()            // start of each vertex's slice in edges
  private val lengths = new ArrayBuffer[Int]()            // neighbour count per vertex
  private val edges = new ArrayBuffer[(Int, Float)]()     // flattened (neighbour id, weight) pairs
  private val vertexPartitionMap = new mutable.HashMap[Int, Int]()

  def reset {
    indexCounter = 0
    offsetCounter = 0
    srcVertexMap.clear()
    offsets.clear()
    lengths.clear()
    edges.clear()
    vertexPartitionMap.clear
  }

  def getNeighbors(vid: Int): Array[(Int, Float)] = {
    srcVertexMap.get(vid) match {
      case Some(index) =>
        if (index == -1) {
          return Array.empty[(Int, Float)]
        }
        val offset = offsets(index)
        val length = lengths(index)
        edges.slice(offset, offset + length).toArray
      case None => null
    }
  }
} 
Example 71
Source File: HeaderEnum.scala    From testchipip   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package testchipip

import chisel3._
import chisel3.util.log2Up
import scala.collection.mutable.{HashMap, ListBuffer}

class HeaderEnum(val prefix: String) {
  val h = new HashMap[String,Int]
  def makeHeader(): String = {
    h.toSeq.sortBy(_._2).map { case (n,i) => s"#define ${prefix.toUpperCase}_${n.toUpperCase} $i\n" } mkString
  }
  def apply(s: String): UInt = h(s).U(log2Up(h.size).W)
}

object HeaderEnum {
  val contents = new ListBuffer[String]

  def apply(prefix: String, names: String*): HeaderEnum = {
    val e = new HeaderEnum(prefix)
    names.zipWithIndex.foreach { case (n,i) => e.h.put(n,i) }
    val header = e.makeHeader()
    if(!HeaderEnum.contents.contains(header)) HeaderEnum.contents += header
    e
  }
} 
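A quick usage sketch for the example above (a hypothetical "cmd" enum; assumes chisel3._ is in scope, as imported in the file):

val cmds = HeaderEnum("cmd", "read", "write", "flush")
cmds("write")             // 1.U, sized to log2Up(3) = 2 bits
print(cmds.makeHeader())
// #define CMD_READ 0
// #define CMD_WRITE 1
// #define CMD_FLUSH 2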
Example 72
Source File: Mapper.scala    From CSYE7200_Old   with MIT License 5 votes vote down vote up
package edu.neu.coe.csye7200.mapreduce

import akka.actor.{Actor, ActorLogging, ActorRef}

import scala.collection.mutable
import scala.collection.mutable.HashMap
import scala.util._


class Mapper_Forgiving[K1,V1,K2,V2](f: (K1,V1)=>(K2,V2)) extends Mapper[K1,V1,K2,V2](f) {
  
  override def prepareReply(v2k2ts: Seq[Try[(K2,V2)]]) = {
      val v2sK2m = mutable.HashMap[K2,Seq[V2]]() // mutable: successes grouped by key
      val xs = mutable.ListBuffer[Throwable]()   // mutable: collected failures
      for (v2k2t <- v2k2ts; v2k2e = Master.sequence(v2k2t))
        v2k2e match {
          case Right((k2,v2)) => v2sK2m put(k2, v2+:v2sK2m.getOrElse(k2, Nil))
          case Left(x) => xs += x
        }
      (v2sK2m.toMap, xs)
  }
}

case class Incoming[K, V](m: Seq[(K,V)]) {
  override def toString = s"Incoming: with ${m.size} elements"
}

object Incoming {
  def sequence[K,V](vs: Seq[V]): Incoming[K,V] = Incoming((vs zip Stream.continually(null.asInstanceOf[K])).map{_.swap})
  def map[K, V](vKm: Map[K,V]): Incoming[K,V] = Incoming(vKm.toSeq)
}

object Mapper {
} 
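The interesting part of Mapper_Forgiving is how prepareReply splits Try results into per-key successes plus a list of failures. The same idea as a standalone helper (groupResults is a hypothetical name, not part of the repo):

import scala.collection.mutable
import scala.util.{Failure, Success, Try}

def groupResults[K, V](results: Seq[Try[(K, V)]]): (Map[K, Seq[V]], Seq[Throwable]) = {
  val grouped = mutable.HashMap[K, Seq[V]]()   // key -> values seen so far
  val errors  = mutable.ListBuffer[Throwable]()
  results.foreach {
    case Success((k, v)) => grouped(k) = v +: grouped.getOrElse(k, Nil)
    case Failure(t)      => errors += t
  }
  (grouped.toMap, errors.toList)
}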
Example 73
Source File: Labels.scala    From jgo   with GNU General Public License v3.0 5 votes vote down vote up
package jgo.tools.compiler
package parser.stmts

import parser.exprs._
import parser.scoped._
import parser.funcs._

import interm._
import types._
import symbol._
import codeseq._
import instr._

import scala.collection.mutable.{HashMap, HashSet, ListBuffer}
import scala.{collection => coll}
import coll.{immutable => imm}


trait Labels {
  private val seenDefs   = HashSet[String]()
  private val unseenDefs = HashMap[String, ListBuffer[Pos]]()
  private val lbls = HashMap[String, UserLabel]()
  
  def defLabel(name: String, pos: Pos): (String, Err[UserLabel]) =
    if (seenDefs contains name)
      (name, problem("label %s already defined", name)(pos))
    else {
      seenDefs += name
      unseenDefs -= name
      val label = lbls getOrElseUpdate (name, new UserLabel(name))
      (name, result(label))
    }
  
  def useLabel(pos: Pos, name: String): UserLabel = {
    if (!(seenDefs contains name))
      unseenDefs.getOrElseUpdate(name, new ListBuffer) += pos
    lbls getOrElseUpdate (name, new UserLabel(name))
  }
  
  def procGoto(pos: Pos, name: String): Err[CodeBuilder] = {
    result(Goto(useLabel(pos, name)))
  }
  
  def checkForUndefedLabels: Err[Unit] = {
    var issues: Err[Unit] = result(())
    for ((lblName, positions) <- unseenDefs; pos <- positions) {
      issues = issues then problem("target label not found: %s", lblName)(pos)
    }
    issues
  }
} 
Example 74
Source File: LocalKMeans.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println 
Example 75
Source File: JsonUtils.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization


// The excerpt keeps only the write path of JsonUtils (the matching offset
// parsers are omitted); the object header and the implicit json4s formats it
// needs are restored here.
private object JsonUtils {

  private implicit val formats = Serialization.formats(NoTypeHints)

  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    implicit val ordering = new Ordering[TopicPartition] {
      override def compare(x: TopicPartition, y: TopicPartition): Int = {
        Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition))
      }
    }
    val partitions = partitionOffsets.keySet.toSeq.sorted  // sort for more determinism
    partitions.foreach { tp =>
        val off = partitionOffsets(tp)
        val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
        parts += tp.partition -> off
        result += tp.topic -> parts
    }
    Serialization.write(result)
  }
} 
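A hedged usage sketch for the serializer above (it must be called from within the same package, since JsonUtils is private; the topic and offsets are made up):

import org.apache.kafka.common.TopicPartition

val offsets = Map(
  new TopicPartition("events", 0) -> 10L,
  new TopicPartition("events", 1) -> 42L)
JsonUtils.partitionOffsets(offsets)
// produces JSON of the shape {"events":{"0":10,"1":42}}
// (topic-partitions are sorted before writing; inner map ordering may vary)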
Example 76
Source File: ThriftServerMonitor.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.thriftserver.monitor

import scala.collection.mutable.HashMap

import org.apache.spark.SparkException
import org.apache.spark.internal.Logging
import org.apache.spark.sql.hive.thriftserver.ui.ThriftServerTab

object ThriftServerMonitor extends Logging {

  private[this] val uiTabs = new HashMap[String, ThriftServerTab]()

  private[this] val listeners = new HashMap[String, ThriftServerListener]()

  def setListener(user: String, sparkListener: ThriftServerListener): Unit = {
    listeners.put(user, sparkListener)
  }

  def getListener(user: String): ThriftServerListener = {
    listeners.getOrElse(user, throw new SparkException(s"Listener does not init for user[$user]"))
  }

  def addUITab(user: String, ui: ThriftServerTab): Unit = {
    uiTabs.put(user, ui)
  }

  def detachUITab(user: String): Unit = {
    listeners.remove(user)
    uiTabs.get(user).foreach(_.detach())
  }

  def detachAllUITabs(): Unit = {
    uiTabs.values.foreach(_.detach())
  }
} 
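A minimal usage sketch for the registry above (the listener itself is stubbed here; a real one is created by the Thrift server):

val listener: ThriftServerListener = ???   // stand-in for a real listener
ThriftServerMonitor.setListener("alice", listener)
ThriftServerMonitor.getListener("alice")   // returns the listener just registered
ThriftServerMonitor.getListener("bob")     // throws SparkException (no listener registered for this user)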
Example 77
Source File: MasterWebUI.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.master.ui

import scala.collection.mutable.HashMap

import org.eclipse.jetty.servlet.ServletContextHandler

import org.apache.spark.deploy.master.Master
import org.apache.spark.internal.Logging
import org.apache.spark.ui.{SparkUI, WebUI}
import org.apache.spark.ui.JettyUtils._


// The excerpt drops the class header and its fields; they are restored
// minimally here (the constructor mirrors the upstream Spark class) so the
// methods below have an enclosing scope.
private[master] class MasterWebUI(val master: Master, requestedPort: Int)
  extends WebUI(master.securityMgr, master.securityMgr.getSSLOptions("standalone"),
    requestedPort, master.conf, name = "MasterUI") with Logging {

  private val proxyHandlers = new HashMap[String, ServletContextHandler]

  initialize()

  def initialize() {
    val masterPage = new MasterPage(this)
    attachPage(new ApplicationPage(this))
    attachPage(masterPage)
    attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static"))
    attachHandler(createRedirectHandler(
      "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST")))
    attachHandler(createRedirectHandler(
      "/driver/kill", "/", masterPage.handleDriverKillRequest, httpMethods = Set("POST")))
  }

  def addProxyTargets(id: String, target: String): Unit = {
    var endTarget = target.stripSuffix("/")
    val handler = createProxyHandler("/proxy/" + id, endTarget)
    attachHandler(handler)
    proxyHandlers(id) = handler
  }

  def removeProxyTargets(id: String): Unit = {
    proxyHandlers.remove(id).foreach(detachHandler)
  }
}

private[master] object MasterWebUI {
  private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
} 
Example 78
Source File: PoolTable.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.jobs

import java.net.URLEncoder

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils


private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    val activeStages = poolToActiveStages.get(p.name) match {
      case Some(stages) => stages.size
      case None => 0
    }
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath, sparkUser = parent.sparkUser), URLEncoder.encode(p.name, "UTF-8"))
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
} 
Example 79
Source File: ConfigReader.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}

// The excerpt keeps only the variable-substitution logic of this file; the
// enclosing class is restored minimally here. `bindings` maps an optional
// prefix (e.g. "env", "system", or null for the default provider) to a
// ConfigProvider whose get(name) is what substitute() consults.
private[spark] class ConfigReader(conf: ConfigProvider) {

  private val bindings = new HashMap[String, ConfigProvider]()
  bindings(null) = conf   // default binding, used when a reference has no prefix

  def substitute(input: String): String = substitute(input, Set())

  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input != null) {
      ConfigReader.REF_RE.replaceAllIn(input, { m =>
        val prefix = m.group(1)
        val name = m.group(2)
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val replacement = bindings.get(prefix)
          .flatMap(_.get(name))
          .map { v => substitute(v, usedRefs + ref) }
          .getOrElse(m.matched)
        Regex.quoteReplacement(replacement)
      })
    } else {
      input
    }
  }

} 
Example 80
Source File: StageInfo.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.RDDInfo


// fromStage lives in StageInfo's companion object; the excerpt omits both the
// @DeveloperApi StageInfo class itself and the object header, restored here.
private[spark] object StageInfo {

  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskMetrics: TaskMetrics = null,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details,
      taskMetrics,
      taskLocalityPreferences)
  }
} 
Example 81
Source File: GroupedCountEvaluator.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.spark.util.collection.OpenHashMap


private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  private var outputsMerged = 0
  private val sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap
    }
  }
} 
Example 82
Source File: MasterWebUISuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.master.ui

import java.io.DataOutputStream
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets
import java.util.Date

import scala.collection.mutable.HashMap

import org.mockito.Mockito.{mock, times, verify, when}
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver}
import org.apache.spark.deploy.DeployTestUtils._
import org.apache.spark.deploy.master._
import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv}


class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll {

  val conf = new SparkConf
  val securityMgr = new SecurityManager(conf)
  val rpcEnv = mock(classOf[RpcEnv])
  val master = mock(classOf[Master])
  val masterEndpointRef = mock(classOf[RpcEndpointRef])
  when(master.securityMgr).thenReturn(securityMgr)
  when(master.conf).thenReturn(conf)
  when(master.rpcEnv).thenReturn(rpcEnv)
  when(master.self).thenReturn(masterEndpointRef)
  val masterWebUI = new MasterWebUI(master, 0)

  override def beforeAll() {
    super.beforeAll()
    masterWebUI.bind()
  }

  override def afterAll() {
    masterWebUI.stop()
    super.afterAll()
  }

  test("kill application") {
    val appDesc = createAppDesc()
    // use new start date so it isn't filtered by UI
    val activeApp = new ApplicationInfo(
      new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue)

    when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp)))

    val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/"
    val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify the master was called to remove the active app
    verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED)
  }

  test("kill driver") {
    val activeDriverId = "driver-0"
    val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/"
    val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify that master was asked to kill driver with the correct id
    verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId))
  }

  private def convPostDataToString(data: Map[String, String]): String = {
    (for ((name, value) <- data) yield s"$name=$value").mkString("&")
  }

  
  private def sendHttpRequest(
      url: String,
      method: String,
      body: String = ""): HttpURLConnection = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod(method)
    if (body.nonEmpty) {
      conn.setDoOutput(true)
      conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded")
      conn.setRequestProperty("Content-Length", Integer.toString(body.length))
      val out = new DataOutputStream(conn.getOutputStream)
      out.write(body.getBytes(StandardCharsets.UTF_8))
      out.close()
    }
    conn
  }
} 
Example 83
Source File: LocalKMeans.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.SparkContext._


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D){rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (x1, y1)), (id2, (x2, y2))) => (id1, (x1 + x2, y1 + y2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
} 
Example 84
Source File: BlockStoreShuffleFetcher.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.shuffle.hash

import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
import scala.util.{Failure, Success, Try}

import org.apache.spark._
import org.apache.spark.serializer.Serializer
import org.apache.spark.shuffle.FetchFailedException
import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockFetcherIterator, ShuffleBlockId}
import org.apache.spark.util.CompletionIterator

private[hash] object BlockStoreShuffleFetcher extends Logging {
  def fetch[T](
      shuffleId: Int,
      reduceId: Int,
      context: TaskContext,
      serializer: Serializer)
    : Iterator[T] =
  {
    logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId))
    val blockManager = SparkEnv.get.blockManager

    val startTime = System.currentTimeMillis
    val statuses = SparkEnv.get.mapOutputTracker.getServerStatuses(shuffleId, reduceId)
    logDebug("Fetching map output location for shuffle %d, reduce %d took %d ms".format(
      shuffleId, reduceId, System.currentTimeMillis - startTime))

    val splitsByAddress = new HashMap[BlockManagerId, ArrayBuffer[(Int, Long)]]
    for (((address, size), index) <- statuses.zipWithIndex) {
      splitsByAddress.getOrElseUpdate(address, ArrayBuffer()) += ((index, size))
    }

    val blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])] = splitsByAddress.toSeq.map {
      case (address, splits) =>
        (address, splits.map(s => (ShuffleBlockId(shuffleId, s._1, reduceId), s._2)))
    }

    def unpackBlock(blockPair: (BlockId, Try[Iterator[Any]])) : Iterator[T] = {
      val blockId = blockPair._1
      val blockOption = blockPair._2
      blockOption match {
        case Success(block) => {
          block.asInstanceOf[Iterator[T]]
        }
        case Failure(e) => {
          blockId match {
            case ShuffleBlockId(shufId, mapId, _) =>
              val address = statuses(mapId.toInt)._1
              throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, e)
            case _ =>
              throw new SparkException(
                "Failed to get block " + blockId + ", which is not a shuffle block", e)
          }
        }
      }
    }

    val blockFetcherItr = new ShuffleBlockFetcherIterator(
      context,
      SparkEnv.get.blockManager.shuffleClient,
      blockManager,
      blocksByAddress,
      serializer,
      // Note: we use getSizeAsMb when no suffix is provided for backwards compatibility
      SparkEnv.get.conf.getSizeAsMb("spark.reducer.maxSizeInFlight", "48m") * 1024 * 1024)
    val itr = blockFetcherItr.flatMap(unpackBlock)

    val completionIter = CompletionIterator[T, Iterator[T]](itr, {
      context.taskMetrics.updateShuffleReadMetrics()
    })

    new InterruptibleIterator[T](context, completionIter) {
      val readMetrics = context.taskMetrics.createShuffleReadMetricsForDependency()
      override def next(): T = {
        readMetrics.incRecordsRead(1)
        delegate.next()
      }
    }
  }
} 
Example 85
Source File: ExecutorsTab.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.exec

import scala.collection.mutable.HashMap

import org.apache.spark.ExceptionFailure
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.scheduler._
import org.apache.spark.storage.{StorageStatus, StorageStatusListener}
import org.apache.spark.ui.{SparkUI, SparkUITab}
import org.apache.spark.ui.jobs.UIData.ExecutorUIData

private[ui] class ExecutorsTab(parent: SparkUI) extends SparkUITab(parent, "executors") {
  val listener = parent.executorsListener
  val sc = parent.sc
  val threadDumpEnabled =
    sc.isDefined && parent.conf.getBoolean("spark.ui.threadDumpsEnabled", true)

  attachPage(new ExecutorsPage(this, threadDumpEnabled))
  if (threadDumpEnabled) {
    attachPage(new ExecutorThreadDumpPage(this))
  }
}


@DeveloperApi
class ExecutorsListener(storageStatusListener: StorageStatusListener) extends SparkListener {
  val executorToTasksActive = HashMap[String, Int]()
  val executorToTasksComplete = HashMap[String, Int]()
  val executorToTasksFailed = HashMap[String, Int]()
  val executorToDuration = HashMap[String, Long]()
  val executorToInputBytes = HashMap[String, Long]()
  val executorToInputRecords = HashMap[String, Long]()
  val executorToOutputBytes = HashMap[String, Long]()
  val executorToOutputRecords = HashMap[String, Long]()
  val executorToShuffleRead = HashMap[String, Long]()
  val executorToShuffleWrite = HashMap[String, Long]()
  val executorToLogUrls = HashMap[String, Map[String, String]]()
  val executorIdToData = HashMap[String, ExecutorUIData]()

  def storageStatusList: Seq[StorageStatus] = storageStatusListener.storageStatusList

  override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = synchronized {
    val eid = executorAdded.executorId
    executorToLogUrls(eid) = executorAdded.executorInfo.logUrlMap
    executorIdToData(eid) = ExecutorUIData(executorAdded.time)
  }

  override def onExecutorRemoved(
      executorRemoved: SparkListenerExecutorRemoved): Unit = synchronized {
    val eid = executorRemoved.executorId
    val uiData = executorIdToData(eid)
    uiData.finishTime = Some(executorRemoved.time)
    uiData.finishReason = Some(executorRemoved.reason)
  }

  override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = synchronized {
    val eid = taskStart.taskInfo.executorId
    executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 0) + 1
  }

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = synchronized {
    val info = taskEnd.taskInfo
    if (info != null) {
      val eid = info.executorId
      executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 1) - 1
      executorToDuration(eid) = executorToDuration.getOrElse(eid, 0L) + info.duration
      taskEnd.reason match {
        case e: ExceptionFailure =>
          executorToTasksFailed(eid) = executorToTasksFailed.getOrElse(eid, 0) + 1
        case _ =>
          executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1
      }

      // Update input/output and shuffle read/write metrics
      val metrics = taskEnd.taskMetrics
      if (metrics != null) {
        metrics.inputMetrics.foreach { inputMetrics =>
          executorToInputBytes(eid) =
            executorToInputBytes.getOrElse(eid, 0L) + inputMetrics.bytesRead
          executorToInputRecords(eid) =
            executorToInputRecords.getOrElse(eid, 0L) + inputMetrics.recordsRead
        }
        metrics.outputMetrics.foreach { outputMetrics =>
          executorToOutputBytes(eid) =
            executorToOutputBytes.getOrElse(eid, 0L) + outputMetrics.bytesWritten
          executorToOutputRecords(eid) =
            executorToOutputRecords.getOrElse(eid, 0L) + outputMetrics.recordsWritten
        }
        metrics.shuffleReadMetrics.foreach { shuffleRead =>
          executorToShuffleRead(eid) =
            executorToShuffleRead.getOrElse(eid, 0L) + shuffleRead.remoteBytesRead
        }
        metrics.shuffleWriteMetrics.foreach { shuffleWrite =>
          executorToShuffleWrite(eid) =
            executorToShuffleWrite.getOrElse(eid, 0L) + shuffleWrite.shuffleBytesWritten
        }
      }
    }
  }

} 
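
The listener above maintains per-executor counters with the map(key) = map.getOrElse(key, default) + delta idiom. A small standalone sketch of that idiom follows; the executor id and task events are invented for illustration.

import scala.collection.mutable.HashMap

object ExecutorCounterSketch {
  def main(args: Array[String]): Unit = {
    val executorToTasksActive = HashMap[String, Int]()
    val executorToDuration = HashMap[String, Long]()

    def onTaskStart(eid: String): Unit =
      executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 0) + 1

    def onTaskEnd(eid: String, duration: Long): Unit = {
      executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 1) - 1
      executorToDuration(eid) = executorToDuration.getOrElse(eid, 0L) + duration
    }

    onTaskStart("exec-1"); onTaskStart("exec-1"); onTaskEnd("exec-1", 42L)
    println(executorToTasksActive("exec-1")) // 1
    println(executorToDuration("exec-1"))    // 42
  }
}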
Example 86
Source File: UIData.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0
    var failedTasks : Int = 0
    var succeededTasks : Int = 0
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None,
    var completionTime: Option[Long] = None,
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
} 
Example 87
Source File: PoolTable.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.jobs

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils


private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    val activeStages = poolToActiveStages.get(p.name) match {
      case Some(stages) => stages.size
      case None => 0
    }
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), p.name)
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
} 
Example 88
Source File: StageInfo.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.storage.RDDInfo


  def fromStage(stage: Stage, numTasks: Option[Int] = None): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      stage.attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details)
  }
} 
Example 89
Source File: GroupedSumEvaluator.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter


private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val sum = entry.getValue.sum
        result(entry.getKey) = new BoundedDouble(sum, 1.0, sum, sum)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val meanEstimate = counter.mean
        val meanVar = counter.sampleVariance / counter.count
        val countEstimate = (counter.count + 1 - p) / p
        val countVar = (counter.count + 1) * (1 - p) / (p * p)
        val sumEstimate = meanEstimate * countEstimate
        val sumVar = (meanEstimate * meanEstimate * countVar) +
                     (countEstimate * countEstimate * meanVar) +
                     (meanVar * countVar)
        val sumStdev = math.sqrt(sumVar)
        val confFactor = studentTCacher.get(counter.count)
        val low = sumEstimate - confFactor * sumStdev
        val high = sumEstimate + confFactor * sumStdev
        result(entry.getKey) = new BoundedDouble(sumEstimate, confidence, low, high)
      }
      result
    }
  }
} 
Example 90
Source File: GroupedCountEvaluator.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.commons.math3.distribution.NormalDistribution

import org.apache.spark.util.collection.OpenHashMap


private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]) {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        result(key) = new BoundedDouble(sum, 1.0, sum, sum)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      val confFactor = new NormalDistribution().
        inverseCumulativeProbability(1 - (1 - confidence) / 2)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        val mean = (sum + 1 - p) / p
        val variance = (sum + 1) * (1 - p) / (p * p)
        val stdev = math.sqrt(variance)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result(key) = new BoundedDouble(mean, confidence, low, high)
      }
      result
    }
  }
} 
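
The extrapolation in the partial-result branch above can be read in isolation: given a count observed after only a fraction p of the outputs, it estimates the full count and a normal-approximation confidence interval. A minimal sketch, assuming only commons-math3 on the classpath (the same dependency the example uses); the object and method names are invented.

import org.apache.commons.math3.distribution.NormalDistribution

object CountBoundSketch {
  // Given a partial count `sum` observed after a fraction `p` of outputs,
  // extrapolate a point estimate and a confidence interval, as above.
  def bound(confidence: Double, sum: Long, p: Double): (Double, Double, Double) = {
    val mean = (sum + 1 - p) / p
    val variance = (sum + 1) * (1 - p) / (p * p)
    val stdev = math.sqrt(variance)
    val confFactor = new NormalDistribution()
      .inverseCumulativeProbability(1 - (1 - confidence) / 2)
    (mean, mean - confFactor * stdev, mean + confFactor * stdev)
  }

  def main(args: Array[String]): Unit = {
    // 40 occurrences of a key seen after half the outputs -> point estimate of 81.
    println(bound(0.95, 40L, 0.5))
  }
}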
Example 91
Source File: GroupedMeanEvaluator.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter


private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val mean = entry.getValue.mean
        result(entry.getKey) = new BoundedDouble(mean, 1.0, mean, mean)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val mean = counter.mean
        val stdev = math.sqrt(counter.sampleVariance / counter.count)
        val confFactor = studentTCacher.get(counter.count)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result(entry.getKey) = new BoundedDouble(mean, confidence, low, high)
      }
      result
    }
  }
} 
Example 92
Source File: FeatureSelection.scala    From aerosolve   with Apache License 2.0 5 votes vote down vote up
package com.airbnb.aerosolve.training

import java.io.BufferedWriter
import java.io.OutputStreamWriter
import java.util

import com.airbnb.aerosolve.core.{ModelRecord, ModelHeader, FeatureVector, Example}
import com.airbnb.aerosolve.core.models.LinearModel
import com.airbnb.aerosolve.core.util.Util
import com.typesafe.config.Config
import org.slf4j.{LoggerFactory, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.Buffer
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import scala.util.Random
import scala.math.abs
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import org.apache.hadoop.conf.Configuration

object FeatureSelection {
  private final val log: Logger = LoggerFactory.getLogger("FeatureSelection")
  val allKey : (String, String) = ("$ALL", "$POS")

  // Given a RDD compute the pointwise mutual information between
  // the positive label and the discrete features.
  def pointwiseMutualInformation(examples : RDD[Example],
                                 config : Config,
                                 key : String,
                                 rankKey : String,
                                 posThreshold : Double,
                                 minPosCount : Double,
                                 newCrosses : Boolean) : RDD[((String, String), Double)] = {
    val pointwise = LinearRankerUtils.makePointwise(examples, config, key, rankKey)
    val features = pointwise
      .mapPartitions(part => {
      // The tuple2 is var, var | positive
      val output = scala.collection.mutable.HashMap[(String, String), (Double, Double)]()
      part.foreach(example =>{
        val featureVector = example.example.get(0)
        val isPos = if (featureVector.floatFeatures.get(rankKey).asScala.head._2 > posThreshold) 1.0
        else 0.0
        val all : (Double, Double) = output.getOrElse(allKey, (0.0, 0.0))
        output.put(allKey, (all._1 + 1.0, all._2 + 1.0 * isPos))

        val features : Array[(String, String)] =
          LinearRankerUtils.getFeatures(featureVector)
        if (newCrosses) {
          for (i <- features) {
            for (j <- features) {
              if (i._1 < j._1) {
                val key = ("%s<NEW>%s".format(i._1, j._1),
                           "%s<NEW>%s".format(i._2, j._2))
                val x = output.getOrElse(key, (0.0, 0.0))
                output.put(key, (x._1 + 1.0, x._2 + 1.0 * isPos))
              }
            }
          }
        }
        for (feature <- features) {
          val x = output.getOrElse(feature, (0.0, 0.0))
          output.put(feature, (x._1 + 1.0, x._2 + 1.0 * isPos))
        }
      })
      output.iterator
    })
    .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))
    .filter(x => x._2._2 >= minPosCount)

    val allCount = features.filter(x => x._1.equals(allKey)).take(1).head

    features.map(x => {
      val prob = x._2._1 / allCount._2._1
      val probPos = x._2._2 / allCount._2._2
      (x._1, math.log(probPos / prob) / math.log(2.0))
    })
  }

  // Returns the maximum entropy per family
  def maxEntropy(input : RDD[((String, String), Double)]) : RDD[((String, String), Double)] = {
    input
      .map(x => (x._1._1, (x._1._2, x._2)))
      .reduceByKey((a, b) => if (math.abs(a._2) > math.abs(b._2)) a else b)
      .map(x => ((x._1, x._2._1), x._2._2))
  }
} 
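
The pointwise mutual information computed above reduces to log2 of the ratio between P(feature | positive) and P(feature), using the global ("$ALL", "$POS") entry as the denominator counts. A tiny Spark-free sketch with made-up counts:

object PmiSketch {
  def main(args: Array[String]): Unit = {
    // (total count, count among positive examples) per feature, plus the global
    // entry that pointwiseMutualInformation keeps under allKey.
    val counts = Map(
      ("$ALL", "$POS") -> (100.0, 40.0), // 100 examples, 40 of them positive
      ("f", "a") -> (20.0, 16.0))        // feature seen 20 times, 16 times in positives

    val (allTotal, allPos) = counts(("$ALL", "$POS"))
    val (fTotal, fPos) = counts(("f", "a"))

    val prob = fTotal / allTotal   // P(feature)
    val probPos = fPos / allPos    // P(feature | positive)
    val pmi = math.log(probPos / prob) / math.log(2.0)

    println(f"PMI = $pmi%.3f")     // 1.000: the feature is twice as likely among positives
  }
}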
Example 93
Source File: package.scala    From kyuubi   with Apache License 2.0 5 votes vote down vote up
package yaooqinn.kyuubi

import scala.collection.mutable.HashMap

import org.apache.hadoop.fs.permission.FsPermission

package object yarn {

  type EnvMap = HashMap[String, String]

  val KYUUBI_YARN_APP_NAME = "KYUUBI SERVER"
  val KYUUBI_YARN_APP_TYPE = "KYUUBI"
  // Staging directory for any temporary jars or files
  val KYUUBI_STAGING: String = ".kyuubiStaging"
  // Staging directory is private! -> rwx------
  val STAGING_DIR_PERMISSION: FsPermission =
    FsPermission.createImmutable(Integer.parseInt("700", 8).toShort)
  // App files are world-wide readable and owner writable -> rw-r--r--
  val APP_FILE_PERMISSION: FsPermission =
    FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)

  val SPARK_CONF_DIR = "__spark_conf__"
  val SPARK_CONF_FILE = "__spark_conf__.properties"
  // Subdirectory in the conf directory containing Hadoop config files.
  val HADOOP_CONF_DIR = "__hadoop_conf__"
  // File containing the conf archive in the AM. See prepareLocalResources().
  val SPARK_CONF_ARCHIVE: String = SPARK_CONF_DIR + ".zip"
  val SPARK_LIB_DIR = "__spark_libs__"
  val LOCAL_SCHEME = "local"
} 
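
EnvMap is just an alias for a mutable HashMap[String, String]. A hypothetical sketch of assembling such a map before a container launch, assuming the package above is on the classpath; the environment variable names are invented for illustration.

import yaooqinn.kyuubi.yarn._

object EnvMapSketch {
  def main(args: Array[String]): Unit = {
    val env: EnvMap = new EnvMap()
    env += "SPARK_CONF_DIR" -> SPARK_CONF_DIR
    env += "HADOOP_CONF_DIR" -> HADOOP_CONF_DIR
    env.getOrElseUpdate("KYUUBI_APP_NAME", KYUUBI_YARN_APP_NAME)
    env.foreach { case (k, v) => println(s"$k=$v") }
  }
}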
Example 94
Source File: KyuubiDistributedCacheManager.scala    From kyuubi   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.yarn

import java.net.URI

import scala.collection.mutable.{HashMap, Map}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
import org.apache.hadoop.yarn.api.records.{LocalResource, LocalResourceType}


  def addResource(
      fs: FileSystem,
      conf: Configuration,
      destPath: Path,
      localResources: HashMap[String, LocalResource],
      resourceType: LocalResourceType,
      link: String,
      statCache: Map[URI, FileStatus]): Unit = {
    cacheManager.addResource(fs, conf, destPath,
      localResources, resourceType, link, statCache, appMasterOnly = true)
  }
} 
Example 95
Source File: KyuubiDistributedCacheManagerSuite.scala    From kyuubi   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.yarn

import java.net.URI

import scala.collection.mutable.{HashMap, Map}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
import org.apache.hadoop.yarn.api.records.{LocalResource, LocalResourceType, LocalResourceVisibility}
import org.apache.hadoop.yarn.util.ConverterUtils
import org.apache.spark.{KyuubiSparkUtil, SparkFunSuite}
import org.mockito.Mockito.when
import org.scalatest.mock.MockitoSugar

import yaooqinn.kyuubi.utils.ReflectUtils

class KyuubiDistributedCacheManagerSuite extends SparkFunSuite with MockitoSugar {

  class MockClientDistributedCacheManager extends ClientDistributedCacheManager {
    override def getVisibility(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]):
    LocalResourceVisibility = {
      LocalResourceVisibility.PRIVATE
    }
  }

  test("add resource") {
    val fs = mock[FileSystem]
    val conf = new Configuration()
    val destPath = new Path("file:///foo.bar.com:8080/tmp/testing")
    val localResources = HashMap[String, LocalResource]()
    val statCache = HashMap[URI, FileStatus]()
    val status = new FileStatus()
    when(fs.getFileStatus(destPath)).thenReturn(status)
    val fileLink = "link"
    ReflectUtils.setFieldValue(
      KyuubiDistributedCacheManager, "cacheManager", new MockClientDistributedCacheManager)
    KyuubiDistributedCacheManager.addResource(
      fs, conf, destPath, localResources, LocalResourceType.FILE, fileLink, statCache)
    val res = localResources(fileLink)
    assert(res.getVisibility === LocalResourceVisibility.PRIVATE)
    assert(ConverterUtils.getPathFromYarnURL(res.getResource) === destPath)
    assert(res.getSize === 0)
    assert(res.getTimestamp === 0)
    assert(res.getType === LocalResourceType.FILE)
    val status2 = new FileStatus(
      10, false, 1, 1024, 10,
      10, null, KyuubiSparkUtil.getCurrentUserName, null, new Path("/tmp/testing2"))
    val destPath2 = new Path("file:///foo.bar.com:8080/tmp/testing2")
    when(fs.getFileStatus(destPath2)).thenReturn(status2)
    val fileLink2 = "link2"
    KyuubiDistributedCacheManager.addResource(
      fs, conf, destPath2, localResources, LocalResourceType.FILE, fileLink2, statCache)
    val res2 = localResources(fileLink2)
    assert(res2.getVisibility === LocalResourceVisibility.PRIVATE)
    assert(ConverterUtils.getPathFromYarnURL(res2.getResource) === destPath2)
    assert(res2.getSize === 10)
    assert(res2.getTimestamp === 10)
    assert(res2.getType === LocalResourceType.FILE)
  }

  test("add resource when link null") {
    val distMgr = new MockClientDistributedCacheManager()
    val fs = mock[FileSystem]
    val conf = new Configuration()
    val destPath = new Path("file:///foo.bar.com:8080/tmp/testing")
    ReflectUtils.setFieldValue(KyuubiDistributedCacheManager, "cacheManager", distMgr)
    val localResources = HashMap[String, LocalResource]()
    val statCache = HashMap[URI, FileStatus]()
    when(fs.getFileStatus(destPath)).thenReturn(new FileStatus())
    intercept[Exception] {
      KyuubiDistributedCacheManager.addResource(
        fs, conf, destPath, localResources, LocalResourceType.FILE, null, statCache)
    }
    assert(localResources.get("link") === None)
    assert(localResources.size === 0)
  }

  test("test addResource archive") {
    val distMgr = new MockClientDistributedCacheManager()
    ReflectUtils.setFieldValue(KyuubiDistributedCacheManager, "cacheManager", distMgr)
    val fs = mock[FileSystem]
    val conf = new Configuration()
    val destPath = new Path("file:///foo.bar.com:8080/tmp/testing")
    val localResources = HashMap[String, LocalResource]()
    val statCache = HashMap[URI, FileStatus]()
    val realFileStatus = new FileStatus(10, false, 1, 1024, 10, 10, null, "testOwner",
      null, new Path("/tmp/testing"))
    when(fs.getFileStatus(destPath)).thenReturn(realFileStatus)

    KyuubiDistributedCacheManager.addResource(
      fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", statCache)
    val resource = localResources("link")
    assert(resource.getVisibility === LocalResourceVisibility.PRIVATE)
    assert(ConverterUtils.getPathFromYarnURL(resource.getResource) === destPath)
    assert(resource.getTimestamp === 10)
    assert(resource.getSize === 10)
    assert(resource.getType === LocalResourceType.ARCHIVE)

  }

} 
Example 96
Source File: UIData.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable
import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0 // task time
    var failedTasks : Int = 0 // number of failed tasks
    var succeededTasks : Int = 0 // number of succeeded tasks
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None, // submission time
    var completionTime: Option[Long] = None, // completion time
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
} 
Example 97
Source File: PoolTable.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui.jobs

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils


private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    val activeStages = poolToActiveStages.get(p.name) match {
      case Some(stages) => stages.size
      case None => 0
    }
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), p.name)
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
} 
Example 98
Source File: StageInfo.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.storage.RDDInfo


  def fromStage(
      stage: Stage,
      attemptId: Int,
      // None is declared as an object rather than a class: use None when there is no value
      // and Some to wrap a value when there is one; both are subclasses of Option.
      numTasks: Option[Int] = None,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    // getNarrowAncestors returns all RDDs this stage's RDD depends on through direct or indirect narrow dependencies
    // RDDInfo.fromRdd builds the RDDInfo, including the RDD's parent dependency information
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    // also build an RDDInfo for the current stage's RDD, then combine all RDDInfos into rddInfos
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details,
      taskLocalityPreferences)
  }
} 
Example 99
Source File: GroupedSumEvaluator.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter


private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val sum = entry.getValue.sum
        result(entry.getKey) = new BoundedDouble(sum, 1.0, sum, sum)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val meanEstimate = counter.mean
        val meanVar = counter.sampleVariance / counter.count
        val countEstimate = (counter.count + 1 - p) / p
        val countVar = (counter.count + 1) * (1 - p) / (p * p)
        val sumEstimate = meanEstimate * countEstimate
        val sumVar = (meanEstimate * meanEstimate * countVar) +
                     (countEstimate * countEstimate * meanVar) +
                     (meanVar * countVar)
        val sumStdev = math.sqrt(sumVar)
        val confFactor = studentTCacher.get(counter.count)
        val low = sumEstimate - confFactor * sumStdev
        val high = sumEstimate + confFactor * sumStdev
        result(entry.getKey) = new BoundedDouble(sumEstimate, confidence, low, high)
      }
      result
    }
  }
} 
Example 100
Source File: GroupedCountEvaluator.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.commons.math3.distribution.NormalDistribution

import org.apache.spark.util.collection.OpenHashMap


private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]) {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        result(key) = new BoundedDouble(sum, 1.0, sum, sum)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      val confFactor = new NormalDistribution().
        inverseCumulativeProbability(1 - (1 - confidence) / 2)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        val mean = (sum + 1 - p) / p
        val variance = (sum + 1) * (1 - p) / (p * p)
        val stdev = math.sqrt(variance)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result(key) = new BoundedDouble(mean, confidence, low, high)
      }
      result
    }
  }
} 
Example 101
Source File: GroupedMeanEvaluator.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter


private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val mean = entry.getValue.mean
        result(entry.getKey) = new BoundedDouble(mean, 1.0, mean, mean)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val mean = counter.mean
        val stdev = math.sqrt(counter.sampleVariance / counter.count)
        val confFactor = studentTCacher.get(counter.count)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result(entry.getKey) = new BoundedDouble(mean, confidence, low, high)
      }
      result
    }
  }
} 
Example 102
Source File: Mapper.scala    From CSYE7200   with MIT License 5 votes vote down vote up
package edu.neu.coe.csye7200.mapreduce

import akka.actor.{Actor, ActorLogging, ActorRef}

import scala.collection.mutable
import scala.collection.mutable.HashMap
import scala.util._


class Mapper_Forgiving[K1,V1,K2,V2](f: (K1,V1)=>(K2,V2)) extends Mapper[K1,V1,K2,V2](f) {
  
  override def prepareReply(v2k2ts: Seq[Try[(K2,V2)]]): (Map[K2, Seq[V2]], Seq[Throwable]) = {
    val v2sK2m = mutable.HashMap[K2, Seq[V2]]() // mutable
    val xs = mutable.ArrayBuffer[Throwable]() // mutable, so that failures are actually collected
    // CONSIDER using traverse
    for (v2k2t <- v2k2ts; v2k2e = Master.sequence(v2k2t))
      v2k2e match {
        case Right((k2, v2)) => v2sK2m put(k2, v2 +: v2sK2m.getOrElse(k2, Nil))
        case Left(x) => xs += x
      }
    (v2sK2m.toMap, xs.toSeq)
  }
}

case class Incoming[K, V](m: Seq[(K,V)]) {
  override def toString = s"Incoming: with ${m.size} elements"
}

object Incoming {
  def sequence[K,V](vs: Seq[V]): Incoming[K,V] = Incoming((vs zip Stream.continually(null.asInstanceOf[K])).map{_.swap})
  def map[K, V](vKm: Map[K,V]): Incoming[K,V] = Incoming(vKm.toSeq)
}

object Mapper {
} 
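
The prepareReply override above groups (K2, V2) pairs into a HashMap[K2, Seq[V2]] by prepending each value to whatever sequence is already stored under that key. A minimal sketch of that grouping step in isolation; the string keys and integer values are invented.

import scala.collection.mutable

object GroupValuesSketch {
  def main(args: Array[String]): Unit = {
    val pairs = Seq("a" -> 1, "b" -> 2, "a" -> 3)

    val grouped = mutable.HashMap[String, Seq[Int]]()
    for ((k, v) <- pairs)
      grouped.put(k, v +: grouped.getOrElse(k, Nil))

    println(grouped.toMap) // e.g. Map(a -> List(3, 1), b -> List(2)): later values are prepended
  }
}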
Example 103
Source File: LocalKMeans.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers(i)
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    val points = new HashSet[Vector[Double]]
    val kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println(s"Initial centers: $kPoints")

    while(tempDist > convergeDist) {
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val mappings = closest.groupBy[Int] (x => x._1)

      val pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints(mapping._1), mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println(s"Final centers: $kPoints")
  }
}
// scalastyle:on println 
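
A small usage sketch of the closestPoint helper above: centers live in a mutable HashMap keyed by a 1-based cluster index, mirroring how kPoints is built in main. It assumes spark-examples and breeze are on the classpath; the point and center coordinates are made up.

import scala.collection.mutable.HashMap

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.examples.LocalKMeans

object ClosestPointSketch {
  def main(args: Array[String]): Unit = {
    val centers = new HashMap[Int, Vector[Double]]
    centers.put(1, DenseVector(0.0, 0.0))
    centers.put(2, DenseVector(5.0, 5.0))

    val p: Vector[Double] = DenseVector(4.0, 4.5)
    println(LocalKMeans.closestPoint(p, centers)) // 2: the point is nearer the second center
  }
}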
Example 104
Source File: LocalityPlacementStrategySuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.yarn

import scala.collection.JavaConverters._
import scala.collection.mutable.{HashMap, HashSet, Set}

import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.mockito.Mockito._

import org.apache.spark.{SparkConf, SparkFunSuite}

class LocalityPlacementStrategySuite extends SparkFunSuite {

  test("handle large number of containers and tasks (SPARK-18750)") {
    // Run the test in a thread with a small stack size, since the original issue
    // surfaced as a StackOverflowError.
    var error: Throwable = null

    val runnable = new Runnable() {
      override def run(): Unit = try {
        runTest()
      } catch {
        case e: Throwable => error = e
      }
    }

    val thread = new Thread(new ThreadGroup("test"), runnable, "test-thread", 32 * 1024)
    thread.start()
    thread.join()

    assert(error === null)
  }

  private def runTest(): Unit = {
    val yarnConf = new YarnConfiguration()

    // The numbers below have been chosen to balance being large enough to replicate the
    // original issue while not taking too long to run when the issue is fixed. The main
    // goal is to create enough requests for localized containers (so there should be many
    // tasks on several hosts that have no allocated containers).

    val resource = Resource.newInstance(8 * 1024, 4)
    val strategy = new LocalityPreferredContainerPlacementStrategy(new SparkConf(),
      yarnConf, resource, new MockResolver())

    val totalTasks = 32 * 1024
    val totalContainers = totalTasks / 16
    val totalHosts = totalContainers / 16

    val mockId = mock(classOf[ContainerId])
    val hosts = (1 to totalHosts).map { i => (s"host_$i", totalTasks % i) }.toMap
    val containers = (1 to totalContainers).map { i => mockId }
    val count = containers.size / hosts.size / 2

    val hostToContainerMap = new HashMap[String, Set[ContainerId]]()
    hosts.keys.take(hosts.size / 2).zipWithIndex.foreach { case (host, i) =>
      val hostContainers = new HashSet[ContainerId]()
      containers.drop(count * i).take(i).foreach { c => hostContainers += c }
      hostToContainerMap(host) = hostContainers
    }

    strategy.localityOfRequestedContainers(containers.size * 2, totalTasks, hosts,
      hostToContainerMap, Nil)
  }

} 
Example 105
Source File: JsonUtils.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization


  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    implicit val ordering = new Ordering[TopicPartition] {
      override def compare(x: TopicPartition, y: TopicPartition): Int = {
        Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition))
      }
    }
    val partitions = partitionOffsets.keySet.toSeq.sorted  // sort for more determinism
    partitions.foreach { tp =>
        val off = partitionOffsets(tp)
        val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
        parts += tp.partition -> off
        result += tp.topic -> parts
    }
    Serialization.write(result)
  }
} 
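
The serializer above builds a nested topic -> (partition -> offset) HashMap and hands it to json4s. A minimal sketch of just that step, without Kafka types, assuming json4s-jackson on the classpath (the same dependency the example imports); the topic name and offsets are invented.

import scala.collection.mutable.HashMap

import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

object NestedOffsetsJsonSketch {
  // topic -> (partition -> offset), serialized the same way partitionOffsets does above.
  implicit val formats: org.json4s.Formats = Serialization.formats(NoTypeHints)

  def main(args: Array[String]): Unit = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    val parts = result.getOrElse("topicA", new HashMap[Int, Long])
    parts += 0 -> 23L
    parts += 1 -> -1L
    result += "topicA" -> parts
    println(Serialization.write(result)) // e.g. {"topicA":{"0":23,"1":-1}}
  }
}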
Example 106
Source File: StageInfo.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.RDDInfo


  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskMetrics: TaskMetrics = null,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details,
      taskMetrics,
      taskLocalityPreferences)
  }
} 
Example 107
Source File: GroupedCountEvaluator.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.partial

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.spark.util.collection.OpenHashMap


private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  private var outputsMerged = 0
  private val sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap
    }
  }
} 
Example 108
Source File: MasterWebUISuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.master.ui

import java.io.DataOutputStream
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets
import java.util.Date

import scala.collection.mutable.HashMap

import org.mockito.Mockito.{mock, times, verify, when}
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver}
import org.apache.spark.deploy.DeployTestUtils._
import org.apache.spark.deploy.master._
import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv}


class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll {

  val conf = new SparkConf
  val securityMgr = new SecurityManager(conf)
  val rpcEnv = mock(classOf[RpcEnv])
  val master = mock(classOf[Master])
  val masterEndpointRef = mock(classOf[RpcEndpointRef])
  when(master.securityMgr).thenReturn(securityMgr)
  when(master.conf).thenReturn(conf)
  when(master.rpcEnv).thenReturn(rpcEnv)
  when(master.self).thenReturn(masterEndpointRef)
  val masterWebUI = new MasterWebUI(master, 0)

  override def beforeAll() {
    super.beforeAll()
    masterWebUI.bind()
  }

  override def afterAll() {
    masterWebUI.stop()
    super.afterAll()
  }

  test("kill application") {
    val appDesc = createAppDesc()
    // use new start date so it isn't filtered by UI
    val activeApp = new ApplicationInfo(
      new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue)

    when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp)))

    val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/"
    val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify the master was called to remove the active app
    verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED)
  }

  test("kill driver") {
    val activeDriverId = "driver-0"
    val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/"
    val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify that master was asked to kill driver with the correct id
    verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId))
  }

  private def convPostDataToString(data: Map[String, String]): String = {
    (for ((name, value) <- data) yield s"$name=$value").mkString("&")
  }

  
  private def sendHttpRequest(
      url: String,
      method: String,
      body: String = ""): HttpURLConnection = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod(method)
    if (body.nonEmpty) {
      conn.setDoOutput(true)
      conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded")
      conn.setRequestProperty("Content-Length", Integer.toString(body.length))
      val out = new DataOutputStream(conn.getOutputStream)
      out.write(body.getBytes(StandardCharsets.UTF_8))
      out.close()
    }
    conn
  }
} 
Example 109
Source File: TaskDescriptionSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import java.io.{ByteArrayOutputStream, DataOutputStream, UTFDataFormatException}
import java.nio.ByteBuffer
import java.util.Properties

import scala.collection.mutable.HashMap

import org.apache.spark.SparkFunSuite

class TaskDescriptionSuite extends SparkFunSuite {
  test("encoding and then decoding a TaskDescription results in the same TaskDescription") {
    val originalFiles = new HashMap[String, Long]()
    originalFiles.put("fileUrl1", 1824)
    originalFiles.put("fileUrl2", 2)

    val originalJars = new HashMap[String, Long]()
    originalJars.put("jar1", 3)

    val originalProperties = new Properties()
    originalProperties.put("property1", "18")
    originalProperties.put("property2", "test value")
    // SPARK-19796 -- large property values (like a large job description for a long sql query)
    // can cause problems for DataOutputStream, make sure we handle correctly
    val sb = new StringBuilder()
    (0 to 10000).foreach(_ => sb.append("1234567890"))
    val largeString = sb.toString()
    originalProperties.put("property3", largeString)
    // make sure we've got a good test case
    intercept[UTFDataFormatException] {
      val out = new DataOutputStream(new ByteArrayOutputStream())
      try {
        out.writeUTF(largeString)
      } finally {
        out.close()
      }
    }

    // Create a dummy byte buffer for the task.
    val taskBuffer = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4))

    val originalTaskDescription = new TaskDescription(
      taskId = 1520589,
      attemptNumber = 2,
      executorId = "testExecutor",
      name = "task for test",
      index = 19,
      originalFiles,
      originalJars,
      originalProperties,
      taskBuffer
    )

    val serializedTaskDescription = TaskDescription.encode(originalTaskDescription)
    val decodedTaskDescription = TaskDescription.decode(serializedTaskDescription)

    // Make sure that all of the fields in the decoded task description match the original.
    assert(decodedTaskDescription.taskId === originalTaskDescription.taskId)
    assert(decodedTaskDescription.attemptNumber === originalTaskDescription.attemptNumber)
    assert(decodedTaskDescription.executorId === originalTaskDescription.executorId)
    assert(decodedTaskDescription.name === originalTaskDescription.name)
    assert(decodedTaskDescription.index === originalTaskDescription.index)
    assert(decodedTaskDescription.addedFiles.equals(originalFiles))
    assert(decodedTaskDescription.addedJars.equals(originalJars))
    assert(decodedTaskDescription.properties.equals(originalTaskDescription.properties))
    assert(decodedTaskDescription.serializedTask.equals(taskBuffer))
  }
} 
Example 110
Source File: KinesisSourceOffset.scala    From kinesis-sql   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kinesis

import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.sql.execution.streaming.SerializedOffset
import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, PartitionOffset}


 
  def apply(json: String): KinesisSourceOffset = {
    try {
      val readObj = Serialization.read[ Map[ String, Map[ String, String ] ] ](json)
      val metadata = readObj.get("metadata")
      val shardInfoMap: Map[String, ShardInfo ] = readObj.filter(_._1 != "metadata").map {
        case (shardId, value) => shardId.toString -> new ShardInfo(shardId.toString,
          value.get("iteratorType").get,
          value.get("iteratorPosition").get)
      }.toMap
      KinesisSourceOffset(
        new ShardOffsets(
          metadata.get("batchId").toLong,
          metadata.get("streamName"),
          shardInfoMap))
    } catch {
      case NonFatal(x) => throw new IllegalArgumentException(x)
    }
  }

  def getMap(shardInfos: Array[ShardInfo]): Map[String, ShardInfo] = {
    shardInfos.map {
      s: ShardInfo => (s.shardId -> s)
    }.toMap
  }

} 
Example 111
Source File: LocalKMeans.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.SparkContext._


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D){rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println 
Example 112
Source File: RecursivePlanDetails.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package edu.ucla.cs.wis.bigdatalog.spark

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

import scala.collection.mutable.HashMap

class RecursivePlanDetails extends Serializable {
  private val baseRelationsByName = new HashMap[String, LogicalPlan]
  val recursiveRelations = new HashMap[String, LogicalPlan]
  val aggregateRelations = new HashMap[String, LogicalPlan]

  def addBaseRelation(name: String, obj: LogicalPlan) = {
    baseRelationsByName.put(name, obj)
  }

  def containsBaseRelation(name: String): Boolean = {
    baseRelationsByName.contains(name)
  }
} 
Example 113
Source File: CachedRDDManager.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package edu.ucla.cs.wis.bigdatalog.spark.execution.recursion

import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.{HashMap, HashSet, Set}

class CachedRDDManager(defaultStorageLevel: StorageLevel)
  extends Logging with Serializable {

  val iterationToRDDMap = new HashMap[Int, HashSet[RDD[_]]]
  var currentIteration : Int = 0

  def persist(rdd: RDD[_]): Unit = {
    persist(rdd, false)
  }

  def persist(rdd: RDD[_], doMemoryCheckpoint: Boolean): Unit = {
    iterationToRDDMap.getOrElseUpdate(currentIteration, new HashSet[RDD[_]]).add(rdd)
    rdd.persist(defaultStorageLevel)

    if (doMemoryCheckpoint)
      rdd.memoryCheckpoint()
  }

  def cleanUpIteration(iterationsBackToRemove: Int = 2) = {
    val start = System.currentTimeMillis()
    if (currentIteration >= iterationsBackToRemove) {
      val iterationId = currentIteration - iterationsBackToRemove
      if (iterationToRDDMap.contains(iterationId)) {
        val rdds: HashSet[RDD[_]] = iterationToRDDMap.remove(iterationId).get
        if (rdds.nonEmpty)
          logInfo("Unpersisting "+rdds.size+" rdds for iteration " + iterationId)
        rdds.foreach(rdd => rdd.unpersist(false))
      }
    }
    logInfo("CleanUpIteration took " + (System.currentTimeMillis() - start) + " ms")
    currentIteration += 1
  }

  def cleanUpIterationById(iterationId: Int) = {
    if (iterationToRDDMap.contains(iterationId)) {
      val rdds: HashSet[RDD[_]] = iterationToRDDMap.remove(iterationId).get
      rdds.foreach(rdd => rdd.unpersist(false))
    }
  }

  def incrementIteration() { currentIteration += 1}

  def clear() = {
    iterationToRDDMap.clear()
  }

  def clear(remainCached: Seq[RDD[_]]) = {
    iterationToRDDMap.keySet.foreach(key => logInfo("key: " + key + " value: " + iterationToRDDMap.get(key)))

    iterationToRDDMap.keySet
      .foreach(key => iterationToRDDMap.get(key)
      .foreach(value => value.foreach(item => {if (!remainCached.contains(item)) item.unpersist(false)})))

    iterationToRDDMap.clear()
  }

  def unpersist(rdds: Set[RDD[_]]) = {
    for (rdd <- rdds) {
      iterationToRDDMap.synchronized {
        // rdd should only be in 1 iteration
        val iterations = iterationToRDDMap.filter(x => x._2.contains(rdd))
        if (iterations.nonEmpty) {
          val iteration = iterations.head
          iteration._2.remove(rdd)
          rdd.unpersist(false)
          if (iteration._2.isEmpty)
            iterationToRDDMap.remove(iteration._1)
        }
      }
    }
  }

  override def toString = {
    val output = new StringBuilder
    iterationToRDDMap.keySet.toSeq.sorted
      .foreach(iteration => {
        val rdds = iterationToRDDMap.get(iteration)
        rdds.foreach(rdd => output.append(iteration + ":" + rdd + "\n"))
      })
    output.toString()
  }
} 
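A minimal standalone sketch of the getOrElseUpdate bucketing idiom CachedRDDManager uses to group RDDs by iteration; plain Ints stand in for RDDs and the names are illustrative, not from BigDatalog.

import scala.collection.mutable.{HashMap, HashSet}

object IterationMapSketch {
  def main(args: Array[String]): Unit = {
    val iterationToIds = new HashMap[Int, HashSet[Int]]

    // Create the per-iteration bucket on first use, then add to it.
    iterationToIds.getOrElseUpdate(0, new HashSet[Int]).add(101)
    iterationToIds.getOrElseUpdate(0, new HashSet[Int]).add(102)
    iterationToIds.getOrElseUpdate(1, new HashSet[Int]).add(103)

    println(iterationToIds)           // two ids bucketed under iteration 0, one under iteration 1
    println(iterationToIds.remove(0)) // Some(HashSet(...)) - the evicted bucket, as in cleanUpIteration
  }
}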
Example 114
Source File: RelationCatalog.scala    From BigDatalog   with Apache License 2.0
package edu.ucla.cs.wis.bigdatalog.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.StructType

import scala.collection.mutable.HashMap

class RelationCatalog extends Serializable {
  val directory = HashMap.empty[String, RelationInfo]

  def addRelation(name : String, schema : StructType) : Unit = {
    val relationInfo = new RelationInfo().setSchema(schema)
    directory.get(name) match {
      case Some(oldRelationInfo) =>
        // update rdd if already present.  Schema should not change
        oldRelationInfo.setRDD(relationInfo.getRDD())
      case None => directory.put(name, relationInfo)
    }
  }

  def setRDD(name : String, rdd : RDD[InternalRow]) : Unit = {
    directory.get(name) match {
      case Some(oldRelationInfo) => oldRelationInfo.setRDD(rdd)
      case None => directory.put(name, new RelationInfo().setRDD(rdd))
    }
  }

  def getRelationInfo(name : String) : RelationInfo = {
    if (directory.contains(name))
      directory(name)
    else
      null
  }

  def removeRDD(name : String) : Unit = {
    directory.remove(name)
  }

  def clear() : Unit = {
    directory.clear()
  }

  override def toString(): String = {
    val output = new StringBuilder()
    directory.iterator.foreach(f => output.append(f.toString()))
    output.toString()
  }
}

class RelationInfo() extends Serializable {
  private var schema : StructType = _
  private var rdd : RDD[InternalRow] = _

  def getSchema() : StructType = schema

  def setSchema(schema : StructType) : RelationInfo = {
    this.schema = schema
    this
  }

  def getRDD() : RDD[InternalRow] = rdd

  def setRDD(rdd : RDD[InternalRow]) : RelationInfo = {
    this.rdd = rdd
    this
  }

  override def toString() : String = {
    "schema: " + this.schema + (if (rdd != null) " RDD")
  }
} 
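A minimal standalone sketch of the get/match update-or-insert idiom RelationCatalog uses in addRelation and setRDD; Ints stand in for RelationInfo and the names are illustrative, not from the project.

import scala.collection.mutable.HashMap

object UpsertSketch {
  def main(args: Array[String]): Unit = {
    val directory = HashMap.empty[String, Int]

    def setCount(name: String, count: Int): Unit = directory.get(name) match {
      case Some(_) => directory(name) = count    // update the existing entry
      case None    => directory.put(name, count) // insert a new entry
    }

    setCount("users", 10)
    setCount("users", 12)
    println(directory("users")) // 12
  }
}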
Example 115
Source File: UIData.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable
import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0
    var failedTasks : Int = 0
    var succeededTasks : Int = 0
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None,
    var completionTime: Option[Long] = None,
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN)
    
  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
} 
Example 116
Source File: PoolTable.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.ui.jobs

import java.net.URLEncoder

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils


private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    val activeStages = poolToActiveStages.get(p.name) match {
      case Some(stages) => stages.size
      case None => 0
    }
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8"))
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
} 
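A minimal standalone sketch of the nested-HashMap lookup PoolTable performs in poolRow, defaulting to 0 when a pool has no active stages; Strings stand in for StageInfo and the names are illustrative.

import scala.collection.mutable.HashMap

object NestedLookupSketch {
  def main(args: Array[String]): Unit = {
    val poolToActiveStages = new HashMap[String, HashMap[Int, String]]
    poolToActiveStages("default") = HashMap(1 -> "stage-1", 2 -> "stage-2")

    // Outer lookup returns Option; fall back to 0 when the pool is absent.
    val activeStages = poolToActiveStages.get("default") match {
      case Some(stages) => stages.size
      case None => 0
    }
    println(activeStages)                                   // 2
    println(poolToActiveStages.get("missing").fold(0)(_.size)) // 0
  }
}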
Example 117
Source File: StageInfo.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.storage.RDDInfo


private[spark] object StageInfo {
  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details,
      taskLocalityPreferences)
  }
} 
Example 118
Source File: FixedPointJobDefinition.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.scheduler.fixedpoint

import org.apache.spark.TaskContext
import org.apache.spark.rdd.RDD

import scala.collection.mutable.{HashSet, HashMap, Set}

class FixedPointJobDefinition(val setupIteration: (FixedPointJobDefinition, RDD[_]) => RDD[_],
                              val cleanupIteration: (Int) => Unit) {
  var _fixedPointEvaluator: (TaskContext, Iterator[_]) => Boolean = null
  var finalRDD: RDD[_] = null
  var rddIds = Array.empty[Int] // for all and delta rdd id for FixedPointResultTask execution on worker

  def fixedPointEvaluator(fixedPointEvaluator: (TaskContext, Iterator[_]) => Boolean) = {
    _fixedPointEvaluator = fixedPointEvaluator
  }

  def getfixedPointEvaluator = _fixedPointEvaluator.asInstanceOf[(TaskContext, Iterator[_]) => _]

  def getFinalRDD: RDD[_] = finalRDD

  def setRDDIds(newAllRDDId: Int,
                oldAllRDDId: Int,
                newDeltaPrimeRDDId: Int,
                oldDeltaPrimeRDDId: Int): Unit = {

    rddIds = Array(newAllRDDId, oldAllRDDId, newDeltaPrimeRDDId, oldDeltaPrimeRDDId)
  }
} 
Example 119
Source File: GroupedSumEvaluator.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConverters._
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter


private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val sum = entry.getValue.sum
        result.put(entry.getKey, new BoundedDouble(sum, 1.0, sum, sum))
      }
      result.asScala
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val meanEstimate = counter.mean
        val meanVar = counter.sampleVariance / counter.count
        val countEstimate = (counter.count + 1 - p) / p
        val countVar = (counter.count + 1) * (1 - p) / (p * p)
        val sumEstimate = meanEstimate * countEstimate
        val sumVar = (meanEstimate * meanEstimate * countVar) +
                     (countEstimate * countEstimate * meanVar) +
                     (meanVar * countVar)
        val sumStdev = math.sqrt(sumVar)
        val confFactor = studentTCacher.get(counter.count)
        val low = sumEstimate - confFactor * sumStdev
        val high = sumEstimate + confFactor * sumStdev
        result.put(entry.getKey, new BoundedDouble(sumEstimate, confidence, low, high))
      }
      result.asScala
    }
  }
} 
Example 120
Source File: GroupedCountEvaluator.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConverters._
import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.commons.math3.distribution.NormalDistribution

import org.apache.spark.util.collection.OpenHashMap


private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]) {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        result.put(key, new BoundedDouble(sum, 1.0, sum, sum))
      }
      result.asScala
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      val confFactor = new NormalDistribution().
        inverseCumulativeProbability(1 - (1 - confidence) / 2)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        val mean = (sum + 1 - p) / p
        val variance = (sum + 1) * (1 - p) / (p * p)
        val stdev = math.sqrt(variance)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result.put(key, new BoundedDouble(mean, confidence, low, high))
      }
      result.asScala
    }
  }
} 
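A minimal standalone sketch of the per-key update-or-insert merge the grouped evaluators perform, using plain Long counts in place of StatCounter/OpenHashMap; the names are illustrative, not Spark APIs.

import scala.collection.mutable.HashMap

object MergeCountsSketch {
  def main(args: Array[String]): Unit = {
    val totals = new HashMap[String, Long]

    // Fold one task's partial counts into the running totals, key by key.
    def merge(partial: Map[String, Long]): Unit =
      partial.foreach { case (key, count) =>
        totals(key) = totals.getOrElse(key, 0L) + count
      }

    merge(Map("a" -> 2L, "b" -> 1L))
    merge(Map("a" -> 3L))
    println(totals) // a -> 5, b -> 1 (iteration order unspecified)
  }
}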
Example 121
Source File: GroupedMeanEvaluator.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConverters._
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter


private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val mean = entry.getValue.mean
        result.put(entry.getKey, new BoundedDouble(mean, 1.0, mean, mean))
      }
      result.asScala
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val mean = counter.mean
        val stdev = math.sqrt(counter.sampleVariance / counter.count)
        val confFactor = studentTCacher.get(counter.count)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result.put(entry.getKey, new BoundedDouble(mean, confidence, low, high))
      }
      result.asScala
    }
  }
} 
Example 122
Source File: TaskContextImpl.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark

import scala.collection.mutable.{ArrayBuffer, HashMap}

import org.apache.spark.executor.TaskMetrics
import org.apache.spark.memory.TaskMemoryManager
import org.apache.spark.metrics.MetricsSystem
import org.apache.spark.metrics.source.Source
import org.apache.spark.util.{TaskCompletionListener, TaskCompletionListenerException}

private[spark] class TaskContextImpl(
    val stageId: Int,
    val partitionId: Int,
    override val taskAttemptId: Long,
    override val attemptNumber: Int,
    override val taskMemoryManager: TaskMemoryManager,
    @transient private val metricsSystem: MetricsSystem,
    internalAccumulators: Seq[Accumulator[Long]],
    val runningLocally: Boolean = false,
    val taskMetrics: TaskMetrics = TaskMetrics.empty)
  extends TaskContext
  with Logging {

  // For backwards-compatibility; this method is now deprecated as of 1.3.0.
  override def attemptId(): Long = taskAttemptId

  // List of callback functions to execute when the task completes.
  @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener]

  // Whether the corresponding task has been killed.
  @volatile private var interrupted: Boolean = false

  // Whether the task has completed.
  @volatile private var completed: Boolean = false

  override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = {
    onCompleteCallbacks += listener
    this
  }

  override def addTaskCompletionListener(f: TaskContext => Unit): this.type = {
    onCompleteCallbacks += new TaskCompletionListener {
      override def onTaskCompletion(context: TaskContext): Unit = f(context)
    }
    this
  }

  @deprecated("use addTaskCompletionListener", "1.1.0")
  override def addOnCompleteCallback(f: () => Unit) {
    onCompleteCallbacks += new TaskCompletionListener {
      override def onTaskCompletion(context: TaskContext): Unit = f()
    }
  }

  
  private[spark] def markInterrupted(): Unit = {
    interrupted = true
  }

  override def isCompleted(): Boolean = completed

  override def isRunningLocally(): Boolean = runningLocally

  override def isInterrupted(): Boolean = interrupted

  override def getMetricsSources(sourceName: String): Seq[Source] =
    metricsSystem.getSourcesByName(sourceName)

  @transient private val accumulators = new HashMap[Long, Accumulable[_, _]]

  private[spark] override def registerAccumulator(a: Accumulable[_, _]): Unit = synchronized {
    accumulators(a.id) = a
  }

  private[spark] override def collectInternalAccumulators(): Map[Long, Any] = synchronized {
    accumulators.filter(_._2.isInternal).mapValues(_.localValue).toMap
  }

  private[spark] override def collectAccumulators(): Map[Long, Any] = synchronized {
    accumulators.mapValues(_.localValue).toMap
  }

  //private[spark]
  override val internalMetricsToAccumulators: Map[String, Accumulator[Long]] = {
    // Explicitly register internal accumulators here because these are
    // not captured in the task closure and are already deserialized
    internalAccumulators.foreach(registerAccumulator)
    internalAccumulators.map { a => (a.name.get, a) }.toMap
  }
} 
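A minimal standalone sketch of the filter/mapValues/toMap snapshot idiom TaskContextImpl uses over its accumulators HashMap; the Acc case class and names are illustrative stand-ins for Spark's Accumulable.

import scala.collection.mutable.HashMap

object RegistrySnapshotSketch {
  final case class Acc(id: Long, internal: Boolean, value: Long)

  def main(args: Array[String]): Unit = {
    val accumulators = new HashMap[Long, Acc]
    def register(a: Acc): Unit = accumulators(a.id) = a

    register(Acc(1L, internal = true, value = 10))
    register(Acc(2L, internal = false, value = 7))

    // Filter by a per-value flag, project out the payload, and materialize an
    // immutable snapshot - the same shape as collectInternalAccumulators.
    val internalValues = accumulators.filter(_._2.internal).mapValues(_.value).toMap
    println(internalValues) // Map(1 -> 10)
  }
}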
Example 123
Source File: LocalKMeans.scala    From learning-spark   with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.SparkContext._


object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData = {
    def generatePoint(i: Int) = {
      DenseVector.fill(D){rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while(tempDist > convergeDist) {
      var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int] (x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (x1, y1)), (id2, (x2, y2))) => (id1, (x1 + x2, y1 + y2))
        }
      }

      var newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
} 
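A minimal standalone sketch of the nearest-center lookup against a mutable HashMap of centers that LocalKMeans performs, reduced to one dimension so it runs without Breeze; the names are illustrative, not from the example above.

import scala.collection.mutable.HashMap

object NearestCenterSketch {
  // Return the key of the center closest to p.
  def closest(p: Double, centers: HashMap[Int, Double]): Int =
    centers.minBy { case (_, c) => math.abs(p - c) }._1

  def main(args: Array[String]): Unit = {
    val centers = new HashMap[Int, Double]
    centers.put(1, 0.0)
    centers.put(2, 10.0)
    println(closest(3.0, centers)) // 1 (center 0.0 is nearer than 10.0)

    centers.put(1, 4.0)            // centers are overwritten in place each iteration
    println(closest(3.0, centers)) // 1 (center 1 moved to 4.0, still nearest)
  }
}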
Example 124
Source File: CollectionExample.scala    From Scala-and-Spark-for-Big-Data-Analytics   with MIT License
package com.chapter3.ScalaFP
import scala.collection._
import scala.collection.mutable.Buffer
import scala.collection.mutable.HashMap

object CollectionExample {
  def main(args: Array[String]) {
    val x = 10
    val y = 15
    val z = 19
    
    Traversable(1, 2, 3)
    Iterable("x", "y", "z")
    Map("x" -> 10, "y" -> 13, "z" -> 17)
    Set("Red", "Green", "Blue")
    SortedSet("Hello,", "world!")
    Buffer(x, y, z)
    IndexedSeq(0.0, 1.0, 2.0)
    LinearSeq(x, y, z)
    List(2, 6, 10)
    HashMap("x" -> 20, "y" -> 19, "z" -> 16)
    
    val list = List(1, 2, 3) map (_ + 1)
    println(list)
    
    val set = Set(1, 2, 3) map (_ * 2)
    println(set)
    
    val list2 = List(x, y, z).map(x => x * 3)
    println(list2)
  }
} 
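The example above mostly constructs collections; a minimal standalone sketch (names illustrative, not from the book's code) of the basic mutable HashMap operations it builds on:

import scala.collection.mutable.HashMap

object HashMapBasicsSketch {
  def main(args: Array[String]): Unit = {
    val scores = HashMap("x" -> 20, "y" -> 19)
    scores += ("z" -> 16)             // add an entry
    scores("x") = 25                  // update in place
    scores -= "y"                     // remove a key
    println(scores.getOrElse("y", 0)) // 0: "y" was removed
    println(scores.keySet)            // remaining keys: x and z
  }
}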
Example 125
Source File: NFAStructure.scala    From piglet   with Apache License 2.0
package dbis.piglet.cep.nfa
import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
import scala.collection.mutable.HashMap
import dbis.piglet.backends.{SchemaClass => Event}
import scala.collection.mutable.ListBuffer


  def addEvent(event: T, currentEdge: ForwardEdge[T]): Unit = {
    events += event
    //if (relatedValue != null) {
     // relatedValue.get(currentEdge.name.get) match {
       // case Some(x) => x.foreach (r => r.updateValue(event))
        //case None => Nil
      //}
    //}
    currenState = currentEdge.destState
    if (currenState.isInstanceOf[FinalState[T]])
      complete = true
  }
  
  override def clone(): NFAStructure[T] = {
    val copyStr = new NFAStructure[T](this.nfaController)
    copyStr.complete = this.complete
    copyStr.currenState = this.currenState
    copyStr.events = this.events.clone()
    //copyStr.events = this.events
    copyStr
  }
} 
Example 126
Source File: CorefUtils.scala    From berkeley-doc-summarizer   with GNU General Public License v3.0
package edu.berkeley.nlp.summ

import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
import edu.berkeley.nlp.entity.DepConstTree
import edu.berkeley.nlp.entity.coref.Mention
import edu.berkeley.nlp.entity.coref.PronounDictionary
import edu.berkeley.nlp.entity.coref.MentionType
import edu.berkeley.nlp.entity.coref.CorefDoc
import edu.berkeley.nlp.entity.GUtil
import edu.berkeley.nlp.futile.math.SloppyMath

object CorefUtils {
  
  def getAntecedent(corefDoc: CorefDoc, predictor: edu.berkeley.nlp.entity.coref.PairwiseScorer, index: Int) = {
    val posteriors = computePosteriors(corefDoc, predictor, Seq(index))
    GUtil.argMaxIdx(posteriors(0))
  }
  
  def computePosteriors(corefDoc: CorefDoc, predictor: edu.berkeley.nlp.entity.coref.PairwiseScorer, indicesOfInterest: Seq[Int]): Array[Array[Double]] = {
    val docGraph = new edu.berkeley.nlp.entity.coref.DocumentGraph(corefDoc, false)
    Array.tabulate(indicesOfInterest.size)(idxIdxOfInterest => {
      val idx = indicesOfInterest(idxIdxOfInterest)
      val scores = Array.tabulate(idx+1)(antIdx => predictor.score(docGraph, idx, antIdx, false).toDouble)
      val logNormalizer = scores.foldLeft(Double.NegativeInfinity)(SloppyMath.logAdd(_, _))
      for (antIdx <- 0 until scores.size) {
        scores(antIdx) = scores(antIdx) - logNormalizer
      }
      scores
    })
  }
  
  
  def remapMentionType(ment: Mention) = {
    val newMentionType = if (ment.endIdx - ment.startIdx == 1 && PronounDictionary.isDemonstrative(ment.rawDoc.words(ment.sentIdx)(ment.headIdx))) {
      MentionType.DEMONSTRATIVE;
    } else if (ment.endIdx - ment.startIdx == 1 && PronounDictionary.isPronLc(ment.rawDoc.words(ment.sentIdx)(ment.headIdx))) {
      MentionType.PRONOMINAL;
    } else if (ment.rawDoc.pos(ment.sentIdx)(ment.headIdx) == "NNS" || ment.rawDoc.pos(ment.sentIdx)(ment.headIdx) == "NNPS") {
      MentionType.PROPER;
    } else {
      MentionType.NOMINAL;
    }
    new Mention(ment.rawDoc,
                ment.mentIdx,
                ment.sentIdx,
                ment.startIdx,
                ment.endIdx,
                ment.headIdx,
                ment.allHeadIndices,
                ment.isCoordinated,
                newMentionType,
                ment.nerString,
                ment.number,
                ment.gender)
                
  }
  
  def getMentionText(ment: Mention) = ment.rawDoc.words(ment.sentIdx).slice(ment.startIdx, ment.endIdx)
  
  def getMentionNerSpan(ment: Mention): Option[(Int,Int)] = {
    // Smallest NER chunk that contains the head
    val conllDoc = ment.rawDoc
    val matchingChunks = conllDoc.nerChunks(ment.sentIdx).filter(chunk => chunk.start <= ment.headIdx && ment.headIdx < chunk.end);
    if (!matchingChunks.isEmpty) {
      val smallestChunk = matchingChunks.sortBy(chunk => chunk.end - chunk.start).head;
      Some(smallestChunk.start -> smallestChunk.end)
    } else {
      None
    }
  }

  def getSpanHeads(tree: DepConstTree, startIdx: Int, endIdx: Int): Seq[Int] = getSpanHeads(tree.childParentDepMap, startIdx, endIdx);
  
  def getSpanHeads(childParentDepMap: HashMap[Int,Int], startIdx: Int, endIdx: Int): Seq[Int] = {
    // If it's a constituent, only one should have a head outside
    val outsidePointing = new ArrayBuffer[Int];
    for (i <- startIdx until endIdx) {
      val ptr = childParentDepMap(i);
      if (ptr < startIdx || ptr >= endIdx) {
        outsidePointing += i;
      }
    }
    outsidePointing
  }
  
  def isDefinitelyPerson(str: String): Boolean = {
    val canonicalization = PronounDictionary.canonicalize(str)
    // N.B. Don't check "we" or "they" because those might be used in inanimate cases
    canonicalization == "i" || canonicalization == "you" || canonicalization == "he" || canonicalization == "she" 
  }
}
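A minimal standalone sketch of the getSpanHeads idiom: scan a child-to-parent HashMap and keep the indices whose parent falls outside the span; the object name is illustrative, not part of the summarizer.

import scala.collection.mutable.{ArrayBuffer, HashMap}

object SpanHeadsSketch {
  // Indices in [startIdx, endIdx) whose parent pointer lies outside the span.
  def spanHeads(childParent: HashMap[Int, Int], startIdx: Int, endIdx: Int): Seq[Int] = {
    val outside = new ArrayBuffer[Int]
    for (i <- startIdx until endIdx) {
      val parent = childParent(i)
      if (parent < startIdx || parent >= endIdx) outside += i
    }
    outside.toSeq
  }

  def main(args: Array[String]): Unit = {
    val deps = HashMap(0 -> 1, 1 -> 3, 2 -> 1) // child -> parent token indices
    println(spanHeads(deps, 0, 3))             // ArrayBuffer(1): only token 1 points outside the span
  }
}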