java.io.Serializable Scala Examples

The following examples show how to use java.io.Serializable in Scala. Each listing is taken from an open-source project; the source file name, project, and license are noted above each example.
Example 1
Source File: BoundedPriorityQueue.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.util

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  def poll(): A = {
    underlying.poll()
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
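Usage sketch (not part of the original source; because the class above is private[spark], it is only visible from code inside the org.apache.spark package). Since += evicts the current minimum once maxSize is reached, the queue retains the largest maxSize elements seen so far:

val topThree = new BoundedPriorityQueue[Int](3)
topThree ++= Seq(5, 1, 9, 7, 3)                  // retains 5, 7 and 9
val largestFirst = topThree.toSeq.sorted.reverse // Seq(9, 7, 5)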
Example 2
Source File: BoundedPriorityQueue.scala    From mmlspark   with MIT License
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.nn

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.generic.Growable

class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  import scala.collection.JavaConverters._

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach {
      this += _
    }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear(): Unit = {
    underlying.clear()
  }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 3
Source File: MapSize.scala    From mCNN   with Apache License 2.0
package org.apache.spark.ml.ann

import java.io.Serializable
import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}

// The MapSize class itself was elided from this excerpt; a minimal stand-in
// consistent with its use below (size.x, size.y) would be:
class MapSize(val x: Int, val y: Int) extends Serializable

object FeatureMapRolling{
  private[ann] def extractMaps(bdv: BDV[Double], size: MapSize): Array[BDM[Double]] = {
    val v = bdv.toArray
    val mapSize = size.x * size.y
    val mapNum = v.length / mapSize
    val maps = new Array[BDM[Double]](mapNum)
    var i = 0
    var offset = 0
    while(i < mapNum){
      maps(i) = new BDM(size.x, size.y, v, offset)
      offset += mapSize
      i += 1
    }
    maps
  }

  private[ann] def mergeMaps(data: Array[BDM[Double]]): BDV[Double] = {
    require(data.length > 0)
    val num = data.length
    val size = data(0).size
    val arr = new Array[Double](size * num)
    var offset = 0
    var i = 0
    while (i < num){
      System.arraycopy(data(i).toArray, 0, arr, offset, size)
      offset += size
      i += 1
    }
    val outBDM = new BDV[Double](arr)
    outBDM
  }
} 
Example 4
Source File: RichIterable.scala    From hail   with MIT License
package is.hail.utils.richUtils

import java.io.Serializable

import is.hail.utils._

import scala.collection.{TraversableOnce, mutable}
import scala.reflect.ClassTag

class RichIterable[T](val i: Iterable[T]) extends Serializable {
  def foreachBetween(f: (T) => Unit)(g: => Unit) {
    i.iterator.foreachBetween(f)(g)
  }

  def lazyMapWith[T2, S](i2: Iterable[T2], f: (T, T2) => S): Iterable[S] =
    new Iterable[S] with Serializable {
      def iterator: Iterator[S] = new Iterator[S] {
        val it: Iterator[T] = i.iterator
        val it2: Iterator[T2] = i2.iterator

        def hasNext: Boolean = it.hasNext && it2.hasNext

        def next(): S = f(it.next(), it2.next())
      }
    }

  def lazyMapWith2[T2, T3, S](i2: Iterable[T2], i3: Iterable[T3], f: (T, T2, T3) => S): Iterable[S] =
    new Iterable[S] with Serializable {
      def iterator: Iterator[S] = new Iterator[S] {
        val it: Iterator[T] = i.iterator
        val it2: Iterator[T2] = i2.iterator
        val it3: Iterator[T3] = i3.iterator

        def hasNext: Boolean = it.hasNext && it2.hasNext && it3.hasNext

        def next(): S = f(it.next(), it2.next(), it3.next())
      }
    }

  def areDistinct(): Boolean = {
    val seen = mutable.HashSet[T]()
    for (x <- i)
      if (seen(x))
        return false
      else
        seen += x
    true
  }

  def duplicates(): Set[T] = {
    val dups = mutable.HashSet[T]()
    val seen = mutable.HashSet[T]()
    for (x <- i)
      if (seen(x))
        dups += x
      else
        seen += x
    dups.toSet
  }

  def truncatable(delim: String = ", ", toTake: Int = 10): Truncatable = new Truncatable {
    def truncate: String = if (i.size > toTake)
      i.take(toTake).mkString(delim) + delim + "..."
    else
      i.mkString(delim)

    def strings: (String, String) = (truncate, i.mkString(delim))
  }

  def counter(): Map[T, Int] = {
    val m = new mutable.HashMap[T, Int]()
    i.foreach { elem => m.updateValue(elem, 0, _ + 1) }

    m.toMap
  }

  def toFastSeq(implicit tct: ClassTag[T]): Seq[T] = toFastIndexedSeq

  def toFastIndexedSeq(implicit tct: ClassTag[T]): IndexedSeq[T] = {
    i match {
      case i: mutable.WrappedArray[T] => i
      case i: mutable.ArrayBuffer[T] => i
      case _ => i.toArray[T]
    }
  }
} 
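Usage sketch (not part of the original file; in Hail these methods are normally reached through an implicit conversion from is.hail.utils, but the wrapper can also be constructed directly):

new RichIterable(Seq(1, 2, 2, 3)).duplicates()  // Set(2)
new RichIterable(Seq(1, 2, 3)).areDistinct()    // true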
Example 5
Source File: MultiArray2.scala    From hail   with MIT License
package is.hail.utils

import java.io.Serializable

import scala.collection.immutable.IndexedSeq
import scala.reflect.ClassTag


class MultiArray2[@specialized(Int, Long, Float, Double, Boolean) T](val n1: Int,
                     val n2: Int,
                     val a: Array[T]) extends Serializable with Iterable[T] {

  require(n1 >= 0 && n2 >= 0)
  require(a.length == n1*n2)

  class Row(val i:Int) extends IndexedSeq[T] {
    require(i >= 0 && i < n1)
    def apply(j:Int): T = {
      if (j < 0 || j >= length) throw new ArrayIndexOutOfBoundsException
      a(i*n2 + j)
    }
    def length: Int = n2
  }

  class Column(val j:Int) extends IndexedSeq[T] {
    require(j >= 0 && j < n2)
    def apply(i:Int): T = {
      if (i < 0 || i >= length) throw new ArrayIndexOutOfBoundsException
      a(i*n2 + j)
    }
    def length: Int = n1
  }

  def row(i:Int) = new Row(i)
  def column(j:Int) = new Column(j)

  def rows: Iterable[Row] = for (i <- rowIndices) yield row(i)
  def columns: Iterable[Column] = for (j <- columnIndices) yield column(j)

  def indices: Iterable[(Int,Int)] = for (i <- 0 until n1; j <- 0 until n2) yield (i, j)

  def rowIndices: Iterable[Int] = 0 until n1

  def columnIndices: Iterable[Int] = 0 until n2

  def apply(i: Int, j: Int): T = {
    require(i >= 0 && i < n1 && j >= 0 && j < n2)
    a(i*n2 + j)
  }

  def update(i: Int, j: Int, x:T): Unit = {
    require(i >= 0 && i < n1 && j >= 0 && j < n2)
    a.update(i*n2 + j,x)
  }

  def update(t: (Int,Int), x:T): Unit = {
    require(t._1 >= 0 && t._1 < n1 && t._2 >= 0 && t._2 < n2)
    update(t._1,t._2,x)
  }

  def array: Array[T] = a

  def zip[S](other: MultiArray2[S]): MultiArray2[(T,S)] = {
    require(n1 == other.n1 && n2 == other.n2)
    new MultiArray2(n1,n2,a.zip(other.a))
  }

  def iterator: Iterator[T] = a.iterator
}

object MultiArray2 {
  def fill[T](n1: Int, n2: Int)(elem: => T)(implicit tct: ClassTag[T]): MultiArray2[T] =
    new MultiArray2[T](n1, n2, Array.fill[T](n1 * n2)(elem))

  def empty[T](implicit tct: ClassTag[T]): MultiArray2[T] =
    new MultiArray2[T](0, 0, Array.empty[T](tct))
} 
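Usage sketch (not part of the original source), showing the row-major layout a(i * n2 + j):

val m = MultiArray2.fill(2, 3)(0)  // 2 rows, 3 columns, all zeros
m.update(1, 2, 42)
m(1, 2)                            // 42
m.row(1).mkString(", ")            // "0, 0, 42"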
Example 6
Source File: SerializableHadoopConfiguration.scala    From hail   with MIT License
package is.hail.utils

import java.io.{ObjectInputStream, ObjectOutputStream, Serializable}

import org.apache.hadoop

class SerializableHadoopConfiguration(@transient var value: hadoop.conf.Configuration) extends Serializable {
  private def writeObject(out: ObjectOutputStream) {
    out.defaultWriteObject()
    value.write(out)
  }

  private def readObject(in: ObjectInputStream) {
    value = new hadoop.conf.Configuration(false)
    value.readFields(in)
  }
} 
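Usage sketch (not part of the original file; sc and rdd stand for an existing SparkContext and RDD). The wrapper lets a Hadoop Configuration travel inside a Spark closure even though Configuration itself is not Java-serializable, because writeObject/readObject delegate to Configuration's own write/readFields:

val serConf = new SerializableHadoopConfiguration(sc.hadoopConfiguration)
rdd.foreachPartition { _ =>
  val conf = serConf.value  // rebuilt on the executor by readObject
}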
Example 7
Source File: Util.scala    From opencv-darts   with GNU General Public License v3.0
package darts

import java.io.Serializable
import javax.swing.JFrame

import org.bytedeco.javacpp.indexer.{FloatIndexer, FloatRawIndexer}
import org.bytedeco.javacpp.opencv_core.{CV_32FC3, Mat, MatVector, Point3f}
import org.bytedeco.javacpp.opencv_imgproc.{DIST_L2, fitLine}
import org.bytedeco.javacv.CanvasFrame
import org.bytedeco.javacv.OpenCVFrameConverter.ToMat

import scala.collection.mutable

// The enclosing object declaration was elided from this excerpt; the name below
// follows the source file name (Util.scala).
object Util {

  def toMatPoint3f(points: Seq[Point3f]): Mat = {
    // Create Mat representing a vector of Points3f
    val dest = new Mat(1, points.size, CV_32FC3)
    val indx = dest.createIndexer().asInstanceOf[FloatIndexer]
    for (i <- points.indices) {
      val p = points(i)
      indx.put(0, i, 0, p.x)
      indx.put(0, i, 1, p.y)
      indx.put(0, i, 2, p.z)
    }
    dest
  }

} 
Example 8
Source File: ApspResult.scala    From spark-all-pairs-shortest-path   with Apache License 2.0
import java.io.Serializable
import org.apache.spark.mllib.linalg.Matrix
import org.apache.spark.mllib.linalg.distributed.BlockMatrix
import org.apache.spark.Logging
import org.apache.spark.storage.StorageLevel



class ApspResult (
                 var size: Long,
                 var distMatrix: BlockMatrix)
  extends Serializable with Logging{

  validateResult(distMatrix)

  private def validateResult(result: BlockMatrix): Unit = {
    require(result.numRows == result.numCols,
      "The shortest distance matrix is not square.")
    require(size == result.numRows,
      s"The size of the shortest distance matrix does not match $size.")
    if (result.blocks.getStorageLevel == StorageLevel.NONE) {
      logWarning("The APSP result is not cached. Lookup could be slow")
    }
  }

  def lookupDist(srcId: Long, dstId: Long): Double = {
    val sizePerBlock = distMatrix.rowsPerBlock
    val rowBlockId = (srcId/sizePerBlock).toInt
    val colBlockId = (dstId/sizePerBlock).toInt
    val block = distMatrix.blocks.filter { case ((i, j), _) => (i == rowBlockId) && (j == colBlockId) }
      .first._2
    block.toArray((dstId % sizePerBlock).toInt * block.numRows + (srcId % sizePerBlock).toInt)
  }

  def toLocal(): Matrix = {
    distMatrix.toLocalMatrix()
  }
} 
Example 9
Source File: BoundedPriorityQueue.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.util

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 10
Source File: MqttConfig.scala    From akka-iot-mqtt-v2   with GNU Lesser General Public License v3.0
package akkaiot

import scala.concurrent.duration._

import java.io.Serializable
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.ObjectInputStream
import java.io.ObjectOutputStream

import com.sandinh.paho.akka._
import com.sandinh.paho.akka.MqttPubSub._

object MqttConfig {
  val topic = "akka-iot-mqtt-topic"

  // Pub-Sub config
  val psConfig = PSConfig(
    brokerUrl = "tcp://test.mosquitto.org:1883",
    userName = null,
    password = null,
    stashTimeToLive = 1.minute,
    stashCapacity = 8000,
    reconnectDelayMin = 10.millis,
    reconnectDelayMax = 30.seconds,
    cleanSession = false
  )

  // Serialize object to byte array
  def writeToByteArray(obj: Any): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    val oos = new ObjectOutputStream(baos)
    try {
      oos.writeObject(obj)
      baos.toByteArray
    } finally {
      try {
        oos.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }

  // Deserialize object from byte array
  def readFromByteArray[A](bytes: Array[Byte]): A = {
    val bais = new ByteArrayInputStream(bytes)
    val ois = new ObjectInputStream(bais)
    try {
      val obj = ois.readObject
      obj.asInstanceOf[A]
    } finally {
      try {
        ois.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }
} 
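Usage sketch (not part of the original project; Reading is a hypothetical payload type) showing a Java-serialization round trip through the two helpers:

case class Reading(deviceId: Int, value: Float)
val bytes = MqttConfig.writeToByteArray(Reading(42, 21.5f))
val restored = MqttConfig.readFromByteArray[Reading](bytes)  // Reading(42, 21.5)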
Example 11
Source File: BoundedPriorityQueue.scala    From iolap   with Apache License 2.0
package org.apache.spark.util

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 12
Source File: KeyFamilyQualifier.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark

import java.io.Serializable

import org.apache.hadoop.hbase.util.Bytes


class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte])
  extends Comparable[KeyFamilyQualifier] with Serializable {
  override def compareTo(o: KeyFamilyQualifier): Int = {
    var result = Bytes.compareTo(rowKey, o.rowKey)
    if (result == 0) {
      result = Bytes.compareTo(family, o.family)
      if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier)
    }
    result
  }
  override def toString: String = {
    Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier)
  }
} 
Example 13
Source File: ByteArrayWrapper.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark

import java.io.Serializable

import org.apache.hadoop.hbase.util.Bytes


class ByteArrayWrapper (var value:Array[Byte])
  extends Comparable[ByteArrayWrapper] with Serializable {
  override def compareTo(valueOther: ByteArrayWrapper): Int = {
    Bytes.compareTo(value,valueOther.value)
  }
  override def equals(o2: Any): Boolean = {
    o2 match {
      case wrapper: ByteArrayWrapper =>
        Bytes.equals(value, wrapper.value)
      case _ =>
        false
    }
  }
  override def hashCode():Int = {
    Bytes.hashCode(value)
  }
} 
Example 14
Source File: BoundedPriorityQueue.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.util

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 15
Source File: Schemas.scala    From mmlspark   with MIT License
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.nn

import java.io.Serializable

import breeze.linalg.DenseVector

private[ml] case class InnerNode(override val ball: Ball,
                                 leftChild: Node,
                                 rightChild: Node) extends Node {
  override def toString: String = {
    s"InnerNode with ${ball.toString}."
  }
}

private[ml] case class LeafNode(pointIdx: Seq[Int],
                                override val ball: Ball
                               ) extends Node {
  override def toString: String = {
    s"LeafNode with ${ball.toString} \n " +
      s"and data size of ${pointIdx.length} (example point: ${pointIdx.take(1)})"
  }
}

private[ml] trait Node extends Serializable {
  def ball: Ball

}

private[ml] case class Ball(mu: DenseVector[Double], radius: Double) extends Serializable

case class BestMatch(index: Int, distance: Double) extends Serializable 
Example 16
Source File: BoundedPriorityQueue.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.util

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 17
Source File: SampleFilters.scala    From infinispan-spark   with Apache License 2.0
package org.infinispan.spark.test

import java.io.Serializable

import org.infinispan.filter._
import org.infinispan.metadata.Metadata
import org.infinispan.spark.domain.{Person, Runner}

@NamedFactory(name = "sample-filter-factory")
class SampleFilterFactory extends KeyValueFilterConverterFactory[Int, Runner, String] with Serializable {
   override def getFilterConverter = new SampleFilter

   class SampleFilter extends AbstractKeyValueFilterConverter[Int, Runner, String] with Serializable {
      override def filterAndConvert(k: Int, v: Runner, metadata: Metadata): String = if (k % 2 == 0) v.getName else null
   }

}

@NamedFactory(name = "sample-filter-factory-with-param")
class SampleFilterFactoryWithParam extends ParamKeyValueFilterConverterFactory[Int, Runner, String] with Serializable {
   override def getFilterConverter(params: Array[AnyRef]): KeyValueFilterConverter[Int, Runner, String] = new SampleFilterParam(params)

   class SampleFilterParam(params: Array[AnyRef]) extends AbstractKeyValueFilterConverter[Int, Runner, String] with Serializable {
      override def filterAndConvert(k: Int, v: Runner, metadata: Metadata): String = {
         val length = params(0).asInstanceOf[Int]
         v.getName.substring(0, length)
      }
   }

}

@NamedFactory(name = "age-filter")
class AgeFilterFactory extends ParamKeyValueFilterConverterFactory[Int, Person, Person] with Serializable {
   override def getFilterConverter(params: Array[AnyRef]): KeyValueFilterConverter[Int, Person, Person] =
      new AgeFilter(params(0).asInstanceOf[Int], params(1).asInstanceOf[Int])

   class AgeFilter(minimumAge: Int, maximumAge: Int) extends AbstractKeyValueFilterConverter[Int, Person, Person] with Serializable {
      override def filterAndConvert(key: Int, value: Person, metadata: Metadata): Person = {
         val age = value.getAge
         if (age >= minimumAge && age <= maximumAge) value else null
      }
   }

}

object FilterDefs {
   val list = List(
      new FilterDef(factoryClass = classOf[SampleFilterFactory], classes = Seq(classOf[SampleFilterFactory#SampleFilter])),
      new FilterDef(classOf[SampleFilterFactoryWithParam], classes = Seq(classOf[SampleFilterFactoryWithParam#SampleFilterParam])),
      new FilterDef(factoryClass = classOf[AgeFilterFactory], classes = Seq(classOf[AgeFilterFactory#AgeFilter]))
   )
} 
Example 18
Source File: SparkPredictionTrainer.scala    From smart-meter   with MIT License
package com.logimethods.nats.connector.spark.app

import java.util.Properties;
import java.io.File
import java.io.Serializable

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming._

import io.nats.client.ConnectionFactory._
import java.nio.ByteBuffer

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}

import com.logimethods.connector.nats.to_spark._
import com.logimethods.scala.connector.spark.to_nats._

import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator

import java.util.function._

import java.time.{LocalDateTime, ZoneOffset}
import java.time.DayOfWeek._

import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel

object SparkPredictionTrainer extends App with SparkPredictionProcessor {
  log.setLevel(Level.WARN)

  val (properties, targets, logLevel, sc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl) = setup(args)

  val streamingDuration = scala.util.Properties.envOrElse("STREAMING_DURATION", "2000").toInt
  println("STREAMING_DURATION = " + streamingDuration)

  new Thread(new Runnable {
              def run() {
                 while( true ){
                   try {
                     val data = SparkPredictionProcessor.getData(sc, THRESHOLD)
                     val model = trainer.fit(data)
                     model.write.overwrite.save(PREDICTION_MODEL_PATH)
                     println("New model of size " + data.count() + " trained: " + model.uid)
                     Thread.sleep(streamingDuration)
                   } catch {
                     case e: Throwable => log.error(e)
                   }
                 }
              }
             }).start()
} 
Example 19
Source File: SparkProcessor.scala    From smart-meter   with MIT License
package com.logimethods.nats.connector.spark.app

import java.util.Properties;
import java.io.File
import java.io.Serializable

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming._

import io.nats.client.Nats._
import io.nats.client.ConnectionFactory._
import java.nio.ByteBuffer

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}

import com.logimethods.connector.nats.to_spark._
import com.logimethods.scala.connector.spark.to_nats._

import java.util.function._

import java.time.{LocalDateTime, ZoneOffset}

trait SparkProcessor {
  def setup(args: Array[String]) = {
    val inputSubject = args(0)
//    val inputNatsStreaming = inputSubject.toUpperCase.contains("STREAMING")
    val outputSubject = args(1)
//    val outputNatsStreaming = outputSubject.toUpperCase.contains("STREAMING")
    println("Will process messages from '" + inputSubject + "' to '" + outputSubject + "'")

    val logLevel = scala.util.Properties.envOrElse("LOG_LEVEL", "INFO")
    println("LOG_LEVEL = " + logLevel)

    val targets = scala.util.Properties.envOrElse("TARGETS", "ALL")
    println("TARGETS = " + targets)

    val cassandraUrl = System.getenv("CASSANDRA_URL")
    println("CASSANDRA_URL = " + cassandraUrl)

    val sparkMasterUrl = System.getenv("SPARK_MASTER_URL")
    println("SPARK_MASTER_URL = " + sparkMasterUrl)

    val sparkCoresMax = System.getenv("SPARK_CORES_MAX")
    println("SPARK_CORES_MAX = " + sparkCoresMax)

    val conf = new SparkConf()
                  .setAppName(args(2))
                  .setMaster(sparkMasterUrl)
                  .set("spark.cores.max", sparkCoresMax)
                  .set("spark.cassandra.connection.host", cassandraUrl);
    val sc = new SparkContext(conf);

//    val streamingDuration = scala.util.Properties.envOrElse("STREAMING_DURATION", "2000").toInt
//    val ssc = new StreamingContext(sc, new Duration(streamingDuration));
///    ssc.checkpoint("/spark/storage")

    val properties = new Properties();
    val natsUrl = System.getenv("NATS_URI")
    println("NATS_URI = " + natsUrl)
    properties.put("servers", natsUrl)
    properties.put(PROP_URL, natsUrl)

    val clusterId = System.getenv("NATS_CLUSTER_ID")

    val inputNatsStreaming = inputSubject.toUpperCase.contains("STREAMING")
    val outputNatsStreaming = outputSubject.toUpperCase.contains("STREAMING")

    (properties, targets, logLevel, sc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl)
  }

  def dataDecoder: Array[Byte] => Tuple2[Long,Float] = bytes => {
        val buffer = ByteBuffer.wrap(bytes);
        val epoch = buffer.getLong()
        val value = buffer.getFloat()
        (epoch, value)
      }
}


trait SparkStreamingProcessor extends SparkProcessor {
  def setupStreaming(args: Array[String]) = {
    val (properties, target, logLevel, sc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl) = setup(args)

    val streamingDuration = scala.util.Properties.envOrElse("STREAMING_DURATION", "2000").toInt
    println("STREAMING_DURATION = " + streamingDuration)

    val ssc = new StreamingContext(sc, new Duration(streamingDuration));
//    ssc.checkpoint("/spark/storage")

    (properties, target, logLevel, sc, ssc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl, streamingDuration)
  }
} 
Example 20
Source File: SparkTemperatureProcessor.scala    From smart-meter   with MIT License
package com.logimethods.nats.connector.spark.app

import java.util.Properties;
import java.io.File
import java.io.Serializable

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming._
import com.datastax.spark.connector.streaming._
import com.datastax.spark.connector.SomeColumns

import io.nats.client.ConnectionFactory._
import java.nio.ByteBuffer

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}

import com.logimethods.connector.nats.to_spark._
import com.logimethods.scala.connector.spark.to_nats._

import java.util.function._

import java.time.{LocalDateTime, ZoneOffset}

object SparkTemperatureProcessor extends App with SparkStreamingProcessor {
  val log = LogManager.getRootLogger
  log.setLevel(Level.WARN)

  val (properties, target, logLevel, sc, ssc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl, streamingDuration) =
    setupStreaming(args)

  // Temperatures //

  val temperatures =
    if (inputNatsStreaming) {
      NatsToSparkConnector
        .receiveFromNatsStreaming(classOf[Tuple2[Long,Float]], StorageLevel.MEMORY_ONLY, clusterId)
        .withNatsURL(natsUrl)
        .withSubjects(inputSubject)
        .withDataDecoder(dataDecoder)
        .asStreamOf(ssc)
    } else {
      NatsToSparkConnector
        .receiveFromNats(classOf[Tuple2[Long,Float]], StorageLevel.MEMORY_ONLY)
        .withProperties(properties)
        .withSubjects(inputSubject)
        .withDataDecoder(dataDecoder)
        .asStreamOf(ssc)
    }

  // Ideally, should be the AVG
  val singleTemperature = temperatures.reduceByKey(Math.max(_,_))

  if (logLevel.contains("TEMPERATURE")) {
    singleTemperature.print()
  }

  singleTemperature.saveToCassandra("smartmeter", "temperature")

  val temperatureReport = singleTemperature.map({case (epoch, temperature) => (s"""{"epoch": $epoch, "temperature": $temperature}""") })
  SparkToNatsConnectorPool.newPool()
                      .withProperties(properties)
                      .withSubjects(outputSubject) // "smartmeter.extract.temperature"
                      .publishToNats(temperatureReport)

  // Start //
  ssc.start();

  ssc.awaitTermination()
} 
Example 21
Source File: SparkBatch.scala    From smart-meter   with MIT License
package com.logimethods.nats.connector.spark.app

import java.util.Properties;
import java.io.File
import java.io.Serializable

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}
import org.apache.log4j.Logger

import org.apache.spark.sql.SparkSession

//import com.datastax.spark.connector._
//import com.datastax.spark.connector.cql.CassandraConnector

// @see http://stackoverflow.com/questions/39423131/how-to-use-cassandra-context-in-spark-2-0
// @see https://databricks.com/blog/2016/08/15/how-to-use-sparksession-in-apache-spark-2-0.html
// @see https://dzone.com/articles/cassandra-with-spark-20-building-rest-api
object SparkBatch extends App {
  val logLevel = System.getenv("APP_BATCH_LOG_LEVEL")
  println("APP_BATCH_LOG_LEVEL = " + logLevel)
  if ("DEBUG" != logLevel) {
  	Logger.getLogger("org").setLevel(Level.OFF)
  }
  
  val cassandraUrl = System.getenv("CASSANDRA_URL")
  println("CASSANDRA_URL = " + cassandraUrl)
  
  val sparkMasterUrl = System.getenv("SPARK_MASTER_URL")
  println("SPARK_MASTER_URL = " + sparkMasterUrl)
  
  val spark = SparkSession
    .builder()
    .master(sparkMasterUrl)
    .appName("Smartmeter Batch")
    .config("spark.cassandra.connection.host", cassandraUrl)
    //   .config("spark.sql.warehouse.dir", warehouseLocation)
    //.enableHiveSupport()
    .getOrCreate()
  
  spark
    .read
    .format("org.apache.spark.sql.cassandra")
    .options(Map("keyspace" -> "smartmeter", "table" -> "raw_data"))
    .load
    .createOrReplaceTempView("raw_data")
  
  val rawVoltageData = spark.sql("select * from raw_data")
  rawVoltageData.show(10)
  
  
  // @see http://stackoverflow.com/questions/40324153/what-is-the-best-way-to-insert-update-rows-in-cassandra-table-via-java-spark
  //Save data to Cassandra
  import org.apache.spark.sql.SaveMode
  avgByTransformer.write.format("org.apache.spark.sql.cassandra").options(Map("keyspace" -> "smartmeter", "table" -> "avg_voltage_by_transformer")).mode(SaveMode.Overwrite).save();
} 
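Note that avgByTransformer is referenced in the final line but never defined: the aggregation step was dropped from this excerpt. A minimal sketch of what that step could look like (the column names transformer and voltage are assumptions, not taken from the original schema):

val avgByTransformer = spark.sql(
  "select transformer, avg(voltage) as avg_voltage from raw_data group by transformer")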
Example 22
Source File: PointObj.scala    From Clustering4Ever   with Apache License 2.0
package org.clustering4ever.spark.streamclustering

final case class Prototype(
  var protoPartNum: Vector[Double],
  var idsDataAssigned : Set[Int],
  val id: Int
) extends Serializable {
  
  override def toString: String = {
    "node: " + id + " -> " + protoPartNum.toArray.deep.mkString(", ")
  }

  def toStringIds: String = {

    "node: " + id + " (" + idsDataAssigned.size + " data-points)" + " -> "  + idsDataAssigned.toArray.deep.mkString(", ")
  }
  
  def toStringProto: String = {
    protoPartNum.toArray.deep.mkString(", ")
  }

  def toStringCard: String = {
    idsDataAssigned.size.toString()
  }
  
  def toStringAss: String = {
    idsDataAssigned.toArray.deep.mkString(", ")
  }
  
  def toStringId: String = {
    id.toString()
  }

} 
Example 23
Source File: KeyFamilyQualifier.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark

import java.io.Serializable

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.Bytes


@InterfaceAudience.Public
class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte])
  extends Comparable[KeyFamilyQualifier] with Serializable {
  override def compareTo(o: KeyFamilyQualifier): Int = {
    var result = Bytes.compareTo(rowKey, o.rowKey)
    if (result == 0) {
      result = Bytes.compareTo(family, o.family)
      if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier)
    }
    result
  }
  override def toString: String = {
    Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier)
  }
} 
Example 24
Source File: ByteArrayWrapper.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark

import java.io.Serializable

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.Bytes


@InterfaceAudience.Public
class ByteArrayWrapper (var value:Array[Byte])
  extends Comparable[ByteArrayWrapper] with Serializable {
  override def compareTo(valueOther: ByteArrayWrapper): Int = {
    Bytes.compareTo(value,valueOther.value)
  }
  override def equals(o2: Any): Boolean = {
    o2 match {
      case wrapper: ByteArrayWrapper =>
        Bytes.equals(value, wrapper.value)
      case _ =>
        false
    }
  }
  override def hashCode():Int = {
    Bytes.hashCode(value)
  }
} 
Example 25
Source File: BoundedPriorityQueue.scala    From albedo   with MIT License
// COPY DIRECTLY FROM SPARK SOURCE CODE

package ws.vinta.albedo.recommenders

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 26
Source File: ObjectExamples.scala    From scala-tutorials   with MIT License
package com.baeldung.scala.classvsobject

import java.io.Serializable

object ObjectExamples {

  object Router {
    val baseUrl: String = "https://www.baeldung.com"

    case class Response(baseUrl: String, path: String, action: String)

    private def getAction(path: String): Response = {
      Response(baseUrl, path, "GET")
    }

    private def postAction(path: String): Response = {
      Response(baseUrl, path, "POST")
    }

    private def patchAction(path: String): Response = {
      Response(baseUrl, path, "PATCH")
    }

    private def putAction(path: String): Response = {
      Response(baseUrl, path, "PUT")
    }

    private def deleteAction(path: String): Response = {
      Response(baseUrl, path, "DELETE")
    }
  }

  class Router(path: String) {
    import Router._
    def get(): Response = getAction(path)
    def post(): Response = postAction(path)
    def patch(): Response = patchAction(path)
    def put(): Response = putAction(path)
    def delete(): Response = deleteAction(path)
  }

  sealed class BaeldungEnvironment extends Serializable {
    val name: String = "int"
  }

  object BaeldungEnvironment {

    case class ProductionEnvironment() extends BaeldungEnvironment {
      override val name: String = "production"
    }

    case class StagingEnvironment() extends BaeldungEnvironment {
      override val name: String = "staging"
    }

    case class IntEnvironment() extends BaeldungEnvironment {
      override val name: String = "int"
    }

    case class TestEnvironment() extends BaeldungEnvironment {
      override val name: String = "test"
    }

    def fromEnvString(env: String): Option[BaeldungEnvironment] = {
      env.toLowerCase match {
        case "int"        => Some(IntEnvironment())
        case "staging"    => Some(StagingEnvironment())
        case "production" => Some(ProductionEnvironment())
        case "test"       => Some(TestEnvironment())
        case _            => None
      }
    }
  }
} 
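Usage sketch (not part of the original tutorial code):

import com.baeldung.scala.classvsobject.ObjectExamples._

val response = new Router("/articles").get()           // Response("https://www.baeldung.com", "/articles", "GET")
val env = BaeldungEnvironment.fromEnvString("staging")  // Some(StagingEnvironment())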
Example 27
Source File: BoundedUniquePriorityQueue.scala    From spark3D   with Apache License 2.0
package com.astrolabsoftware.spark3d.utils

import java.io.Serializable

import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D

import scala.collection.generic.Growable
import scala.collection.mutable

import collection.mutable.PriorityQueue


class BoundedUniquePriorityQueue[A <: Shape3D](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  // underlying base priority queue
  private val underlying = new PriorityQueue[A]()(ord)

  // HashSet of elements contained in the priority queue used to ensure uniqueness of the elements
  // in the priority queue.
  private val containedElements = new mutable.HashSet[Int]()

  override def iterator: Iterator[A] = underlying.iterator

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    val elementHash = elem.center.getCoordinate.hashCode
    // check if element to be inserted is unique or not
    if (!containedElements.contains(elementHash)) {
      if (size < maxSize) {
        underlying.enqueue(elem)
        containedElements.add(elementHash)
      } else {
        maybeReplaceLowest(elem)
      }
    }

    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.head
    // Definition of scala.Ordering.gt(x, y) is:
    // Returns true iff y comes before x in the ordering and is not the same as x.
    if (head != null && ord.gt(head, a)) {
      underlying.dequeue
      underlying.enqueue(a)
      containedElements.add(a.center.getCoordinate.hashCode)
      containedElements.remove(head.center.getCoordinate.hashCode)
      true
    } else {
      false
    }
  }
} 
Example 28
Source File: ReflectionUtils.scala    From sparta   with Apache License 2.0
package com.stratio.sparta.serving.core.utils

import java.io.Serializable
import java.net.URLClassLoader

import akka.event.slf4j.SLF4JLogging
import com.stratio.sparta.sdk.pipeline.aggregation.cube.DimensionType
import com.stratio.sparta.sdk.pipeline.aggregation.operator.Operator
import com.stratio.sparta.sdk.pipeline.input.Input
import com.stratio.sparta.sdk.pipeline.output.Output
import com.stratio.sparta.sdk.pipeline.transformation.Parser
import org.reflections.Reflections
import com.stratio.sparta.serving.core.exception.ServingCoreException

import scala.collection.JavaConversions._

class ReflectionUtils extends SLF4JLogging {

  def tryToInstantiate[C](classAndPackage: String, block: Class[_] => C): C = {
    val clazMap: Map[String, String] = getClasspathMap
    val finalClazzToInstance = clazMap.getOrElse(classAndPackage, classAndPackage)
    try {
      val clazz = Class.forName(finalClazzToInstance)
      block(clazz)
    } catch {
      case cnfe: ClassNotFoundException =>
        throw ServingCoreException.create(
          "Class with name " + classAndPackage + " Cannot be found in the classpath.", cnfe)
      case ie: InstantiationException =>
        throw ServingCoreException.create("Class with name " + classAndPackage + " cannot be instantiated", ie)
      case e: Exception =>
        throw ServingCoreException.create("Generic error trying to instantiate " + classAndPackage, e)
    }
  }

  def instantiateParameterizable[C](clazz: Class[_], properties: Map[String, Serializable]): C =
    clazz.getDeclaredConstructor(classOf[Map[String, Serializable]]).newInstance(properties).asInstanceOf[C]

  def printClassPath(cl: ClassLoader): Unit = {
    val urls = cl.asInstanceOf[URLClassLoader].getURLs()
    urls.foreach(url => log.debug(url.getFile))
  }

  lazy val getClasspathMap: Map[String, String] = {
    val reflections = new Reflections("com.stratio.sparta")

    try {
      log.debug("#######")
      log.debug("####### SPARK MUTABLE_URL_CLASS_LOADER:")
      log.debug(getClass.getClassLoader.toString)
      printClassPath(getClass.getClassLoader)
      log.debug("#######")
      log.debug("####### APP_CLASS_LOADER / SYSTEM CLASSLOADER:")
      log.debug(ClassLoader.getSystemClassLoader().toString)
      printClassPath(ClassLoader.getSystemClassLoader())
      log.debug("#######")
      log.debug("####### EXTRA_CLASS_LOADER:")
      log.debug(getClass.getClassLoader.getParent.getParent.toString)
      printClassPath(getClass.getClassLoader.getParent.getParent)
    } catch {
      case e: Exception => //nothing
    }

    val inputs = reflections.getSubTypesOf(classOf[Input]).toList
    val dimensionTypes = reflections.getSubTypesOf(classOf[DimensionType]).toList
    val operators = reflections.getSubTypesOf(classOf[Operator]).toList
    val outputs = reflections.getSubTypesOf(classOf[Output]).toList
    val parsers = reflections.getSubTypesOf(classOf[Parser]).toList
    val plugins = inputs ++ dimensionTypes ++ operators ++ outputs ++ parsers
    val result = plugins map (t => t.getSimpleName -> t.getCanonicalName) toMap

    log.debug("#######")
    log.debug("####### Plugins to be loaded:")
    result.foreach {
      case (simpleName: String, canonicalName: String) => log.debug(s"$canonicalName")
    }

    result
  }
} 
Example 29
Source File: BoundedPriorityQueue.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.util

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 30
Source File: KeyFamilyQualifier.scala    From incubator-s2graph   with Apache License 2.0
package org.apache.s2graph.s2jobs.spark

import java.io.Serializable

import org.apache.hadoop.hbase.util.Bytes


class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte])
  extends Comparable[KeyFamilyQualifier] with Serializable {
  override def compareTo(o: KeyFamilyQualifier): Int = {
    var result = Bytes.compareTo(rowKey, o.rowKey)
    if (result == 0) {
      result = Bytes.compareTo(family, o.family)
      if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier)
    }
    result
  }
  override def toString: String = {
    Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier)
  }
} 
Example 31
Source File: KeyFamilyQualifier.scala    From incubator-s2graph   with Apache License 2.0
package org.apache.s2graph.loader.spark

import java.io.Serializable

import org.apache.hadoop.hbase.util.Bytes


class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte])
  extends Comparable[KeyFamilyQualifier] with Serializable {
  override def compareTo(o: KeyFamilyQualifier): Int = {
    var result = Bytes.compareTo(rowKey, o.rowKey)
    if (result == 0) {
      result = Bytes.compareTo(family, o.family)
      if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier)
    }
    result
  }
  override def toString: String = {
    Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier)
  }
} 
Example 32
Source File: RocksMapTest.scala    From utils   with Apache License 2.0
package com.indix.utils.store

import java.io.Serializable
import java.nio.file.{Paths, Files}

import org.apache.commons.io.FileUtils
import org.scalatest.{Matchers, FlatSpec}


case class TestObject(a: Int, b: String, c: Array[Int], d: Array[String]) extends Serializable {

  def equals(other: TestObject): Boolean = {
    this.a.equals(other.a) && this.b.equals(other.b) && this.c.sameElements(other.c) && this.d.sameElements(other.d)
  }

}

case class ComplexTestObject(a: Int, b: TestObject) extends Serializable {
  def equals(other: ComplexTestObject): Boolean = {
    this.a.equals(other.a) && this.b.equals(other.b)
  }
}

class RocksMapTest extends FlatSpec with Matchers {

  "RocksMap" should "serialize and deserialize the keys and values" in {
    val db = new RocksMap("test")

    val a: Int = 1
    val b: String = "hello"
    val c: Array[Int] = Array(1, 2, 3)

    val d: Array[String] = Array("a", "b", "c")

    val serialized_a = db.serialize(a)
    val serialized_b = db.serialize(b)
    val serialized_c = db.serialize(c)
    val serialized_d = db.serialize(d)
    val serialized_TestObject = db.serialize(TestObject(a, b, c, d))
    val serialized_ComplexObject = db.serialize(ComplexTestObject(a, TestObject(a, b, c, d)))

    db.deserialize[Int](serialized_a) should be(a)
    db.deserialize[String](serialized_b) should be(b)
    db.deserialize[Array[Int]](serialized_c) should be(c)
    db.deserialize[Array[String]](serialized_d) should be(d)
    db.deserialize[TestObject](serialized_TestObject).equals(TestObject(a, b, c, d)) should be(true)
    db.deserialize[ComplexTestObject](serialized_ComplexObject).equals(ComplexTestObject(a, TestObject(a, b, c, d))) should be(true)
    db.drop()
    db.close()
  }

  it should "put and get values" in {
    val db = new RocksMap("test")

    db.put(1, 1.0)
    db.get[Int, Double](1).getOrElse(0) should be(1.0)
    db.clear()
    db.drop()
    db.close()
  }

  it should "remove values" in {
    val db = new RocksMap("test")

    db.put(1, 1L)
    db.get[Int, Long](1).getOrElse(0) should be(1L)
    db.remove(1)
    db.get[Int, Long](1) should be(None)
    db.drop()
    db.close()
  }

  it should "clear all the values" in {
    val db = new RocksMap(name = "test")
    db.put(1, "hello")
    db.put(2, "yello")
    db.get(1) should not be (None)
    db.get(2) should not be (None)
    db.clear()
    db.get(1) should be(None)
    db.get(2) should be(None)
    db.drop()
    db.close()
  }

  it should "clear the data files when drop is called" in {
    val db = new RocksMap(name = "test")
    Files.exists(Paths.get(db.pathString)) should be (true)
    db.drop()
    Files.exists(Paths.get(db.pathString)) should be (false)
    db.close()
  }


} 
Example 33
Source File: SparkContextFunctions.scala    From spark-riak-connector   with Apache License 2.0
package com.basho.riak.spark

import java.io.Serializable

import com.basho.riak.client.core.query.Namespace
import com.basho.riak.spark.rdd.connector.RiakConnector
import com.basho.riak.spark.rdd.mapper.ReadDataMapperFactory
import com.basho.riak.spark.rdd.{ReadConf, RiakRDD, RiakTSRDD}
import org.apache.spark.SparkContext
import org.apache.spark.sql.types.StructType

import scala.reflect.ClassTag

class SparkContextFunctions(@transient val sc: SparkContext) extends Serializable {

  def riakTSTable[T](bucketName: String,
                     readConf: ReadConf = ReadConf(sc.getConf),
                     schema: Option[StructType] = None
                    )(implicit
                      ct: ClassTag[T],
                      connector: RiakConnector = RiakConnector(sc.getConf)
                    ): RiakTSRDD[T] = RiakTSRDD[T](sc, bucketName, readConf = readConf, schema = schema)

  def riakBucket[T](bucketName: String,
                    bucketType: String = "default"
                   )(implicit
                     connector: RiakConnector = RiakConnector(sc.getConf),
                     ct: ClassTag[T],
                     rdmf: ReadDataMapperFactory[T]
                   ): RiakRDD[T] =
    new RiakRDD[T](sc, connector, bucketType, bucketName, readConf = ReadConf(sc.getConf))

  def riakBucket[T](ns: Namespace
                   )(implicit
                     ct: ClassTag[T],
                     rdmf: ReadDataMapperFactory[T]
                   ): RiakRDD[T] = riakBucket[T](ns.getBucketNameAsString, ns.getBucketTypeAsString)
} 
Example 34
Source File: RecommenderSystem.scala    From recommendersystem   with Apache License 2.0
package com.infosupport.recommendedcontent.core

import java.io.Serializable

import akka.actor.{Props, Actor, ActorLogging}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel


  private def generateRecommendations(userId: Int, count: Int) = {
    log.info(s"Generating ${count} recommendations for user with ID ${userId}")

    // Generate recommendations based on the machine learning model.
    // When there's no trained model return an empty list instead.
    val results = model match {
      case Some(m) => m.recommendProducts(userId,count)
        .map(rating => Recommendation(rating.product,rating.rating))
        .toList

      case None => Nil
    }

    sender ! Recommendations(results)
  }
} 
Example 35
Source File: Token.scala    From meteorite-core   with Apache License 2.0
package bi.meteorite.core.security.tokenprovider

import bi.meteorite.core.api.security.tokenprovider.IToken
import java.io.Serializable

import scala.collection.mutable


  def isExpired(validityDuration: Long): Boolean = {
    if (validityDuration < 0) {
      return false
    }
    else if (validityDuration == 0) {
      return true
    }
    val expiryDate: Long = mtimestamp + validityDuration
    System.currentTimeMillis >= expiryDate
  }

  override def clone: Token = {
    val clone: Token = new Token(mtoken, mtokenSecret, mtimestamp)
    if (getProperties != null) {
      clone.setProperties(clone.getProperties.toMap)
    }
    clone
  }

  private def clone(map: collection.mutable.Map[String, String]): collection.mutable.Map[String, String] = {
    val newMap = collection.mutable.Map[String, String]()
    for (key <- map.keySet) {
      newMap.put(key, map.get(key).get)
    }
    newMap
  }
} 
Example 36
Source File: Literal.scala    From dagon   with Apache License 2.0
package com.stripe.dagon

import java.io.Serializable
import scala.util.hashing.MurmurHash3
import scala.util.control.TailCalls


  private def eqFn[N[_]]: Function[RefPair[Literal[N, _], Literal[N, _]], Boolean] =
    Memoize.function[RefPair[Literal[N, _], Literal[N, _]], Boolean] {
      case (pair, _) if pair.itemsEq => true
      case (RefPair(Const(a), Const(b)), _) => a == b
      case (RefPair(Unary(left, fa), Unary(right, fb)), rec) =>
        (fa == fb) && rec(RefPair(left, right))
      case (RefPair(Binary(lefta, righta, fa), Binary(leftb, rightb, fb)), rec) =>
        (fa == fb) && rec(RefPair(lefta, leftb)) && rec(RefPair(righta, rightb))
      case (RefPair(Variadic(argsa, fa), Variadic(argsb, fb)), rec) =>
        @annotation.tailrec
        def loop(left: List[Literal[N, _]], right: List[Literal[N, _]]): Boolean =
          (left, right) match {
            case (lh :: ltail, rh :: rtail) =>
              rec(RefPair(lh, rh)) && loop(ltail, rtail)
            case (Nil, Nil) => true
            case _ => false
          }

        (fa == fb) && loop(argsa, argsb)
      case other => false
    }
} 
Example 37
Source File: FunctionK.scala    From dagon   with Apache License 2.0
package com.stripe.dagon

import java.io.Serializable

trait FunctionK[T[_], R[_]] extends Serializable {
  def apply[U](tu: T[U]): R[U] =
    toFunction[U](tu)

  def toFunction[U]: T[U] => R[U]
}

object FunctionK {
  def andThen[A[_], B[_], C[_]](first: FunctionK[A, B], second: FunctionK[B, C]): FunctionK[A, C] =
    new FunctionK[A, C] {
      def toFunction[U] = first.toFunction[U].andThen(second.toFunction[U])
    }
} 
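Usage sketch (not part of the dagon sources): FunctionK encodes a natural transformation between type constructors, for example List ~> Option:

val headOption: FunctionK[List, Option] = new FunctionK[List, Option] {
  def toFunction[U]: List[U] => Option[U] = list => list.headOption
}
val toList: FunctionK[Option, List] = new FunctionK[Option, List] {
  def toFunction[U]: Option[U] => List[U] = opt => opt.toList
}

headOption(List(1, 2, 3))                             // Some(1)
FunctionK.andThen(headOption, toList)(List(1, 2, 3))  // List(1)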
Example 38
Source File: OneElementConcurrentQueue.scala    From zio   with Apache License 2.0
package zio.internal.impls

import java.io.Serializable
import java.util.concurrent.atomic.{ AtomicReference, LongAdder }

import zio.internal.MutableConcurrentQueue


final class OneElementConcurrentQueue[A] extends MutableConcurrentQueue[A] with Serializable {
  private[this] val ref      = new AtomicReference[AnyRef]()
  private[this] val deqAdder = new LongAdder()

  override final val capacity = 1

  override def dequeuedCount(): Long = deqAdder.sum()
  override def enqueuedCount(): Long =
    if (isEmpty()) dequeuedCount() else dequeuedCount() + 1

  override def isEmpty(): Boolean = ref.get() == null
  override def isFull(): Boolean  = !isEmpty()

  override def offer(a: A): Boolean = {
    assert(a != null)

    val aRef    = ref
    var ret     = false
    var looping = true

    while (looping) {
      if (aRef.get() != null) looping = false
      else {
        if (aRef.compareAndSet(null, a.asInstanceOf[AnyRef])) {
          ret = true
          looping = false
        }
      }
    }

    ret
  }

  override def poll(default: A): A = {
    var ret     = default
    var looping = true
    val aRef    = ref
    var el      = null.asInstanceOf[AnyRef]

    while (looping) {
      el = aRef.get()
      if (el == null) looping = false
      else {
        if (aRef.compareAndSet(el, null)) {
          ret = el.asInstanceOf[A]
          deqAdder.increment()
          looping = false
        }
      }
    }

    ret
  }

  override def size(): Int = if (isEmpty()) 0 else 1
} 
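A minimal usage sketch for the queue above, assuming the zio module that provides zio.internal.MutableConcurrentQueue is on the classpath; the printed values follow from the offer/poll logic shown:

import zio.internal.impls.OneElementConcurrentQueue

object OneElementQueueDemo {
  def main(args: Array[String]): Unit = {
    val q = new OneElementConcurrentQueue[String]()

    println(q.offer("a"))      // true: the single slot was empty
    println(q.offer("b"))      // false: capacity is 1, so the second offer is rejected
    println(q.size())          // 1
    println(q.poll("empty"))   // a
    println(q.poll("empty"))   // empty: the default is returned when the queue is empty
    println(q.dequeuedCount()) // 1
  }
}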
Example 39
Source File: OneElementConcurrentQueue.scala    From zio   with Apache License 2.0 5 votes vote down vote up
package zio.internal.impls

import java.io.Serializable
import java.util.concurrent.atomic.{ AtomicBoolean, AtomicLong, AtomicReference }

import zio.internal.MutableConcurrentQueue

final class OneElementConcurrentQueue[A] extends MutableConcurrentQueue[A] with Serializable {
  private[this] val ref = new AtomicReference[AnyRef]()

  private[this] val headCounter   = new AtomicLong(0L)
  private[this] val deqInProgress = new AtomicBoolean(false)

  private[this] val tailCounter   = new AtomicLong(0L)
  private[this] val enqInProgress = new AtomicBoolean(false)

  override final val capacity = 1

  override def dequeuedCount(): Long = headCounter.get()
  override def enqueuedCount(): Long = tailCounter.get()

  override def isEmpty(): Boolean = ref.get() == null
  override def isFull(): Boolean  = !isEmpty()

  override def offer(a: A): Boolean = {
    assert(a != null)

    var res     = false
    var looping = true

    while (looping) {
      if (isFull()) {
        looping = false
      } else {
        if (enqInProgress.compareAndSet(false, true)) { // get an exclusive right to offer
          if (ref.get() == null) {
            tailCounter.lazySet(tailCounter.get() + 1)
            ref.lazySet(a.asInstanceOf[AnyRef])
            res = true
          }

          enqInProgress.lazySet(false)
          looping = false
        }
      }
    }

    res
  }

  override def poll(default: A): A = {
    var res     = default
    var looping = true

    while (looping) {
      if (isEmpty()) {
        looping = false
      } else {
        if (deqInProgress.compareAndSet(false, true)) { // get an exclusive right to poll
          val el = ref.get().asInstanceOf[A]

          if (el != null) {
            res = el
            headCounter.lazySet(headCounter.get() + 1)
            ref.lazySet(null.asInstanceOf[AnyRef])
          }

          deqInProgress.lazySet(false)
          looping = false
        }
      }
    }

    res
  }

  override def size(): Int = if (isEmpty()) 0 else 1
} 
Example 41
Source File: PolicyHelper.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.serving.core.helpers

import java.io.{File, Serializable}

import akka.event.slf4j.SLF4JLogging
import com.stratio.sparta.serving.core.constants.AppConstant
import com.stratio.sparta.serving.core.models.policy.{PolicyElementModel, PolicyModel}
import com.stratio.sparta.serving.core.utils.ReflectionUtils
import com.typesafe.config.Config

import scala.collection.JavaConversions._

object PolicyHelper extends SLF4JLogging {

  lazy val ReflectionUtils = new ReflectionUtils

  def jarsFromPolicy(apConfig: PolicyModel): Seq[File] =
    apConfig.userPluginsJars.filter(!_.jarPath.isEmpty).map(_.jarPath).distinct.map(filePath => new File(filePath))

  def getSparkConfigFromPolicy(policy: PolicyModel): Map[String, String] =
    policy.sparkConf.flatMap { sparkProperty =>
      if (sparkProperty.sparkConfKey.isEmpty || sparkProperty.sparkConfValue.isEmpty)
        None
      else Option((sparkProperty.sparkConfKey, sparkProperty.sparkConfValue))
    }.toMap

  def getSparkConfigs(elements: Seq[PolicyElementModel], methodName: String, suffix: String): Map[String, String] = {
    log.info("Initializing reflection")
    elements.flatMap(o => {
      val classType = o.configuration.getOrElse(AppConstant.CustomTypeKey, o.`type`).toString
      val clazzToInstance = ReflectionUtils.getClasspathMap.getOrElse(classType + suffix, o.`type` + suffix)
      val clazz = Class.forName(clazzToInstance)
      clazz.getMethods.find(p => p.getName == methodName) match {
        case Some(method) =>
          method.setAccessible(true)
          method.invoke(clazz, o.configuration.asInstanceOf[Map[String, Serializable]])
            .asInstanceOf[Seq[(String, String)]]
        case None =>
          Seq.empty[(String, String)]
      }
    }).toMap
  }

  def getSparkConfFromProps(clusterConfig: Config): Map[String, String] =
    clusterConfig.entrySet()
      .filter(_.getKey.startsWith("spark.")).toSeq
      .map(e => (e.getKey, e.getValue.unwrapped.toString)).toMap

} 
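A usage sketch for getSparkConfFromProps, assuming the sparta serving-core module and Typesafe Config are on the classpath; only keys starting with "spark." survive the filter:

import com.stratio.sparta.serving.core.helpers.PolicyHelper
import com.typesafe.config.ConfigFactory

object SparkConfFromPropsDemo {
  def main(args: Array[String]): Unit = {
    val clusterConfig = ConfigFactory.parseString(
      """
        |spark.executor.memory = 2g
        |spark.cores.max = 4
        |zookeeper.connect = "localhost:2181"
      """.stripMargin)

    // Keeps spark.executor.memory and spark.cores.max, drops zookeeper.connect.
    val sparkConf: Map[String, String] = PolicyHelper.getSparkConfFromProps(clusterConfig)
    println(sparkConf)
  }
}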
Example 42
Source File: StreamingTestMethod.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.stat.test

import java.io.Serializable

import scala.language.implicitConversions
import scala.math.pow

import com.twitter.chill.MeatLocker
import org.apache.commons.math3.stat.descriptive.StatisticalSummaryValues
import org.apache.commons.math3.stat.inference.TTest

import org.apache.spark.internal.Logging
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.util.StatCounter


private[stat] object StreamingTestMethod {
  // Note: after new `StreamingTestMethod`s are implemented, please update this map.
  private final val TEST_NAME_TO_OBJECT: Map[String, StreamingTestMethod] = Map(
    "welch" -> WelchTTest,
    "student" -> StudentTTest)

  def getTestMethodFromName(method: String): StreamingTestMethod =
    TEST_NAME_TO_OBJECT.get(method) match {
      case Some(test) => test
      case None =>
        throw new IllegalArgumentException(
          "Unrecognized method name. Supported streaming test methods: "
            + TEST_NAME_TO_OBJECT.keys.mkString(", "))
    }
} 
Example 43
Source File: ParserStage.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.driver.stage

import java.io.Serializable

import akka.event.slf4j.SLF4JLogging
import com.stratio.sparta.driver.writer.{TransformationsWriterHelper, WriterOptions}
import com.stratio.sparta.sdk.pipeline.output.Output
import com.stratio.sparta.sdk.pipeline.transformation.Parser
import com.stratio.sparta.serving.core.constants.AppConstant
import com.stratio.sparta.serving.core.models.policy.{PhaseEnum, TransformationModel}
import com.stratio.sparta.serving.core.utils.ReflectionUtils
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.StructType
import org.apache.spark.streaming.dstream.DStream

import scala.util.{Failure, Success, Try}

trait ParserStage extends BaseStage {
  this: ErrorPersistor =>

  def parserStage(refUtils: ReflectionUtils,
                  schemas: Map[String, StructType]): (Seq[Parser], Option[WriterOptions]) =
    (policy.transformations.get.transformationsPipe.map(parser => createParser(parser, refUtils, schemas)),
      policy.transformations.get.writer.map(writer => WriterOptions(
        writer.outputs,
        writer.saveMode,
        writer.tableName,
        getAutoCalculatedFields(writer.autoCalculatedFields),
        writer.partitionBy,
        writer.primaryKey
      )))

  private[driver] def createParser(model: TransformationModel,
                           refUtils: ReflectionUtils,
                           schemas: Map[String, StructType]): Parser = {
    val classType = model.configuration.getOrElse(AppConstant.CustomTypeKey, model.`type`).toString
    val errorMessage = s"Something went wrong creating the parser: $classType. Please re-check the policy."
    val okMessage = s"Parser: $classType created correctly."
    generalTransformation(PhaseEnum.Parser, okMessage, errorMessage) {
      val outputFieldsNames = model.outputFieldsTransformed.map(_.name)
      val schema = schemas.getOrElse(model.order.toString, throw new Exception("Can not find transformation schema"))
      refUtils.tryToInstantiate[Parser](classType + Parser.ClassSuffix, (c) =>
        c.getDeclaredConstructor(
          classOf[Integer],
          classOf[Option[String]],
          classOf[Seq[String]],
          classOf[StructType],
          classOf[Map[String, Serializable]])
          .newInstance(model.order, model.inputField, outputFieldsNames, schema, model.configuration)
          .asInstanceOf[Parser])
    }
  }
}

object ParserStage extends SLF4JLogging {

  def executeParsers(row: Row, parsers: Seq[Parser]): Seq[Row] =
    if (parsers.size == 1) parseEvent(row, parsers.head)
    else parseEvent(row, parsers.head).flatMap(eventParsed => executeParsers(eventParsed, parsers.drop(1)))

  def parseEvent(row: Row, parser: Parser): Seq[Row] =
    Try {
      parser.parse(row)
    } match {
      case Success(eventParsed) =>
        eventParsed
      case Failure(exception) =>
        val error = s"Failure[Parser]: ${row.mkString(",")} | Message: ${exception.getLocalizedMessage}" +
          s" | Parser: ${parser.getClass.getSimpleName}"
        log.error(error, exception)
        Seq.empty[Row]
    }

  def applyParsers(input: DStream[Row],
                   parsers: Seq[Parser],
                   schema: StructType,
                   outputs: Seq[Output],
                   writerOptions: Option[WriterOptions]): DStream[Row] = {
    val transformedData = if (parsers.isEmpty) input
    else input.flatMap(row => executeParsers(row, parsers))

    writerOptions.foreach(options =>
      TransformationsWriterHelper.writeTransformations(transformedData, schema, outputs, options))
    transformedData
  }
} 
Example 44
Source File: OutputStage.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.driver.stage

import java.io.Serializable

import com.stratio.sparta.sdk.pipeline.output.Output
import com.stratio.sparta.serving.core.constants.AppConstant
import com.stratio.sparta.serving.core.models.policy.{PhaseEnum, PolicyElementModel}
import com.stratio.sparta.serving.core.utils.ReflectionUtils

trait OutputStage extends BaseStage {
  this: ErrorPersistor =>

  def outputStage(refUtils: ReflectionUtils): Seq[Output] =
    policy.outputs.map(o => createOutput(o, refUtils))

  private[driver] def createOutput(model: PolicyElementModel, refUtils: ReflectionUtils): Output = {
    val errorMessage = s"Something went wrong creating the output: ${model.name}. Please re-check the policy."
    val okMessage = s"Output: ${model.name} created correctly."
    generalTransformation(PhaseEnum.Output, okMessage, errorMessage) {
      val classType = model.configuration.getOrElse(AppConstant.CustomTypeKey, model.`type`).toString
      refUtils.tryToInstantiate[Output](classType + Output.ClassSuffix, (c) =>
        c.getDeclaredConstructor(
          classOf[String],
          classOf[Map[String, Serializable]])
          .newInstance(model.name, model.configuration)
          .asInstanceOf[Output])
    }
  }
} 
Example 45
Source File: RedisOutput.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.output.redis

import java.io.Serializable

import com.stratio.sparta.plugin.output.redis.dao.AbstractRedisDAO
import com.stratio.sparta.sdk.pipeline.output.Output._
import com.stratio.sparta.sdk.pipeline.output.{Output, SaveModeEnum}
import com.stratio.sparta.sdk.properties.ValidatingPropertyMap._
import org.apache.spark.sql.types.{StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row}


class RedisOutput(name: String, properties: Map[String, Serializable])
  extends Output(name, properties) with AbstractRedisDAO with Serializable {

  override val hostname = properties.getString("hostname", DefaultRedisHostname)
  override val port = properties.getString("port", DefaultRedisPort).toInt

  override def save(dataFrame: DataFrame, saveMode: SaveModeEnum.Value, options: Map[String, String]): Unit = {
    val tableName = getTableNameFromOptions(options)
    val schema = dataFrame.schema

    validateSaveMode(saveMode)

    dataFrame.foreachPartition{ rowList =>
      rowList.foreach{ row =>
        val valuesList = getValuesList(row,schema.fieldNames)
        val hashKey = getHashKeyFromRow(valuesList, schema)
        getMeasuresFromRow(valuesList, schema).foreach { case (measure, value) =>
          hset(hashKey, measure.name, value)
        }
      }
    }
  }

  def getHashKeyFromRow(valuesList: Seq[(String, String)], schema: StructType): String =
    valuesList.flatMap{ case (key, value) =>
      val fieldSearch = schema.fields.find(structField =>
        structField.metadata.contains(Output.PrimaryKeyMetadataKey) && structField.name == key)

      fieldSearch.map(structField => s"${structField.name}$IdSeparator$value")
    }.mkString(IdSeparator)

  def getMeasuresFromRow(valuesList: Seq[(String, String)], schema: StructType): Seq[(StructField, String)] =
    valuesList.flatMap{ case (key, value) =>
      val fieldSearch = schema.fields.find(structField =>
          structField.metadata.contains(Output.MeasureMetadataKey) &&
          structField.name == key)
      fieldSearch.map(field => (field, value))
    }

  def getValuesList(row: Row, fieldNames: Array[String]): Seq[(String, String)] =
    fieldNames.zip(row.toSeq).map{ case (key, value) => (key, value.toString)}.toSeq
} 
Example 46
Source File: FlumeInput.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.input.flume

import java.io.Serializable
import java.net.InetSocketAddress

import com.stratio.sparta.sdk.pipeline.input.Input
import com.stratio.sparta.sdk.properties.ValidatingPropertyMap._
import org.apache.spark.sql.Row
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.flume.FlumeUtils

class FlumeInput(properties: Map[String, Serializable]) extends Input(properties) {

  val DEFAULT_FLUME_PORT = 11999
  val DEFAULT_ENABLE_DECOMPRESSION = false
  val DEFAULT_MAXBATCHSIZE = 1000
  val DEFAULT_PARALLELISM = 5

  def initStream(ssc: StreamingContext, sparkStorageLevel: String): DStream[Row] = {

    if (properties.getString("type").equalsIgnoreCase("pull")) {
      FlumeUtils.createPollingStream(
        ssc,
        getAddresses,
        storageLevel(sparkStorageLevel),
        maxBatchSize,
        parallelism
      ).map(data => Row(data.event.getBody.array))
    } else {
      // push
      FlumeUtils.createStream(
        ssc, properties.getString("hostname"),
        properties.getString("port").toInt,
        storageLevel(sparkStorageLevel),
        enableDecompression
      ).map(data => Row(data.event.getBody.array))
    }
  }

  private def getAddresses: Seq[InetSocketAddress] =
    properties.getMapFromJsoneyString("addresses")
      .map(values => (values.get("host"), values.get("port")))
      .map {
        case (Some(address), None) =>
          new InetSocketAddress(address, DEFAULT_FLUME_PORT)
        case (Some(address), Some(port)) =>
          new InetSocketAddress(address, port.toInt)
        case _ =>
          throw new IllegalStateException(s"Invalid configuration value for addresses : ${properties.get("addresses")}")
      }

  private def enableDecompression: Boolean =
    if (properties.hasKey("enableDecompression")) properties.getBoolean("enableDecompression")
    else DEFAULT_ENABLE_DECOMPRESSION

  private def parallelism: Int =
    if (properties.hasKey("parallelism")) properties.getString("parallelism").toInt
    else DEFAULT_PARALLELISM

  private def maxBatchSize: Int =
    if (properties.hasKey("maxBatchSize")) properties.getString("maxBatchSize").toInt
    else DEFAULT_MAXBATCHSIZE
} 
Example 47
Source File: HostPortZkTest.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.input.kafka


import java.io.Serializable

import com.stratio.sparta.sdk.properties.JsoneyString
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Matchers, WordSpec}

@RunWith(classOf[JUnitRunner])
class HostPortZkTest extends WordSpec with Matchers {

  class KafkaTestInput(val properties: Map[String, Serializable]) extends KafkaBase
  
  "getHostPortZk" should {

    "return a chain (zookeeper:connection, host:port)" in {
      val conn = """[{"host": "localhost", "port": "2181"}]"""
      val props = Map("zookeeper.connect" -> JsoneyString(conn), "zookeeper.path" -> "")
      val input = new KafkaTestInput(props)

      input.getHostPortZk("zookeeper.connect", "localhost", "2181") should
        be(Map("zookeeper.connect" -> "localhost:2181"))
    }

    "return a chain (zookeeper:connection, host:port, zookeeper.path:path)" in {
      val conn = """[{"host": "localhost", "port": "2181"}]"""
      val props = Map("zookeeper.connect" -> JsoneyString(conn), "zookeeper.path" -> "/test")
      val input = new KafkaTestInput(props)

      input.getHostPortZk("zookeeper.connect", "localhost", "2181") should
        be(Map("zookeeper.connect" -> "localhost:2181/test"))
    }

    "return a chain (zookeeper:connection, host:port,host:port,host:port)" in {
      val conn =
        """[{"host": "localhost", "port": "2181"},{"host": "localhost", "port": "2181"},
          |{"host": "localhost", "port": "2181"}]""".stripMargin
      val props = Map("zookeeper.connect" -> JsoneyString(conn))
      val input = new KafkaTestInput(props)

      input.getHostPortZk("zookeeper.connect", "localhost", "2181") should
        be(Map("zookeeper.connect" -> "localhost:2181,localhost:2181,localhost:2181"))
    }

    "return a chain (zookeeper:connection, host:port,host:port,host:port, zookeeper.path:path)" in {
      val conn =
        """[{"host": "localhost", "port": "2181"},{"host": "localhost", "port": "2181"},
          |{"host": "localhost", "port": "2181"}]""".stripMargin
      val props = Map("zookeeper.connect" -> JsoneyString(conn), "zookeeper.path" -> "/test")
      val input = new KafkaTestInput(props)

      input.getHostPortZk("zookeeper.connect", "localhost", "2181") should
        be(Map("zookeeper.connect" -> "localhost:2181,localhost:2181,localhost:2181/test"))
    }

    "return a chain with default port (zookeeper:connection, host:defaultport)" in {

      val props = Map("foo" -> "var")
      val input = new KafkaTestInput(props)

      input.getHostPortZk("zookeeper.connect", "localhost", "2181") should
        be(Map("zookeeper.connect" -> "localhost:2181"))
    }

    "return a chain with default port (zookeeper:connection, host:defaultport, zookeeper.path:path)" in {
      val props = Map("zookeeper.path" -> "/test")
      val input = new KafkaTestInput(props)

      input.getHostPortZk("zookeeper.connect", "localhost", "2181") should
        be(Map("zookeeper.connect" -> "localhost:2181/test"))
    }

    "return a chain with default host and default port (zookeeper.connect: ," +
      "defaultHost: defaultport," +
      "zookeeper.path:path)" in {
      val props = Map("foo" -> "var")
      val input = new KafkaTestInput(props)

      input.getHostPortZk("zookeeper.connect", "localhost", "2181") should
        be(Map("zookeeper.connect" -> "localhost:2181"))
    }
  }
} 
Example 48
Source File: MorphlinesParserTest.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.transformation.morphline

import java.io.Serializable

import com.stratio.sparta.sdk.pipeline.input.Input
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, Matchers, WordSpecLike}


@RunWith(classOf[JUnitRunner])
class MorphlinesParserTest extends WordSpecLike with Matchers with BeforeAndAfter with BeforeAndAfterAll {

  val morphlineConfig = """
          id : test1
          importCommands : ["org.kitesdk.**"]
          commands: [
          {
              readJson {},
          }
          {
              extractJsonPaths {
                  paths : {
                      col1 : /col1
                      col2 : /col2
                  }
              }
          }
          {
            java {
              code : "return child.process(record);"
            }
          }
          {
              removeFields {
                  blacklist:["literal:_attachment_body"]
              }
          }
          ]
                        """
  val inputField = Some(Input.RawDataKey)
  val outputsFields = Seq("col1", "col2")
  val props: Map[String, Serializable] = Map("morphline" -> morphlineConfig)

  val schema = StructType(Seq(StructField("col1", StringType), StructField("col2", StringType)))

  val parser = new MorphlinesParser(1, inputField, outputsFields, schema, props)

  "A MorphlinesParser" should {

    "parse a simple json" in {
      val simpleJson =
        """{
            "col1":"hello",
            "col2":"world"
            }
        """
      val input = Row(simpleJson)
      val result = parser.parse(input)

      val expected = Seq(Row(simpleJson, "hello", "world"))

      result should be eq(expected)
    }

    "parse a simple json removing raw" in {
      val simpleJson =
        """{
            "col1":"hello",
            "col2":"world"
            }
        """
      val input = Row(simpleJson)
      val result = parser.parse(input)

      val expected = Seq(Row("hello", "world"))

      result should be eq(expected)
    }

    "exclude not configured fields" in {
      val simpleJson =
        """{
            "col1":"hello",
            "col2":"world",
            "col3":"!"
            }
        """
      val input = Row(simpleJson)
      val result = parser.parse(input)

      val expected = Seq(Row(simpleJson, "hello", "world"))

      result should be eq(expected)
    }
  }
} 
Example 49
Source File: BoundedPriorityQueue.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 50
Source File: BoundedPriorityQueue.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 51
Source File: BoundedPriorityQueue.scala    From sparker   with GNU General Public License v3.0 5 votes vote down vote up
package SparkER.Utilities

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
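Because this variant of BoundedPriorityQueue is public, a self-contained usage sketch is straightforward; TopKDemo is an illustrative name only:

import SparkER.Utilities.BoundedPriorityQueue

object TopKDemo {
  def main(args: Array[String]): Unit = {
    // Keeps only the 3 largest elements ever offered; smaller ones are discarded.
    val topK = new BoundedPriorityQueue[Int](3)
    topK ++= Seq(5, 1, 9, 7, 3, 8)

    // Iteration order follows the underlying heap, so sort before printing.
    println(topK.toList.sorted) // List(7, 8, 9)
  }
}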
Example 52
Source File: BoundedPriorityQueue.scala    From sparker   with GNU General Public License v3.0 5 votes vote down vote up
package Utilities

import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}

import scala.collection.JavaConverters._
import scala.collection.generic.Growable


class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A])
  extends Iterable[A] with Growable[A] with Serializable {

  private val underlying = new JPriorityQueue[A](maxSize, ord)

  override def iterator: Iterator[A] = underlying.iterator.asScala

  override def size: Int = underlying.size

  override def ++=(xs: TraversableOnce[A]): this.type = {
    xs.foreach { this += _ }
    this
  }

  override def +=(elem: A): this.type = {
    if (size < maxSize) {
      underlying.offer(elem)
    } else {
      maybeReplaceLowest(elem)
    }
    this
  }

  override def +=(elem1: A, elem2: A, elems: A*): this.type = {
    this += elem1 += elem2 ++= elems
  }

  override def clear() { underlying.clear() }

  private def maybeReplaceLowest(a: A): Boolean = {
    val head = underlying.peek()
    if (head != null && ord.gt(a, head)) {
      underlying.poll()
      underlying.offer(a)
    } else {
      false
    }
  }
} 
Example 54
Source File: CursorOp.scala    From nexus   with Apache License 2.0 5 votes vote down vote up
package ch.epfl.bluebrain.nexus.rdf

import java.io.Serializable

import ch.epfl.bluebrain.nexus.rdf.Node.IriNode


sealed abstract class CursorOp extends Product with Serializable

object CursorOp {
  final case object Top                        extends CursorOp
  final case object Parent                     extends CursorOp
  final case object Narrow                     extends CursorOp
  final case class Up(predicate: IriNode)      extends CursorOp
  final case class UpSet(predicate: IriNode)   extends CursorOp
  final case class DownSet(predicate: IriNode) extends CursorOp
  final case class Down(predicate: IriNode)    extends CursorOp
}
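A small pattern-matching sketch over the sealed CursorOp hierarchy, assuming the nexus rdf module is on the classpath; describe and CursorOpDemo are illustrative helpers, not part of the library:

import ch.epfl.bluebrain.nexus.rdf.CursorOp
import ch.epfl.bluebrain.nexus.rdf.CursorOp._

object CursorOpDemo {
  // Render a trail of cursor operations as a readable path, e.g. for error reporting.
  def describe(ops: List[CursorOp]): String =
    ops.map {
      case Top                => "top"
      case Parent             => "parent"
      case Narrow             => "narrow"
      case Up(predicate)      => s"up($predicate)"
      case UpSet(predicate)   => s"upSet($predicate)"
      case DownSet(predicate) => s"downSet($predicate)"
      case Down(predicate)    => s"down($predicate)"
    }.mkString(" -> ")
}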