java.io.ByteArrayOutputStream Scala Examples

The following examples show how to use java.io.ByteArrayOutputStream. Each example is taken from an open-source Scala project; the source file, project name, and license are listed above the code.
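Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all build on: write bytes into an in-memory buffer, then take a snapshot with toByteArray or toString. Only standard java.io and java.nio API is used; the object name is just for illustration.

import java.io.ByteArrayOutputStream
import java.nio.charset.StandardCharsets

object ByteArrayOutputStreamBasics extends App {
  val out = new ByteArrayOutputStream()                 // grows in memory; no file or socket involved
  out.write("hello ".getBytes(StandardCharsets.UTF_8))
  out.write("world".getBytes(StandardCharsets.UTF_8))

  val bytes: Array[Byte] = out.toByteArray              // snapshot of everything written so far
  val text: String       = out.toString("UTF-8")        // the same content decoded as text

  println(s"${bytes.length} bytes: $text")              // prints: 11 bytes: hello world
  out.close()                                           // close() is a no-op for this class, but harmless
}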
Example 1
Source File: byte_message.scala    From libisabelle   with Apache License 2.0
package isabelle

import java.io.{ByteArrayOutputStream, OutputStream, InputStream, IOException}


object Byte_Message
{
  // helpers such as write, flush, make_header, read_line and read_block are called below
  // but defined elsewhere in the original source file; they are omitted from this excerpt

  private def is_length(msg: Bytes): Boolean =
    !msg.is_empty && msg.iterator.forall(b => Symbol.is_ascii_digit(b.toChar))

  private def is_terminated(msg: Bytes): Boolean =
  {
    val len = msg.length
    len > 0 && Symbol.is_ascii_line_terminator(msg.charAt(len - 1))
  }

  def write_line_message(stream: OutputStream, msg: Bytes)
  {
    if (is_length(msg) || is_terminated(msg))
      error ("Bad content for line message:\n" ++ msg.text.take(100))

    val n = msg.length
    write(stream,
      (if (n > 100 || msg.iterator.contains(10)) make_header(List(n + 1)) else Nil) :::
        List(msg, Bytes.newline))
    flush(stream)
  }

  def read_line_message(stream: InputStream): Option[Bytes] =
    read_line(stream) match {
      case None => None
      case Some(line) =>
        Value.Nat.unapply(line.text) match {
          case None => Some(line)
          case Some(n) => read_block(stream, n)._1.map(_.trim_line)
        }
    }
} 
Example 2
Source File: AvroConverter.scala    From kafka-connect-common   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.sink

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import io.confluent.connect.avro.AvroData
import java.io.ByteArrayOutputStream
import java.io.File
import org.apache.avro.{Schema => AvroSchema}
import org.apache.avro.generic.GenericRecord
import org.apache.avro.io.EncoderFactory
import org.apache.avro.reflect.ReflectDatumWriter
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException


class AvroConverter extends Converter {
  private val avroData = new AvroData(8)
  private var sinkToSchemaMap: Map[String, AvroSchema] = Map.empty
  private var avroWritersMap: Map[String, ReflectDatumWriter[Object]] = Map.empty

  override def convert(sinkTopic: String,
                       data: SinkRecord): SinkRecord = {
    Option(data) match {
      case None =>
        new SinkRecord(
          sinkTopic,
          0,
          null,
          null,
          avroData.toConnectSchema(sinkToSchemaMap(sinkTopic)),
          null,
          0
        )
      case Some(_) =>
        val kafkaTopic = data.topic()
        val writer = avroWritersMap.getOrElse(kafkaTopic.toLowerCase, throw new ConfigException(s"${AvroConverter.SCHEMA_CONFIG} is not configured for topic $kafkaTopic"))

        val output = new ByteArrayOutputStream()
        val encoder = EncoderFactory.get().binaryEncoder(output, null)
        output.reset()

        val avro = avroData.fromConnectData(data.valueSchema(), data.value()).asInstanceOf[GenericRecord]

        writer.write(avro, encoder)
        encoder.flush()
        val arr = output.toByteArray

        new SinkRecord(
          kafkaTopic,
          data.kafkaPartition(),
          MsgKey.schema,
          MsgKey.getStruct(sinkTopic, data.key().toString()),
          data.valueSchema(),
          arr,
          0
        )


    }
  }

  override def initialize(config: Map[String, String]): Unit = {
    sinkToSchemaMap = AvroConverter.getSchemas(config)
    avroWritersMap = sinkToSchemaMap.map { case (key, schema) =>
      key -> new ReflectDatumWriter[Object](schema)
    }
  }
}

object AvroConverter {
  val SCHEMA_CONFIG = "connect.converter.avro.schemas"

  def getSchemas(config: Map[String, String]): Map[String, AvroSchema] = {
    config.getOrElse(SCHEMA_CONFIG, throw new ConfigException(s"$SCHEMA_CONFIG is not provided"))
      .toString
      .split(';')
      .filter(_.trim.nonEmpty)
      .map(_.split("="))
      .map {
        case Array(sink, path) =>
          val file = new File(path)
          if (!file.exists()) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The file $path doesn't exist!")
          }
          val s = sink.trim.toLowerCase()
          if (s.isEmpty) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The topic is not valid for entry containing $path")
          }
          s -> new AvroSchema.Parser().parse(file)
        case _ => throw new ConfigException(s"$SCHEMA_CONFIG is not properly set. The expected format is <sink_topic>=<path_to_avro_schema>, with entries separated by ';'")
      }.toMap
  }
} 
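As a quick orientation, a minimal initialization sketch for the converter above. The sink topic name and schema file path are hypothetical; the value of connect.converter.avro.schemas follows the <sink_topic>=<schema_file> format that getSchemas parses, and the .avsc file must exist on disk when initialize runs.

import com.datamountaineer.streamreactor.connect.converters.sink.AvroConverter

object AvroConverterUsage extends App {
  val converter = new AvroConverter()
  // hypothetical topic and schema path, for illustration only
  converter.initialize(Map(AvroConverter.SCHEMA_CONFIG -> "my_sink_topic=/tmp/measurement.avsc"))
  // converter.convert("my_sink_topic", someSinkRecord) would then re-encode the record value as Avro bytes
}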
Example 3
Source File: AvroSerializer.scala    From kafka-connect-common   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.serialization

import java.io.{ByteArrayOutputStream, InputStream, OutputStream}

import com.sksamuel.avro4s.{RecordFormat, SchemaFor}
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

object AvroSerializer {
  def write[T <: Product](t: T)(implicit os: OutputStream, formatter: RecordFormat[T], schemaFor: SchemaFor[T]): Unit = write(apply(t), schemaFor())

  def write(record: GenericRecord, schema: Schema)(implicit os: OutputStream) = {
    val writer = new GenericDatumWriter[GenericRecord](schema)
    val encoder = EncoderFactory.get().binaryEncoder(os, null)

    writer.write(record, encoder)
    encoder.flush()
    os.flush()
  }

  def getBytes[T <: Product](t: T)(implicit recordFormat: RecordFormat[T], schemaFor: SchemaFor[T]): Array[Byte] = getBytes(recordFormat.to(t), schemaFor())

  def getBytes(record: GenericRecord, schema: Schema): Array[Byte] = {
    implicit val output = new ByteArrayOutputStream()
    write(record, schema)
    output.toByteArray
  }

  def read(is: InputStream, schema: Schema): GenericRecord = {
    val reader = new GenericDatumReader[GenericRecord](schema)
    val decoder = DecoderFactory.get().binaryDecoder(is, null)
    reader.read(null, decoder)
  }

  def read[T <: Product](is: InputStream)(implicit schemaFor: SchemaFor[T], recordFormat: RecordFormat[T]): T = recordFormat.from(read(is, schemaFor()))

  def apply[T <: Product](t: T)(implicit formatter: RecordFormat[T]): GenericRecord = formatter.to(t)
} 
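A possible round-trip sketch for the GenericRecord overloads above; the schema and field values are made up for illustration, and only the Avro and AvroSerializer APIs shown in this example are relied on.

import java.io.ByteArrayInputStream
import org.apache.avro.Schema
import org.apache.avro.generic.GenericData
import com.datamountaineer.streamreactor.connect.serialization.AvroSerializer

object AvroSerializerUsage extends App {
  // hypothetical record schema, used only for this sketch
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"Measurement","fields":[
      |  {"name":"id","type":"string"},
      |  {"name":"value","type":"double"}
      |]}""".stripMargin)

  val record = new GenericData.Record(schema)
  record.put("id", "sensor-1")
  record.put("value", 42.0)

  val bytes   = AvroSerializer.getBytes(record, schema)                       // Avro binary, schema not embedded
  val decoded = AvroSerializer.read(new ByteArrayInputStream(bytes), schema)  // the same schema is needed to decode
  println(decoded)
}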
Example 4
Source File: FeaturePolygonTest.scala    From spark-pip   with Apache License 2.0
package com.esri

import java.io.ByteArrayOutputStream

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import org.geotools.geometry.jts.WKTReader2
import org.scalatest._

import scala.io.Source


class FeaturePolygonTest extends FlatSpec with Matchers {

  it should "read zero area geometry" in {
    val kryo = new Kryo()
    kryo.register(classOf[FeaturePolygon])

    val reader = new WKTReader2()
    Source
      .fromFile("/tmp/world.tsv")
      .getLines()
      .foreach(line => {
        val tokens = line.split("\t")
        val geom = reader.read(tokens(14))
        FeaturePolygon(geom, Array.empty[String])
          .toRowCols(4.0)
          .foreach {
            case (rowcol, feature) => {
              feature.geom.getGeometryType should endWith("Polygon")

              val baos = new ByteArrayOutputStream(4096)
              val output = new Output(baos)
              kryo.writeObject(output, feature)
              output.flush()

              val obj = kryo.readObject[FeaturePolygon](new Input(baos.toByteArray), classOf[FeaturePolygon])
              obj.geom.equalsExact(feature.geom, 0.000001) shouldBe true
            }
          }
      })
  }
} 
Example 5
Source File: S3Brain.scala    From sumobot   with Apache License 2.0
package com.sumologic.sumobot.brain

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Properties

import akka.actor.{Actor, Props}
import com.amazonaws.auth.{AWSCredentials, AWSStaticCredentialsProvider}
import com.amazonaws.services.s3.{AmazonS3Client, AmazonS3ClientBuilder}
import com.amazonaws.services.s3.model.ObjectMetadata
import com.sumologic.sumobot.brain.Brain._

import scala.collection.JavaConverters._
import scala.collection.immutable

object S3Brain {
  def props(credentials: AWSCredentials,
            bucket: String,
            s3Key: String): Props = Props(classOf[S3Brain], credentials, bucket, s3Key)
}

class S3Brain(credentials: AWSCredentials,
              bucket: String,
              s3Key: String) extends Actor {

  private val s3Client = AmazonS3ClientBuilder.standard()
    .withCredentials(new AWSStaticCredentialsProvider(credentials)).build

  private var brainContents: Map[String, String] = loadFromS3()

  override def receive: Receive = {
    case Store(key, value) =>
      brainContents += (key -> value)
      saveToS3(brainContents)

    case Remove(key) =>
      brainContents -= key
      saveToS3(brainContents)

    case Retrieve(key) =>
      brainContents.get(key) match {
        case Some(value) => sender() ! ValueRetrieved(key, value)
        case None => sender() ! ValueMissing(key)
      }

    case ListValues(prefix) =>
      sender() ! ValueMap(brainContents.filter(_._1.startsWith(prefix)))
  }

  private def loadFromS3(): Map[String, String] = {
    if (s3Client.doesBucketExistV2(bucket)) {
      val props = new Properties()
      props.load(s3Client.getObject(bucket, s3Key).getObjectContent)
      immutable.Map(props.asScala.toSeq: _*)
    } else {
      Map.empty
    }
  }

  private def saveToS3(contents: Map[String, String]): Unit = {
    if (!s3Client.doesBucketExistV2(bucket)) {
      s3Client.createBucket(bucket)
    }

    val props = new Properties()
    props.putAll(contents.asJava)
    val out = new ByteArrayOutputStream()
    props.store(out, "")
    out.flush()
    out.close()
    val in = new ByteArrayInputStream(out.toByteArray)
    s3Client.putObject(bucket, s3Key, in, new ObjectMetadata())
  }
} 
Example 6
Source File: Serialization.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.commons.serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

trait Serialization {

  def deserialize[T](bytes: Array[Byte]): T = {
    val bufferIn = new ByteArrayInputStream(bytes)
    val streamIn = new ObjectInputStream(bufferIn)
    try {
      streamIn.readObject().asInstanceOf[T]
    } finally {
      streamIn.close()
    }
  }

  def serialize[T](objectToSerialize: T): Array[Byte] = {
    val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(byteArrayOutputStream)
    try {
      oos.writeObject(objectToSerialize)
      oos.flush()
      byteArrayOutputStream.toByteArray
    } finally {
      oos.close()
    }
  }

  def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj))
}

object Serialization extends Serialization 
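A short usage sketch for the trait above; the Point case class is hypothetical and works because Scala case classes are serializable by default.

import io.deepsense.commons.serialization.Serialization

case class Point(x: Int, y: Int) // case classes implement java.io.Serializable

object SerializationUsage extends App {
  val bytes    = Serialization.serialize(Point(1, 2))
  val restored = Serialization.deserialize[Point](bytes)
  assert(restored == Point(1, 2))

  // or the round trip in one step, as used in tests
  assert(Serialization.serializeDeserialize(Point(3, 4)) == Point(3, 4))
}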
Example 7
Source File: IO.scala    From RosHTTP   with MIT License
package fr.hmil.roshttp.tools.io

import java.io.{ByteArrayOutputStream, OutputStream, Writer, _}

import scala.annotation.tailrec
import scala.reflect.ClassTag


object IO {

  def pipe(in: Reader, out: Writer): Unit = {
    val buffer = newBuffer[Char]

    @tailrec
    def loop(): Unit = {
      val size = in.read(buffer)
      if (size > 0) {
        out.write(buffer, 0, size)
        loop()
      }
    }
    loop()
  }

  @inline
  private def newBuffer[T: ClassTag] = new Array[T](4096)
} 
Example 8
Source File: XmlEncoder.scala    From phobos   with Apache License 2.0
package ru.tinkoff.phobos.encoding

import java.io.ByteArrayOutputStream

import cats.syntax.option._
import com.fasterxml.aalto.stax.OutputFactoryImpl
import org.codehaus.stax2.XMLStreamWriter2
import ru.tinkoff.phobos.Namespace
import ru.tinkoff.phobos.encoding.XmlEncoder.XmlEncoderConfig


trait XmlEncoder[A] {
  val localname: String
  val namespaceuri: Option[String]
  val elementencoder: ElementEncoder[A]

  def encode(a: A, charset: String = "UTF-8"): String =
    new String(encodeToBytes(a, charset), charset)

  def encodeToBytes(a: A, charset: String = "UTF-8"): Array[Byte] = {
    val os      = new ByteArrayOutputStream
    val factory = new OutputFactoryImpl
    factory.setProperty("javax.xml.stream.isRepairingNamespaces", true)
    val sw = new PhobosStreamWriter(factory.createXMLStreamWriter(os, charset).asInstanceOf[XMLStreamWriter2])
    sw.writeStartDocument()
    elementencoder.encodeAsElement(a, sw, localname, namespaceuri)
    sw.writeEndDocument()
    sw.flush()
    sw.close()
    os.toByteArray
  }

  def encodeWithConfig(a: A, config: XmlEncoderConfig): String =
    new String(encodeToBytesWithConfig(a, config), config.encoding)

  def encodeToBytesWithConfig(a: A, config: XmlEncoderConfig): Array[Byte] = {
    val os      = new ByteArrayOutputStream
    val factory = new OutputFactoryImpl
    factory.setProperty("javax.xml.stream.isRepairingNamespaces", true)
    val sw = new PhobosStreamWriter(factory.createXMLStreamWriter(os, config.encoding).asInstanceOf[XMLStreamWriter2])
    if (config.writeProlog) {
      sw.writeStartDocument(config.encoding, config.version)
    }
    elementencoder.encodeAsElement(a, sw, localname, namespaceuri)
    if (config.writeProlog) {
      sw.writeEndDocument()
    }
    sw.flush()
    sw.close()
    os.toByteArray
  }

}

object XmlEncoder {

  def apply[A](implicit instance: XmlEncoder[A]): XmlEncoder[A] = instance

  def fromElementEncoder[A](localName: String, namespaceUri: Option[String])(
      implicit elementEncoder: ElementEncoder[A]): XmlEncoder[A] =
    new XmlEncoder[A] {
      val localname: String                 = localName
      val namespaceuri: Option[String]      = namespaceUri
      val elementencoder: ElementEncoder[A] = elementEncoder
    }

  def fromElementEncoder[A](localName: String)(implicit elementEncoder: ElementEncoder[A]): XmlEncoder[A] =
    fromElementEncoder(localName, None)

  def fromElementEncoderNs[A, NS](localName: String, namespaceInstance: NS)(implicit elementEncoder: ElementEncoder[A],
                                                                            namespace: Namespace[NS]): XmlEncoder[A] =
    fromElementEncoder(localName, namespace.getNamespace.some)

  def fromElementEncoderNs[A, NS](localName: String)(implicit elementEncoder: ElementEncoder[A],
                                                     namespace: Namespace[NS]): XmlEncoder[A] =
    fromElementEncoder(localName, namespace.getNamespace.some)

  final case class XmlEncoderConfig(
      encoding: String,
      version: String,
      writeProlog: Boolean
  ) {
    def withoutProlog: XmlEncoderConfig = copy(writeProlog = false)
  }

  val defaultConfig: XmlEncoderConfig =
    XmlEncoderConfig(
      encoding = "UTF-8",
      version = "1.0",
      writeProlog = true
    )
} 
Example 9
Source File: AllCodecTest.scala    From aws-lambda-scala   with MIT License
package io.github.mkotsur.aws.codecs

import java.io.ByteArrayOutputStream

import com.amazonaws.services.lambda.runtime.Context
import io.circe.generic.auto._
import io.github.mkotsur.StringInputStream
import org.scalatest.EitherValues._
import org.scalatest.concurrent.Eventually
import org.mockito.MockitoSugar
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should

class AllCodecTest extends AnyFunSuite with should.Matchers with MockitoSugar with Eventually {

  test("should decode null") {
    new AllCodec {
      val is = new StringInputStream("""null""")

      val value = canDecodeAll[None.type].readStream(is)
      value.right.value shouldBe Option.empty[None.type]
    }
  }

  test("should decode empty string") {
    new AllCodec {
      val is = new StringInputStream("")

      val value = canDecodeAll[None.type].readStream(is)
      value.right.value shouldBe Option.empty[None.type]
    }
  }

  test("should encode null") {
    new AllCodec {
      val os = new ByteArrayOutputStream()

      val context: Context = mock[Context]

      canEncodeAll[None.type].writeStream(os, Right(None), context)
      os.toString shouldBe "null"
    }
  }

} 
Example 10
Source File: FutureCodec.scala    From aws-lambda-scala   with MIT License
package io.github.mkotsur.aws.codecs

import java.io.ByteArrayOutputStream
import java.nio.charset.Charset

import io.circe.Encoder
import io.github.mkotsur.aws.handler.CanEncode
import io.github.mkotsur.aws.proxy.ProxyResponse
import io.circe.generic.auto._
import io.circe.syntax._
import cats.syntax.either.catsSyntaxEither

import scala.concurrent.{Await, Future}
import scala.concurrent.duration._
import scala.language.postfixOps
import scala.util.{Failure, Success, Try}

private[aws] trait FutureCodec {
  implicit def canEncodeFuture[I](implicit canEncode: Encoder[I]): CanEncode[Future[I]] =
    CanEncode.instance[Future[I]]((os, responseEither, ctx) => {
      (for {
        response     <- responseEither.toTry
        futureResult <- Try(Await.result(response, ctx.getRemainingTimeInMillis millis))
        json         <- Try(canEncode(futureResult).noSpaces.getBytes)
        _            <- Try(os.write(json))
      } yield {
        ()
      }) match {
        case Success(v) => Right(v)
        case Failure(e) => Left(e)
      }
    })

  implicit def canEncodeProxyResponse[T](implicit canEncode: CanEncode[T]): CanEncode[ProxyResponse[T]] = CanEncode.instance[ProxyResponse[T]](
    (output, proxyResponseEither, ctx) => {

      def writeBody(bodyOption: Option[T]): Either[Throwable, Option[String]] =
        bodyOption match {
          case None => Right(None)
          case Some(body) =>
            val os     = new ByteArrayOutputStream()
            val result = canEncode.writeStream(os, Right(body), ctx)
            os.close()
            result.map(_ => Some(os.toString()))
        }

      val proxyResponseOrError = for {
        proxyResponse <- proxyResponseEither
        bodyOption    <- writeBody(proxyResponse.body)
      } yield
        ProxyResponse[String](
          proxyResponse.statusCode,
          proxyResponse.headers,
          bodyOption
        )

      val response = proxyResponseOrError match {
        case Right(proxyResponse) =>
          proxyResponse
        case Left(e) =>
          ProxyResponse[String](
            500,
            Some(Map("Content-Type" -> s"text/plain; charset=${Charset.defaultCharset().name()}")),
            Some(e.getMessage)
          )
      }

      output.write(response.asJson.noSpaces.getBytes)

      Right(())
    }
  )
} 
Example 11
Source File: DataWeaveCLITest.scala    From data-weave-native   with Apache License 2.0
package org.mule.weave.dwnative.cli

import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.PrintStream

import org.scalatest.FreeSpec
import org.scalatest.Matchers

import scala.io.Source

class DataWeaveCLITest extends FreeSpec with Matchers {

  "should work with output application/json" in {
    val out = System.out
    try {
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      new DataWeaveCLIRunner().run(Array("output application/json --- (1 to 3)[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString
      result.trim shouldBe "1"
    } finally {
      System.setOut(out)
      println("Finish OK 3")
    }
  }

  "should work with simple script and not output" in {
    val defaultOut = System.out
    try {
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      new DataWeaveCLIRunner().run(Array("(1 to 3)[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString
      result.trim shouldBe "1"
    } finally {
      System.setOut(defaultOut)
    }
  }

  "should work ok when sending payload from stdin" in {
    val out = System.out
    val in = System.in
    try {
      val input =
        """[
          |  1,
          |  2,
          |  3
          |]
        """.stripMargin.trim
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8")))
      new DataWeaveCLIRunner().run(Array("payload[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString.trim
      source.close()
      result.trim shouldBe "1"
    } finally {
      System.setOut(out)
      System.setIn(in)
      println("Finish OK 2")
    }
  }

  "should work with light formats" in {
    val out = System.out
    val in = System.in
    try {
      val input =
        """[{
          |  "a" : 1,
          |  "b" : 2,
          |  "c" : 3
          |}]
        """.stripMargin.trim
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8")))
      new DataWeaveCLIRunner().run(Array("input payload json output csv header=false ---payload"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString.trim
      source.close()
      result.trim shouldBe "1,2,3"
    } finally {
      System.setOut(out)
      System.setIn(in)
      println("Finish OK 2")
    }
  }



} 
Example 12
Source File: ModelSerializationTestHelper.scala    From aloha   with MIT License
package com.eharmony.aloha

import java.io.{ObjectInputStream, ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream}


trait ModelSerializationTestHelper {
  def serializeDeserializeRoundTrip[A <: java.io.Serializable](a: A): A = {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(a)
    val bais = new ByteArrayInputStream(baos.toByteArray)
    val ois = new ObjectInputStream(bais)
    val out = ois.readObject()
    out.asInstanceOf[A]
  }
} 
Example 13
Source File: ConsoleModuleTest.scala    From scala-server-toolkit   with MIT License
package com.avast.sst.jvm.system.console

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import cats.effect.SyncIO
import org.scalatest.funsuite.AnyFunSuite

import scala.{Console => SConsole}

class ConsoleModuleTest extends AnyFunSuite {

  test("Console input") {
    SConsole.withIn(new ByteArrayInputStream("test input\n".getBytes("UTF-8"))) {
      val test = for {
        line <- ConsoleModule.make[SyncIO].readLine
      } yield assert(line === "test input")

      test.unsafeRunSync()
    }
  }

  test("Console output") {
    val out = new ByteArrayOutputStream()
    SConsole.withOut(out) {
      val test = for {
        _ <- ConsoleModule.make[SyncIO].printLine("test output")
      } yield ()

      test.unsafeRunSync()
    }

    assert(out.toString("UTF-8") === "test output\n")
  }

  test("Console error") {
    val out = new ByteArrayOutputStream()
    SConsole.withErr(out) {
      val test = for {
        _ <- ConsoleModule.make[SyncIO].printLineToError("test output")
      } yield ()

      test.unsafeRunSync()
    }

    assert(out.toString("UTF-8") === "test output\n")
  }

} 
Example 14
Source File: SparkTestsSuite.scala    From amaterasu   with Apache License 2.0
package org.apache.amaterasu.spark

import java.io.{ByteArrayOutputStream, File}

import io.shinto.amaterasu.spark.PySparkRunnerTests
import org.apache.amaterasu.RunnersTests.RunnersLoadingTests
import org.apache.amaterasu.common.dataobjects.ExecData
import org.apache.amaterasu.common.execution.dependencies._
import org.apache.amaterasu.common.runtime.Environment
import org.apache.amaterasu.utilities.TestNotifier
import org.apache.amaterasu.executor.mesos.executors.ProvidersFactory
import org.apache.spark.repl.amaterasu.runners.spark.SparkScalaRunner
import org.apache.spark.sql.SparkSession
import org.scalatest._

import scala.collection.mutable.ListBuffer


class SparkTestsSuite extends Suites(
  new PySparkRunnerTests(),
  new RunnersLoadingTests()) with BeforeAndAfterAll {

  var env: Environment = _
  var factory: ProvidersFactory = _
  var spark: SparkSession = _

  override def beforeAll(): Unit = {

    env = Environment()
    env.workingDir = "file:///tmp/"
    env.master = "local[*]"

    // I can't apologise enough for this
    val resources = new File(getClass.getResource("/spark_intp.py").getPath).getParent

    val conf = Map[String, Any](
      "spark.cassandra.connection.host" -> "127.0.0.1",
      "sourceTable" -> "documents",
      "spark.local.ip" -> "127.0.0.1"
    )
    env.master = "local[1]"
    if (env.configuration != null) {
      // the original used ++, whose result was discarded; += actually updates the map
      env.configuration += "pysparkPath" -> "/usr/bin/python"
    } else {
      env.configuration = Map(
        "pysparkPath" -> "/usr/bin/python",
        "cwd" -> resources
      )
    }
    val excEnv = Map[String, Any](
      "PYTHONPATH" -> resources
    )
    env.configuration ++ "spark_exec_env" -> excEnv
    factory = ProvidersFactory(ExecData(env, Dependencies(ListBuffer.empty[Repo], List.empty[Artifact]), PythonDependencies(List.empty[PythonPackage]), Map("spark" -> Map.empty[String, Any],"spark_exec_env"->Map("PYTHONPATH"->resources))), "test", new ByteArrayOutputStream(), new TestNotifier(), "test")
    spark = factory.getRunner("spark", "scala").get.asInstanceOf[SparkScalaRunner].spark

    this.nestedSuites.filter(s => s.isInstanceOf[RunnersLoadingTests]).foreach(s => s.asInstanceOf[RunnersLoadingTests].factory = factory)
    this.nestedSuites.filter(s => s.isInstanceOf[PySparkRunnerTests]).foreach(s => s.asInstanceOf[PySparkRunnerTests].factory = factory)


    super.beforeAll()
  }

  override def afterAll(): Unit = {
    spark.stop()

    super.afterAll()
  }

} 
Example 15
Source File: SparkRRunner.scala    From amaterasu   with Apache License 2.0
package org.apache.amaterasu.executor.execution.actions.runners.spark

import java.io.ByteArrayOutputStream
import java.util

import org.apache.amaterasu.common.execution.actions.Notifier
import org.apache.amaterasu.common.logging.Logging
import org.apache.amaterasu.common.runtime.Environment
import org.apache.amaterasu.sdk.AmaterasuRunner
import org.apache.spark.SparkContext


class SparkRRunner extends Logging with AmaterasuRunner {

  override def getIdentifier = "spark-r"

  override def executeSource(actionSource: String, actionName: String, exports: util.Map[String, String]): Unit = {
  }
}

object SparkRRunner {
  def apply(
    env: Environment,
    jobId: String,
    sparkContext: SparkContext,
    outStream: ByteArrayOutputStream,
    notifier: Notifier,
    jars: Seq[String]
  ): SparkRRunner = {
    new SparkRRunner()
  }
} 
Example 16
Source File: ProvidersFactory.scala    From amaterasu   with Apache License 2.0
package org.apache.amaterasu.executor.mesos.executors

import java.io.ByteArrayOutputStream

import org.apache.amaterasu.common.dataobjects.ExecData
import org.apache.amaterasu.common.execution.actions.Notifier
import org.apache.amaterasu.sdk.{AmaterasuRunner, RunnersProvider}
import org.reflections.Reflections

import scala.collection.JavaConverters._

//TODO: Check if we can use this in the YARN impl
class ProvidersFactory {

  var providers: Map[String, RunnersProvider] = _

  def getRunner(groupId: String, id: String): Option[AmaterasuRunner] = {
    val provider = providers.get(groupId)
    provider match {
      case Some(provider) => Some(provider.getRunner(id))
      case None => None
    }
  }
}

object ProvidersFactory {

  def apply(data: ExecData,
            jobId: String,
            outStream: ByteArrayOutputStream,
            notifier: Notifier,
            executorId: String): ProvidersFactory = {

    val result = new ProvidersFactory()
    val reflections = new Reflections(getClass.getClassLoader)
    val runnerTypes = reflections.getSubTypesOf(classOf[RunnersProvider]).asScala.toSet

    result.providers = runnerTypes.map(r => {

      val provider = Manifest.classType(r).runtimeClass.newInstance.asInstanceOf[RunnersProvider]

      notifier.info(s"a provider for group ${provider.getGroupIdentifier} was created")
      provider.init(data, jobId, outStream, notifier, executorId)
      (provider.getGroupIdentifier, provider)
    }).toMap

    result
  }

} 
Example 17
Source File: AvroSerde.scala    From event-sourcing-kafka-streams   with MIT License
package org.amitayh.invoices.common.serde

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import java.time.Instant
import java.util
import java.util.UUID

import com.sksamuel.avro4s._
import org.amitayh.invoices.common.domain._
import org.amitayh.invoices.common.serde.UuidConverters.{fromByteBuffer, toByteBuffer}
import org.apache.avro.Schema
import org.apache.avro.Schema.Field
import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer}

object AvroSerde {
  implicit val instantToSchema: ToSchema[Instant] = new ToSchema[Instant] {
    override val schema: Schema = Schema.create(Schema.Type.STRING)
  }

  implicit val instantToValue: ToValue[Instant] = new ToValue[Instant] {
    override def apply(value: Instant): String = value.toString
  }

  implicit val instantFromValue: FromValue[Instant] = new FromValue[Instant] {
    override def apply(value: Any, field: Field): Instant =
      Instant.parse(value.toString)
  }

  implicit val uuidToSchema: ToSchema[UUID] = new ToSchema[UUID] {
    override val schema: Schema = Schema.create(Schema.Type.BYTES)
  }

  implicit val uuidToValue: ToValue[UUID] = new ToValue[UUID] {
    override def apply(value: UUID): ByteBuffer = toByteBuffer(value)
  }

  implicit val uuidFromValue: FromValue[UUID] = new FromValue[UUID] {
    override def apply(value: Any, field: Field): UUID =
      fromByteBuffer(value.asInstanceOf[ByteBuffer])
  }

  val CommandSerde: Serde[Command] = serdeFor[Command]

  val CommandResultSerde: Serde[CommandResult] = serdeFor[CommandResult]

  val SnapshotSerde: Serde[InvoiceSnapshot] = serdeFor[InvoiceSnapshot]

  val EventSerde: Serde[Event] = serdeFor[Event]

  def toBytes[T: SchemaFor: ToRecord](data: T): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    val output = AvroOutputStream.binary[T](baos)
    output.write(data)
    output.close()
    baos.toByteArray
  }

  def fromBytes[T: SchemaFor: FromRecord](data: Array[Byte]): T = {
    val input = AvroInputStream.binary[T](data)
    input.iterator.next()
  }

  private def serdeFor[T: SchemaFor: ToRecord: FromRecord]: Serde[T] = new Serde[T] {
    override val serializer: Serializer[T] = new Serializer[T] {
      override def serialize(topic: String, data: T): Array[Byte] = toBytes(data)
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
      override def close(): Unit = ()
    }
    override val deserializer: Deserializer[T] = new Deserializer[T] {
      override def deserialize(topic: String, data: Array[Byte]): T = fromBytes(data)
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
      override def close(): Unit = ()
    }
    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
    override def close(): Unit = ()
  }
} 
Example 18
Source File: PackageSpec.scala    From sparkpipe-core   with Apache License 2.0
package software.uncharted.sparkpipe.ops.core.rdd.debug

import java.io.ByteArrayOutputStream

import org.scalatest._
import software.uncharted.sparkpipe.Spark

class PackageSpec extends FunSpec {
  describe("ops.core.rdd.debug") {
    val rdd = Spark.sc.parallelize(Seq((1, "alpha"), (2, "bravo"), (3, "charlie")))

    describe("#countRDDRows()") {
      it("should output a formatted count message using the supplied output function") {
        var output = ""
        countRDDRows("test", (s: String) => output += s)(rdd)
        assertResult("[test] Number of rows: 3")(output)
      }

      it("should output a formatted count message to std out when no output function is supplied") {
        val bos = new ByteArrayOutputStream()
        Console.withOut(bos) {
          countRDDRows("test")(rdd)
        }
        assertResult("[test] Number of rows: 3\n")(bos.toString)
      }
    }

    describe("#takeRDDRows()") {
      it("should output a list of the first N rows of the rdd") {
        var output = ""
        takeRDDRows(2, "test", (s: String) => output += s)(rdd)
        assertResult("[test] First 2 rows0: (1,alpha)1: (2,bravo)")(output)
      }
    }

    describe("#debugRDDRows()") {
      it("should apply a function to the first N rows of the rdd") {
        var output = Seq[(Int, String)]()
        debugRDDRows(2, (s: Seq[(Int, String)]) => output = s)(rdd)
        assertResult(2)(output.length)
        assertResult(output)(rdd.collect().slice(0, 2).toSeq)
      }
    }
  }
} 
Example 19
Source File: PackageSpec.scala    From sparkpipe-core   with Apache License 2.0
package software.uncharted.sparkpipe.ops.core.dataframe.debug

import java.io.ByteArrayOutputStream

import org.apache.spark.sql.Row
import org.scalatest._
import software.uncharted.sparkpipe.Spark
import software.uncharted.sparkpipe.ops.core.rdd.toDF

class PackageSpec extends FunSpec {
  describe("ops.core.dataframe.debug") {
    val rdd = Spark.sc.parallelize(Seq((1, "alpha"), (2, "bravo"), (3, "charlie")))
    val df = toDF(Spark.sparkSession)(rdd)

   describe("#countDFRows()") {
      it("should output a formatted count message using the supplied output function") {
        var output = ""
        countDFRows("test", (s: String) => output += s)(df)
        assertResult("[test] Number of rows: 3")(output)
      }

      it("should output a formatted count message to std out when no output function is supplied") {
        val bos = new ByteArrayOutputStream()
        Console.withOut(bos) {
          countDFRows("test")(df)
        }
        assertResult("[test] Number of rows: 3\n")(bos.toString)
      }
    }

    describe("#takeDFRows()") {
      it("should output a list of the first N rows of the dataframe") {
        var output = ""
        takeDFRows(2, "test", (s: String) => output += s)(df)
        assertResult("[test] First 2 rows0: [1,alpha]1: [2,bravo]")(output)
      }
    }

    describe("#debugDFRows()") {
      it("should apply a function to the first N rows of the dataframe") {
        var output = Seq[Row]()
        debugDFRows(2, (s: Seq[Row]) => output = s)(df)
        assertResult(2)(output.length)
        assertResult(output)(df.collect().slice(0, 2).toSeq)
      }
    }
  }
} 
Example 20
Source File: Zip.scala    From scala-clippy   with Apache License 2.0
package util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object Zip {
  private val BufferSize = 512

  def compress(string: String): Array[Byte] = {
    val os  = new ByteArrayOutputStream(string.length() / 5)
    val gos = new GZIPOutputStream(os)
    gos.write(string.getBytes("UTF-8"))
    gos.close()
    os.close()
    os.toByteArray
  }

  def decompress(compressed: Array[Byte]): String = {
    val is        = new ByteArrayInputStream(compressed)
    val gis       = new GZIPInputStream(is, BufferSize)
    val buffer    = new ByteArrayOutputStream()
    val data      = new Array[Byte](BufferSize)
    var bytesRead = gis.read(data)
    while (bytesRead != -1) {
      // collect raw bytes and decode once at the end, so multi-byte UTF-8 characters
      // split across chunk boundaries are not corrupted
      buffer.write(data, 0, bytesRead)
      bytesRead = gis.read(data)
    }
    gis.close()
    is.close()
    new String(buffer.toByteArray, "UTF-8")
  }
} 
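A round-trip sketch for the helper above, with a made-up input string:

import util.Zip

object ZipUsage extends App {
  val original   = "a fairly repetitive string, " * 10
  val compressed = Zip.compress(original)      // gzip-compressed bytes
  val restored   = Zip.decompress(compressed)  // back to the original text
  assert(restored == original)
  println(s"${original.length} chars -> ${compressed.length} bytes")
}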
Example 21
Source File: Utils.scala    From scala-clippy   with Apache License 2.0
package com.softwaremill.clippy

import java.io.{ByteArrayOutputStream, InputStream}
import java.io.Closeable
import scala.util.control.NonFatal
import scala.util.{Failure, Try}

object Utils {

  
  def runNonDaemon(t: => Unit) = {
    val shutdownHook = new Thread() {
      private val lock             = new Object
      @volatile private var didRun = false

      override def run() =
        lock.synchronized {
          if (!didRun) {
            t
            didRun = true
          }
        }
    }

    Runtime.getRuntime.addShutdownHook(shutdownHook)
    try shutdownHook.run()
    finally Runtime.getRuntime.removeShutdownHook(shutdownHook)
  }

  def inputStreamToBytes(is: InputStream): Array[Byte] =
    try {
      val baos = new ByteArrayOutputStream()
      val buf  = new Array[Byte](512)
      var read = 0
      while ({ read = is.read(buf, 0, buf.length); read } != -1) {
        baos.write(buf, 0, read)
      }
      baos.toByteArray
    } finally is.close()

  object TryWith {
    def apply[C <: Closeable, R](resource: => C)(f: C => R): Try[R] =
      Try(resource).flatMap(resourceInstance => {
        try {
          val returnValue = f(resourceInstance)
          Try(resourceInstance.close()).map(_ => returnValue)
        } catch {
          case NonFatal(exceptionInFunction) =>
            try {
              resourceInstance.close()
              Failure(exceptionInFunction)
            } catch {
              case NonFatal(exceptionInClose) =>
                exceptionInFunction.addSuppressed(exceptionInClose)
                Failure(exceptionInFunction)
            }
        }
      })
  }
} 
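A small sketch combining the two helpers above; the file path is hypothetical.

import java.io.FileInputStream
import com.softwaremill.clippy.Utils

object UtilsUsage extends App {
  // TryWith closes the stream even if reading fails; inputStreamToBytes also closes it, which is harmless
  val bytes = Utils.TryWith(new FileInputStream("/tmp/example.bin")) { in =>
    Utils.inputStreamToBytes(in)
  }
  println(bytes.map(_.length)) // Success(<size>) or Failure(<exception>)
}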
Example 22
Source File: FileUtil.scala    From coursier   with Apache License 2.0
package coursier.cache.internal

import java.io.{ByteArrayOutputStream, InputStream}

object FileUtil {

  // Won't be necessary anymore with Java 9
  // (https://docs.oracle.com/javase/9/docs/api/java/io/InputStream.html#readAllBytes--,
  // via https://stackoverflow.com/questions/1264709/convert-inputstream-to-byte-array-in-java/37681322#37681322)
  def readFullyUnsafe(is: InputStream): Array[Byte] = {
    val buffer = new ByteArrayOutputStream
    val data = Array.ofDim[Byte](16384)

    var nRead = 0
    while ({
      nRead = is.read(data, 0, data.length)
      nRead != -1
    })
      buffer.write(data, 0, nRead)

    buffer.flush()
    buffer.toByteArray
  }

  def readFully(is: => InputStream): Array[Byte] = {
    var is0: InputStream = null
    try {
      is0 = is
      readFullyUnsafe(is0)
    } finally {
      if (is0 != null)
        is0.close()
    }
  }

  def withContent(is: InputStream, f: WithContent, bufferSize: Int = 16384): Unit = {
    val data = Array.ofDim[Byte](bufferSize)

    var nRead = is.read(data, 0, data.length)
    while (nRead != -1) {
      f(data, nRead)
      nRead = is.read(data, 0, data.length)
    }
  }

  trait WithContent {
    def apply(arr: Array[Byte], z: Int): Unit
  }

  class UpdateDigest(md: java.security.MessageDigest) extends FileUtil.WithContent {
    def apply(arr: Array[Byte], z: Int): Unit = md.update(arr, 0, z)
  }

} 
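A usage sketch for the helpers above, fed from an in-memory stream so it runs without touching any files:

import java.io.ByteArrayInputStream
import java.security.MessageDigest
import coursier.cache.internal.FileUtil

object FileUtilUsage extends App {
  val data = "some payload".getBytes("UTF-8")

  // read a whole stream into memory
  val copy = FileUtil.readFully(new ByteArrayInputStream(data))
  assert(copy.sameElements(data))

  // stream the same content through a digest chunk by chunk instead of buffering it all
  val md = MessageDigest.getInstance("SHA-256")
  FileUtil.withContent(new ByteArrayInputStream(data), new FileUtil.UpdateDigest(md))
  println(md.digest().map("%02x".format(_)).mkString)
}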
Example 23
Source File: ZipTests.scala    From coursier   with Apache License 2.0
package coursier.cli.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Random
import java.util.zip.{Deflater, ZipEntry, ZipInputStream, ZipOutputStream}

import coursier.launcher.internal.Zip
import org.junit.runner.RunWith
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatestplus.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class ZipTests extends AnyFlatSpec {

  "zipEntries" should "be fine with custom deflaters" in {

    // Inspired by https://github.com/spring-projects/spring-boot/commit/a50646b7cc3ad941e748dfb450077e3a73706205#diff-2297c301250b25e3b80301c58daf3ea0R621

    val baos = new ByteArrayOutputStream
    val output = new ZipOutputStream(baos) {
      `def` = new Deflater(Deflater.NO_COMPRESSION, true)
    }
    val data = Array.ofDim[Byte](1024 * 1024)
    new Random().nextBytes(data)
    val entry = new ZipEntry("entry.dat")
    output.putNextEntry(entry)
    output.write(data)
    output.closeEntry()
    output.close()

    val result = baos.toByteArray

    val zos = new ZipOutputStream(new ByteArrayOutputStream)
    val entryNames = Zip.zipEntries(new ZipInputStream(new ByteArrayInputStream(result)))
      .map {
        case (ent, content) =>
          println(ent.getCompressedSize)
          val name = ent.getName
          zos.putNextEntry(ent)
          zos.write(content)
          zos.closeEntry()
          name
      }
      .toVector
    zos.close()
    assert(entryNames == Vector("entry.dat"))
  }

} 
Example 24
Source File: WordEmbeddingsLoader.scala    From spark-nlp   with Apache License 2.0
package com.johnsnowlabs.nlp.embeddings

import java.io.{BufferedInputStream, ByteArrayOutputStream, DataInputStream, FileInputStream}

import com.johnsnowlabs.storage.RocksDBConnection
import org.slf4j.LoggerFactory

import scala.io.Source

object WordEmbeddingsTextIndexer {

  def index(
             source: Iterator[String],
             writer: WordEmbeddingsWriter
           ): Unit = {
    try {
      for (line <- source) {
        val items = line.split(" ")
        val word = items(0)
        val embeddings = items.drop(1).map(i => i.toFloat)
        writer.add(word, embeddings)
      }
    } finally {
      writer.close()
    }
  }

  def index(
             source: String,
             writer: WordEmbeddingsWriter
           ): Unit = {
    val sourceFile = Source.fromFile(source)("UTF-8")
    val lines = sourceFile.getLines()
    index(lines, writer)
    sourceFile.close()
  }
}


object WordEmbeddingsBinaryIndexer {

  private val logger = LoggerFactory.getLogger("WordEmbeddings")

  def index(
             source: DataInputStream,
             writer: WordEmbeddingsWriter): Unit = {

    try {
      // File Header
      val numWords = Integer.parseInt(readString(source))
      val vecSize = Integer.parseInt(readString(source))

      // File Body
      for (i <- 0 until numWords) {
        val word = readString(source)

        // Unit Vector
        val vector = readFloatVector(source, vecSize, writer)
        writer.add(word, vector)
      }

      logger.info(s"Loaded $numWords words, vector size $vecSize")
    } finally {
      writer.close()
    }
  }

  def index(
             source: String,
             writer: WordEmbeddingsWriter): Unit = {

    val ds = new DataInputStream(new BufferedInputStream(new FileInputStream(source), 1 << 15))

    try {
      index(ds, writer)
    } finally {
      ds.close()
    }
  }

  
  private def readFloatVector(ds: DataInputStream, vectorSize: Int, indexer: WordEmbeddingsWriter): Array[Float] = {
    // Read Bytes (readFully blocks until the whole vector has been read;
    // a plain read() may return fewer bytes than requested)
    val vectorBuffer = Array.fill[Byte](4 * vectorSize)(0)
    ds.readFully(vectorBuffer)

    // Convert Bytes to Floats
    indexer.fromBytes(vectorBuffer)
  }
} 
Example 25
Source File: TMNodesWriter.scala    From spark-nlp   with Apache License 2.0
package com.johnsnowlabs.nlp.annotators.btm

import java.io.{ByteArrayOutputStream, ObjectOutputStream}

import com.johnsnowlabs.storage.{RocksDBConnection, StorageBatchWriter}

class TMNodesWriter(
                    override protected val connection: RocksDBConnection
                  ) extends StorageBatchWriter[TrieNode] {

  def toBytes(content: TrieNode): Array[Byte] = {
    val stream: ByteArrayOutputStream = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(stream)
    oos.writeObject(content)
    oos.close()
    stream.toByteArray
  }

  def add(word: Int, value: TrieNode): Unit = {
    super.add(word.toString, value)
  }

  override protected def writeBufferSize: Int = 10000
} 
Example 26
Source File: MemoryAppender.scala    From ncdbg   with BSD 3-Clause "New" or "Revised" License
package com.programmaticallyspeaking.ncd.testing

import ch.qos.logback.classic.spi.ILoggingEvent
import ch.qos.logback.core.UnsynchronizedAppenderBase
import ch.qos.logback.core.encoder.Encoder
import ch.qos.logback.core.status.ErrorStatus
import java.io.{ByteArrayOutputStream, IOException, OutputStream}
import java.nio.charset.StandardCharsets

import com.programmaticallyspeaking.ncd.messaging.{Observable, SerializedSubject}

object MemoryAppender {
  private[MemoryAppender] val logEventSubject = new SerializedSubject[String]

  def logEvents: Observable[String] = logEventSubject
}

class MemoryAppender extends UnsynchronizedAppenderBase[ILoggingEvent] {
  import MemoryAppender._
  private var encoder: Encoder[ILoggingEvent] = _
  private val outputStream = new OutputStream {
    override def write(b: Int): Unit = ???

    override def write(b: Array[Byte]): Unit = {
      val str = new String(b, StandardCharsets.UTF_8)
      logEventSubject.onNext(str)
    }
  }

  override def start(): Unit = {
    try {
      Option(encoder).foreach(_.init(outputStream))
      super.start()
    } catch {
      case e: IOException =>
        started = false
        addStatus(new ErrorStatus("Failed to initialize encoder for appender named [" + name + "].", this, e))
    }
  }

  override protected def append(event: ILoggingEvent): Unit = {
    if (!isStarted) return
    try {
      event.prepareForDeferredProcessing()
      Option(encoder).foreach(_.doEncode(event))
    } catch {
      case ioe: IOException =>
        started = false
        addStatus(new ErrorStatus("IO failure in appender", this, ioe))
    }
  }

  def setEncoder(e: Encoder[ILoggingEvent]): Unit = {
    encoder = e
  }
} 
Example 27
Source File: StreamUtilities.scala    From mmlspark   with MIT License
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.env

import java.io.{ByteArrayOutputStream, InputStream}
import java.util.zip.ZipInputStream

import org.apache.commons.io.IOUtils

import scala.io.Source
import scala.util.Random

object StreamUtilities {

  import scala.util.{Failure, Success, Try}
  def usingMany[T <: AutoCloseable, U](disposable: Seq[T])(task: Seq[T] => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.foreach(d => d.close())
    }
  }

  def using[T <: AutoCloseable, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  def usingSource[T <: Source, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  
  class ZipIterator(stream: InputStream, zipfile: String, random: Random, sampleRatio: Double = 1)
    extends Iterator[(String, Array[Byte])] {

    private val zipStream = new ZipInputStream(stream)

    private def getNext: Option[(String, Array[Byte])] = {
      var entry = zipStream.getNextEntry
      while (entry != null) {
        if (!entry.isDirectory && random.nextDouble < sampleRatio) {

          val filename = zipfile + java.io.File.separator + entry.getName

          //extracting all bytes of a given entry
          val byteStream = new ByteArrayOutputStream
          IOUtils.copy(zipStream, byteStream)
          val bytes = byteStream.toByteArray

          assert(bytes.length == entry.getSize,
            "incorrect number of bytes is read from zipstream: " + bytes.length + " instead of " + entry.getSize)

          return Some((filename, bytes))
        }
        entry = zipStream.getNextEntry
      }

      stream.close()
      None
    }

    private var nextValue = getNext

    def hasNext: Boolean = nextValue.isDefined

    def next: (String, Array[Byte]) = {
      val result = nextValue.get
      nextValue = getNext
      result
    }
  }

} 
Example 28
Source File: ConsoleProgressBarTest.scala    From Argus-SAF   with Apache License 2.0
package hu.ssh.progressbar.console

import java.io.{ByteArrayOutputStream, PrintStream}

import com.google.common.base.{Splitter, Strings}
import com.google.common.collect.Iterables
import hu.ssh.progressbar.ConsoleProgressBar
import org.scalatest.{FlatSpec, Matchers}


class ConsoleProgressBarTest extends FlatSpec with Matchers {
  "ProgressBar" should "output as expected" in {
    val outputstream = new ByteArrayOutputStream
    try {
      val progressBar = ConsoleProgressBar.on(new PrintStream(outputstream)).withFormat(":percent")
      progressBar.tick(0)
      assert(getLastOutput(outputstream.toString) == "  0.00")
      progressBar.tick(25)
      assert(getLastOutput(outputstream.toString) == " 25.00")
      progressBar.tick(30)
      assert(getLastOutput(outputstream.toString) == " 55.00")
      progressBar.tick(44)
      assert(getLastOutput(outputstream.toString) == " 99.00")
      progressBar.tickOne()
      assert(getLastOutput(outputstream.toString) == "100.00")
    } finally outputstream.close()
  }

  private def getLastOutput(string: String): String = {
    if (Strings.isNullOrEmpty(string)) return string
    val outputs = Splitter.on(ConsoleProgressBar.CARRIAGE_RETURN).omitEmptyStrings.split(string)
    Iterables.getLast(outputs)
  }
} 
Example 29
Source File: ConsoleLoggerSpec.scala    From odin   with Apache License 2.0
package io.odin.loggers

import java.io.{ByteArrayOutputStream, PrintStream}

import cats.effect.{IO, Timer}
import cats.syntax.all._
import io.odin.Level._
import io.odin.formatter.Formatter
import io.odin.{Level, LoggerMessage, OdinSpec}

class ConsoleLoggerSpec extends OdinSpec {
  implicit val timer: Timer[IO] = IO.timer(scala.concurrent.ExecutionContext.global)

  it should "route all messages with level <= INFO to stdout" in {
    forAll { (loggerMessage: LoggerMessage, formatter: Formatter) =>
      whenever(loggerMessage.level <= Info) {
        val outBaos = new ByteArrayOutputStream()
        val stdOut = new PrintStream(outBaos)
        val errBaos = new ByteArrayOutputStream()
        val stdErr = new PrintStream(errBaos)

        val consoleLogger = ConsoleLogger[IO](formatter, stdOut, stdErr, Level.Trace)
        consoleLogger.log(loggerMessage).unsafeRunSync()
        outBaos.toString() shouldBe (formatter.format(loggerMessage) + System.lineSeparator())
      }
    }
  }

  it should "route all messages with level >= WARN to stderr" in {
    forAll { (loggerMessage: LoggerMessage, formatter: Formatter) =>
      whenever(loggerMessage.level > Info) {
        val outBaos = new ByteArrayOutputStream()
        val stdOut = new PrintStream(outBaos)
        val errBaos = new ByteArrayOutputStream()
        val stdErr = new PrintStream(errBaos)

        val consoleLogger = ConsoleLogger[IO](formatter, stdOut, stdErr, Level.Trace)
        consoleLogger.log(loggerMessage).unsafeRunSync()
        errBaos.toString() shouldBe (formatter.format(loggerMessage) + System.lineSeparator())
      }
    }
  }
} 
Example 30
Source File: LoggerOutputStream.scala    From hail   with MIT License
package is.hail.utils

import java.io.{ByteArrayOutputStream, OutputStream}
import java.nio.charset.StandardCharsets

import org.apache.log4j.{Level, Logger}

class LoggerOutputStream(logger: Logger, level: Level) extends OutputStream {
  private val buffer = new ByteArrayOutputStream()

  override def write(b: Int): Unit = {
    buffer.write(b)
    if (b == '\n') {
      val line = buffer.toString(StandardCharsets.UTF_8.name())
      level match {
        case Level.TRACE => logger.trace(line)
        case Level.DEBUG => logger.debug(line)
        case Level.INFO  => logger.info(line)
        case Level.WARN  => logger.warn(line)
        case Level.ERROR => logger.error(line)
      }
      buffer.reset()
    }
  }
} 
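A small sketch of how the stream above might be wired up; the logger name is arbitrary.

import java.io.PrintStream
import org.apache.log4j.{Level, Logger}
import is.hail.utils.LoggerOutputStream

object LoggerOutputStreamUsage extends App {
  val log = Logger.getLogger("example")
  val ps  = new PrintStream(new LoggerOutputStream(log, Level.INFO), true)

  ps.println("this line is forwarded to log4j at INFO level")
  // it could also be installed globally, e.g. System.setOut(ps), to capture stray println output
}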
Example 31
Source File: CodecSpec.scala    From hail   with MIT License
package is.hail.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream}

import is.hail.annotations.{Region, RegionValue}
import is.hail.asm4s.{Code, TypeInfo, Value}
import is.hail.expr.ir.{EmitClassBuilder, EmitFunctionBuilder, ExecuteContext, typeToTypeInfo}
import is.hail.types.encoded.EType
import is.hail.types.physical.PType
import is.hail.types.virtual.Type
import is.hail.rvd.RVDContext
import is.hail.sparkextras.ContextRDD
import is.hail.utils.using
import org.apache.spark.rdd.RDD

trait AbstractTypedCodecSpec extends Spec {
  def encodedType: EType
  def encodedVirtualType: Type

  type StagedEncoderF[T] = (Value[Region], Value[T], Value[OutputBuffer]) => Code[Unit]
  type StagedDecoderF[T] = (Value[Region], Value[InputBuffer]) => Code[T]

  def buildEncoder(ctx: ExecuteContext, t: PType): (OutputStream) => Encoder

  def decodedPType(requestedType: Type): PType

  def buildDecoder(ctx: ExecuteContext, requestedType: Type): (PType, (InputStream) => Decoder)

  def encode(ctx: ExecuteContext, t: PType, offset: Long): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    using(buildEncoder(ctx, t)(baos))(_.writeRegionValue(offset))
    baos.toByteArray
  }

  def decode(ctx: ExecuteContext, requestedType: Type, bytes: Array[Byte], region: Region): (PType, Long) = {
    val bais = new ByteArrayInputStream(bytes)
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, dec(bais).readRegionValue(region))
  }

  def buildCodeInputBuffer(is: Code[InputStream]): Code[InputBuffer]

  def buildCodeOutputBuffer(os: Code[OutputStream]): Code[OutputBuffer]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_]): (PType, StagedDecoderF[T])

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_]): StagedEncoderF[T]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_], ti: TypeInfo[T]): (PType, StagedDecoderF[T]) = {
    val (ptype, dec) = buildEmitDecoderF[T](requestedType, cb)
    assert(ti == typeToTypeInfo(requestedType))
    ptype -> dec
  }

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_], ti: TypeInfo[T]): StagedEncoderF[T] = {
    assert(ti == typeToTypeInfo(t))
    buildEmitEncoderF[T](t, cb)
  }

  // FIXME: is there a better place for this to live?
  def decodeRDD(ctx: ExecuteContext, requestedType: Type, bytes: RDD[Array[Byte]]): (PType, ContextRDD[Long]) = {
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, ContextRDD.weaken(bytes).cmapPartitions { (ctx, it) =>
      RegionValue.fromBytes(dec, ctx.region, it)
    })
  }

  override def toString: String = super[Spec].toString
} 
Example 32
Source File: SparqlUtil.scala    From CM-Well   with Apache License 2.0
package cmwell.tools.neptune.export

import java.io.ByteArrayOutputStream
import java.net.URLEncoder

import org.apache.jena.graph.Graph
import org.apache.jena.riot.{Lang, RDFDataMgr}

object SparqlUtil {

  def extractSubjectFromTriple(triple: String): String = {
    triple.split(" ")(0)
  }

  def getTriplesOfSubGraph(subGraph: Graph): String = {
    val tempOs = new ByteArrayOutputStream
    RDFDataMgr.write(tempOs, subGraph, Lang.NTRIPLES)
    new String(tempOs.toByteArray, "UTF-8")
  }

  def generateSparqlCmdForDefaultGraph(triplesPerGraph: Iterable[SubjectGraphTriple]): String = {
    triplesPerGraph.map(subGraphTriple => encode(subGraphTriple.triple)).mkString
  }

  def generateSparqlCmdForNamedGraph(graph: String, triplesPerGraph: Iterable[SubjectGraphTriple]): String = {

    " GRAPH <" + encode(graph) + "> { " + triplesPerGraph.map(trio => encode(trio.triple)).mkString + "}"
  }

  def buildGroupedSparqlCmd(subjects: Iterable[String], allSubjGraphTriples: Iterable[List[SubjectGraphTriple]], updateMode: Boolean): String = {
    var sparqlCmd = "update="
    val deleteSubj = if (updateMode) Some("DELETE { ?s ?p ?o . } WHERE { VALUES ?s { " + subjects.map(subject => encode(subject) + " ").mkString + "} ?s ?p ?o };") else None
    val insertDefaultGraphSparqlCmd = "INSERT DATA {" + allSubjGraphTriples.flatten.filterNot(trio => predicateContainsMeta(trio)).groupBy(trio => trio.graph).map(graphWithTriples => graphWithTriples._1.fold(generateSparqlCmdForDefaultGraph(graphWithTriples._2))(graph => "")).mkString + "}"
    val insertNamedGraphSparqlCmd = "INSERT DATA {" + allSubjGraphTriples.flatten.filterNot(trio => predicateContainsMeta(trio)).groupBy(trio => trio.graph).map(graphWithTriples => graphWithTriples._1.fold("")(graphName => generateSparqlCmdForNamedGraph(graphName, graphWithTriples._2))).mkString + "}"
    sparqlCmd + deleteSubj.getOrElse("") + insertDefaultGraphSparqlCmd + ";" + insertNamedGraphSparqlCmd
  }

  def encode(str: String): String = {
    URLEncoder.encode(str, "UTF-8")
  }

  def predicateContainsMeta(trio: SubjectGraphTriple): Boolean = {
    trio.triple.contains("meta/sys")
  }

} 
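A minimal sketch of how a graph might be produced and streamed the same way as getTriplesOfSubGraph, assuming a small in-memory Jena model (the subject and predicate URIs below are made up for illustration):

import java.io.ByteArrayOutputStream

import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.riot.{Lang, RDFDataMgr}

object SparqlUtilSketch extends App {
  // Build a tiny in-memory model; the URIs are placeholders.
  val model = ModelFactory.createDefaultModel()
  val subject = model.createResource("http://example.org/subject")
  val predicate = model.createProperty("http://example.org/predicate")
  model.add(subject, predicate, "some value")

  // Same pattern as getTriplesOfSubGraph: render the graph as N-Triples into a byte buffer.
  val tempOs = new ByteArrayOutputStream
  RDFDataMgr.write(tempOs, model.getGraph, Lang.NTRIPLES)
  println(new String(tempOs.toByteArray, "UTF-8"))
}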
Example 33
Source File: SubEntryTest.scala    From lila-openingexplorer   with GNU Affero General Public License v3.0 5 votes vote down vote up
package lila.openingexplorer

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }

import org.specs2.mutable._

import chess.{ Color, Pos }
import chess.format.Uci

class SubEntryTest extends Specification {

  private def pipe(entry: SubEntry): SubEntry = {
    val out = new ByteArrayOutputStream()
    entry.write(out)

    val in = new ByteArrayInputStream(out.toByteArray)
    SubEntry.read(in)
  }

  "master database packer" should {

    "pack a single game" in {
      val ref   = GameRef("ref00000", Some(Color.White), SpeedGroup.Blitz, 1230)
      val entry = SubEntry.fromGameRef(ref, Left(Uci.Move(Pos.E2, Pos.E4)))

      pipe(entry).gameRefs mustEqual List(ref)
    }

    "pack two games" in {
      val move  = Left(Uci.Move(Pos.D2, Pos.D4))
      val g1    = GameRef("g0000001", Some(Color.Black), SpeedGroup.Classical, 2300)
      val g2    = GameRef("g0000002", None, SpeedGroup.Classical, 2455)
      val entry = SubEntry.fromGameRef(g1, move).withGameRef(g2, move)

      pipe(entry).gameRefs mustEqual List(g2, g1)
    }
  }
} 
Example 34
Source File: PackHelperTest.scala    From lila-openingexplorer   with GNU Affero General Public License v3.0 5 votes vote down vote up
package lila.openingexplorer

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }
import org.specs2.mutable._
import chess.format.Uci
import chess.Pos
import chess.{ King, Rook }

class PackHelperTest extends Specification with PackHelper {

  def pipeMove(move: Either[Uci.Move, Uci.Drop]): Either[Uci.Move, Uci.Drop] = {
    val out = new ByteArrayOutputStream()
    writeUci(out, move)

    val in = new ByteArrayInputStream(out.toByteArray)
    readUci(in)
  }

  "the pack helper" should {
    "correctly pack moves" in {
      val move = Uci.Move(Pos.E2, Pos.E3)
      pipeMove(Left(move)) mustEqual Left(move)
    }

    "correctly pack promotions" in {
      val move = Uci.Move(Pos.A7, Pos.A8, Some(Rook))
      pipeMove(Left(move)) mustEqual Left(move)
    }

    "correctly pack drops" in {
      val drop = Uci.Drop(King, Pos.H3)
      pipeMove(Right(drop)) mustEqual Right(drop)
    }
  }

  List(7, 127, 128, 129, 254, 255, 256, 257, 1234, 864197252500L).foreach { x =>
    "correctly pack uint: " + x in {
      val out = new ByteArrayOutputStream()
      writeUint(out, x)

      val in = new ByteArrayInputStream(out.toByteArray)
      readUint(in) mustEqual x
    }
  }
} 
Example 35
Source File: Json4sSerialization.scala    From kafka-serialization   with Apache License 2.0 5 votes vote down vote up
package com.ovoenergy.kafka.serialization.json4s

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import com.ovoenergy.kafka.serialization.core._
import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import org.json4s.Formats
import org.json4s.native.Serialization.{read, write}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe._

trait Json4sSerialization {

  def json4sSerializer[T <: AnyRef](implicit jsonFormats: Formats): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val writer = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO Use scala-arm
    try {
      write(data, writer)
      writer.flush()
    } finally {
      writer.close()
    }
    bout.toByteArray
  }

  def json4sDeserializer[T: TypeTag](implicit jsonFormats: Formats): KafkaDeserializer[T] = deserializer { (_, data) =>
    val tt = implicitly[TypeTag[T]]
    implicit val cl = ClassTag[T](tt.mirror.runtimeClass(tt.tpe))
    read[T](new InputStreamReader(new ByteArrayInputStream(data), StandardCharsets.UTF_8))
  }

} 
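A hedged usage sketch for the serializer/deserializer pair above; the payload case class and topic name are made up, and org.json4s.DefaultFormats is assumed to be sufficient for it:

import com.ovoenergy.kafka.serialization.json4s.Json4sSerialization
import org.json4s.{DefaultFormats, Formats}

// Hypothetical payload type used only for this sketch.
case class Ping(id: Int, note: String)

object Json4sRoundTripSketch extends App with Json4sSerialization {
  implicit val formats: Formats = DefaultFormats

  val ser = json4sSerializer[Ping]
  val de = json4sDeserializer[Ping]

  // Kafka (de)serializers take (topic, data); the topic name here is arbitrary.
  val bytes = ser.serialize("example-topic", Ping(1, "hello"))
  println(de.deserialize("example-topic", bytes)) // expected: Ping(1,hello)
}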
Example 36
Source File: SpraySerialization.scala    From kafka-serialization   with Apache License 2.0 5 votes vote down vote up
package com.ovoenergy.kafka.serialization.spray

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import spray.json._
import com.ovoenergy.kafka.serialization.core._

trait SpraySerialization {

  def spraySerializer[T](implicit format: JsonWriter[T]): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val osw = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO use scala-arm
    try {
      osw.write(data.toJson.compactPrint)
      osw.flush()
    } finally {
      osw.close()
    }
    bout.toByteArray
  }

  def sprayDeserializer[T](implicit format: JsonReader[T]): KafkaDeserializer[T] = deserializer { (_, data) =>
    JsonParser(ParserInput(data)).convertTo[T]
  }

} 
Example 37
Source File: Serialize.scala    From morpheus   with Apache License 2.0 5 votes vote down vote up
package org.opencypher.morpheus.impl.expressions

import java.io.ByteArrayOutputStream

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, _}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.opencypher.morpheus.impl.expressions.EncodeLong.encodeLong
import org.opencypher.morpheus.impl.expressions.Serialize._
import org.opencypher.okapi.impl.exception


case class Serialize(children: Seq[Expression]) extends Expression {

  override def dataType: DataType = BinaryType

  override def nullable: Boolean = false

  // TODO: Only write length if more than one column is serialized
  override def eval(input: InternalRow): Any = {
    // TODO: Reuse from a pool instead of allocating a new one for each serialization
    val out = new ByteArrayOutputStream()
    children.foreach { child =>
      child.dataType match {
        case BinaryType => write(child.eval(input).asInstanceOf[Array[Byte]], out)
        case StringType => write(child.eval(input).asInstanceOf[UTF8String], out)
        case IntegerType => write(child.eval(input).asInstanceOf[Int], out)
        case LongType => write(child.eval(input).asInstanceOf[Long], out)
        case other => throw exception.UnsupportedOperationException(s"Cannot serialize Spark data type $other.")
      }
    }
    out.toByteArray
  }

  override protected def doGenCode(
    ctx: CodegenContext,
    ev: ExprCode
  ): ExprCode = {
    ev.isNull = FalseLiteral
    val out = ctx.freshName("out")
    val serializeChildren = children.map { child =>
      val childEval = child.genCode(ctx)
      s"""|${childEval.code}
          |if (!${childEval.isNull}) {
          |  ${Serialize.getClass.getName.dropRight(1)}.write(${childEval.value}, $out);
          |}""".stripMargin
    }.mkString("\n")
    val baos = classOf[ByteArrayOutputStream].getName
    ev.copy(
      code = code"""|$baos $out = new $baos();
          |$serializeChildren
          |byte[] ${ev.value} = $out.toByteArray();""".stripMargin)
  }

}

object Serialize {

  val supportedTypes: Set[DataType] = Set(BinaryType, StringType, IntegerType, LongType)

  @inline final def write(value: Array[Byte], out: ByteArrayOutputStream): Unit = {
    out.write(encodeLong(value.length))
    out.write(value)
  }

  @inline final def write(
    value: Boolean,
    out: ByteArrayOutputStream
  ): Unit = write(if (value) 1.toLong else 0.toLong, out)

  @inline final def write(value: Byte, out: ByteArrayOutputStream): Unit = write(value.toLong, out)

  @inline final def write(value: Int, out: ByteArrayOutputStream): Unit = write(value.toLong, out)

  @inline final def write(value: Long, out: ByteArrayOutputStream): Unit = write(encodeLong(value), out)

  @inline final def write(value: UTF8String, out: ByteArrayOutputStream): Unit = write(value.getBytes, out)

  @inline final def write(value: String, out: ByteArrayOutputStream): Unit = write(value.getBytes, out)

} 
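All of the write overloads above reduce to a length-prefixed layout: encodeLong(length) followed by the raw bytes of the value. A standalone sketch of the same idea, using DataOutputStream.writeLong for the prefix instead of morpheus' encodeLong (so the exact byte encoding differs):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.nio.charset.StandardCharsets

object LengthPrefixedSketch extends App {
  // Write each value as an 8-byte length prefix followed by its payload bytes.
  def frame(values: Seq[Array[Byte]]): Array[Byte] = {
    val bytes = new ByteArrayOutputStream()
    val out = new DataOutputStream(bytes)
    values.foreach { v => out.writeLong(v.length.toLong); out.write(v) }
    out.flush()
    bytes.toByteArray
  }

  // Read frames back until the stream is exhausted.
  def unframe(data: Array[Byte]): List[Array[Byte]] = {
    val in = new DataInputStream(new ByteArrayInputStream(data))
    Iterator.continually(in)
      .takeWhile(_.available() > 0)
      .map { s =>
        val buf = new Array[Byte](s.readLong().toInt)
        s.readFully(buf)
        buf
      }
      .toList
  }

  val packed = frame(Seq("id".getBytes(StandardCharsets.UTF_8), "42".getBytes(StandardCharsets.UTF_8)))
  println(unframe(packed).map(new String(_, StandardCharsets.UTF_8))) // List(id, 42)
}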
Example 38
Source File: ExampleTest.scala    From morpheus   with Apache License 2.0 5 votes vote down vote up
package org.opencypher.morpheus.examples

import java.io.{ByteArrayOutputStream, PrintStream}
import java.net.URI

import org.junit.runner.RunWith
import org.opencypher.okapi.testing.Bag._
import org.scalatest.{BeforeAndAfterAll, FunSpec, Matchers}
import org.scalatestplus.junit.JUnitRunner

import scala.io.Source

@RunWith(classOf[JUnitRunner])
abstract class ExampleTest extends FunSpec with Matchers with BeforeAndAfterAll {

  private val oldStdOut = System.out

  protected val emptyOutput: String = ""

  protected def validate(app: => Unit, expectedOut: URI): Unit = {
    validate(app, Source.fromFile(expectedOut).mkString)
  }

  protected def validateBag(app: => Unit, expectedOut: URI): Unit = {
    val source = Source.fromFile(expectedOut)
    val expectedLines = source.getLines().toList
    val appLines = capture(app).split(System.lineSeparator())
    withClue(s"${appLines.mkString("\n")} not equal to ${expectedLines.mkString("\n")}") {
      appLines.toBag shouldEqual expectedLines.toBag
    }
  }

  protected def validate(app: => Unit, expectedOut: String): Unit = {
    capture(app) shouldEqual expectedOut
  }

  private def capture(app: => Unit): String = {
    val charset = "UTF-8"
    val outCapture = new ByteArrayOutputStream()
    val printer = new PrintStream(outCapture, true, charset)
    Console.withOut(printer)(app)
    outCapture.toString(charset)
  }

  override protected def afterAll(): Unit = {
    System.setOut(oldStdOut)
    super.afterAll()
  }
} 
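The capture helper is the core trick here: swap Console.out for a PrintStream over a ByteArrayOutputStream for the duration of the block. A minimal standalone sketch:

import java.io.{ByteArrayOutputStream, PrintStream}

object CaptureStdoutSketch extends App {
  def capture(app: => Unit): String = {
    val buffer = new ByteArrayOutputStream()
    // Redirect Console.out only while the thunk runs.
    Console.withOut(new PrintStream(buffer, true, "UTF-8"))(app)
    buffer.toString("UTF-8")
  }

  val captured = capture { println("hello from the sketch") }
  assert(captured.trim == "hello from the sketch")
}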
Example 39
Source File: RawTextSender.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam


private[streaming]
object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
} 
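The ByteArrayOutputStream pattern worth noting above is filling a pre-sized buffer by cycling over the input lines until a target size is reached. A standalone sketch of that loop, with a DataOutputStream standing in for the Kryo serialization stream (so the byte layout differs from Spark's):

import java.io.{ByteArrayOutputStream, DataOutputStream}

object FillBufferSketch extends App {
  val lines = Array("alpha", "beta", "gamma") // stand-in for the file contents
  val blockSize = 1024                        // stand-in for the <blockSize> argument

  val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
  val out = new DataOutputStream(bufferStream)
  var i = 0
  // Keep appending records, wrapping around the input, until the buffer holds at least blockSize bytes.
  while (bufferStream.size < blockSize) {
    out.writeUTF(lines(i))
    i = (i + 1) % lines.length
  }
  out.flush()
  println(s"filled ${bufferStream.size} bytes from ${lines.length} distinct lines")
}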
Example 40
Source File: RateLimitedOutputStreamSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.ByteArrayOutputStream
import java.nio.charset.StandardCharsets
import java.util.concurrent.TimeUnit._

import org.apache.spark.SparkFunSuite

class RateLimitedOutputStreamSuite extends SparkFunSuite {

  private def benchmark[U](f: => U): Long = {
    val start = System.nanoTime
    f
    System.nanoTime - start
  }

  test("write") {
    val underlying = new ByteArrayOutputStream
    val data = "X" * 41000
    val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000)
    val elapsedNs = benchmark { stream.write(data.getBytes(StandardCharsets.UTF_8)) }

    val seconds = SECONDS.convert(elapsedNs, NANOSECONDS)
    assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.")
    assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.")
    assert(underlying.toString("UTF-8") === data)
  }
} 
Example 41
Source File: ByteBufferOutputStream.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer


private[spark] class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) {

  def this() = this(32)

  def getCount(): Int = count

  private[this] var closed: Boolean = false

  override def write(b: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b)
  }

  override def write(b: Array[Byte], off: Int, len: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b, off, len)
  }

  override def reset(): Unit = {
    require(!closed, "cannot reset a closed ByteBufferOutputStream")
    super.reset()
  }

  override def close(): Unit = {
    if (!closed) {
      super.close()
      closed = true
    }
  }

  def toByteBuffer: ByteBuffer = {
    require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed")
    ByteBuffer.wrap(buf, 0, count)
  }
} 
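A minimal usage sketch of the class above (note it is private[spark], so outside Spark you would copy the class rather than import it): writes must happen before close(), and toByteBuffer is only legal after close().

import java.nio.charset.StandardCharsets

object ByteBufferOutputStreamSketch extends App {
  val out = new ByteBufferOutputStream()  // assumes the class above is visible from this code
  out.write("payload".getBytes(StandardCharsets.UTF_8))
  out.close()                             // required before toByteBuffer
  val buffer = out.toByteBuffer           // wraps the internal array without copying
  val roundTrip = new Array[Byte](buffer.remaining())
  buffer.get(roundTrip)
  println(new String(roundTrip, StandardCharsets.UTF_8)) // payload
}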
Example 42
Source File: PythonRDDSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.api.python

import java.io.{ByteArrayOutputStream, DataOutputStream}
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite

class PythonRDDSuite extends SparkFunSuite {

  test("Writing large strings to the worker") {
    val input: List[String] = List("a"*100000)
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    PythonRDD.writeIteratorToStream(input.iterator, buffer)
  }

  test("Handle nulls gracefully") {
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    // Should not throw an NPE when writing an Iterator with null in it
    // The correctness will be tested in Python
    PythonRDD.writeIteratorToStream(Iterator("a", null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer)
    PythonRDD.writeIteratorToStream(Iterator("a".getBytes(StandardCharsets.UTF_8), null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes(StandardCharsets.UTF_8)), buffer)
    PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer)
    PythonRDD.writeIteratorToStream(Iterator(
      (null, null),
      ("a".getBytes(StandardCharsets.UTF_8), null),
      (null, "b".getBytes(StandardCharsets.UTF_8))), buffer)
  }
} 
Example 43
Source File: GenericAvroSerializerSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 44
Source File: MqttConfig.scala    From akka-iot-mqtt-v2   with GNU Lesser General Public License v3.0 5 votes vote down vote up
package akkaiot

import scala.concurrent.duration._

import java.io.Serializable
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.ObjectInputStream
import java.io.ObjectOutputStream

import com.sandinh.paho.akka._
import com.sandinh.paho.akka.MqttPubSub._

object MqttConfig {
  val topic = "akka-iot-mqtt-topic"

  // Pub-Sub config
  val psConfig = PSConfig(
    brokerUrl = "tcp://test.mosquitto.org:1883",
    userName = null,
    password = null,
    stashTimeToLive = 1.minute,
    stashCapacity = 8000,
    reconnectDelayMin = 10.millis,
    reconnectDelayMax = 30.seconds,
    cleanSession = false
  )

  // Serialize object to byte array
  def writeToByteArray(obj: Any): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    val oos = new ObjectOutputStream(baos)
    try {
      oos.writeObject(obj)
      baos.toByteArray
    } finally {
      try {
        oos.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }

  // Deserialize object from byte array
  def readFromByteArray[A](bytes: Array[Byte]): A = {
    val bais = new ByteArrayInputStream(bytes)
    val ois = new ObjectInputStream(bais)
    try {
      val obj = ois.readObject
      obj.asInstanceOf[A]
    } finally {
      try {
        ois.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }
} 
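A hedged round-trip sketch for the two helpers above; the payload class is made up, and any java.io.Serializable value works:

import akkaiot.MqttConfig

// Hypothetical payload for the sketch; case classes are already Serializable.
case class DeviceReading(deviceId: String, value: Double)

object MqttConfigSketch extends App {
  val original = DeviceReading("device-42", 21.5)
  val bytes = MqttConfig.writeToByteArray(original)
  val restored = MqttConfig.readFromByteArray[DeviceReading](bytes)
  assert(restored == original)
  println(s"round-tripped ${bytes.length} bytes: $restored")
}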
Example 45
Source File: EventHistoryFileReportingSuite.scala    From sparklens   with Apache License 2.0 5 votes vote down vote up
package com.qubole.sparklens.app

import java.io.{ByteArrayOutputStream, PrintStream}

import com.qubole.sparklens.TestUtils
import org.scalatest.FunSuite

class EventHistoryFileReportingSuite extends FunSuite {

  test("Reporting from sparklens and event-history should be same") {
    val eventHistoryFile = s"${System.getProperty("user.dir")}" +
      s"/src/test/event-history-test-files/local-1532512550423"

    // the corresponding sparklens dump is in the same location and has the same name, but with an additional suffix
    val sparklensDump = TestUtils.getFileContents(eventHistoryFile + ".sparklens.json")

    validateOutput(outputFromSparklensDump(sparklensDump),
      outputFromEventHistoryReport(eventHistoryFile))
  }


  private def outputFromSparklensDump(dump: String): String = {
    val out = new ByteArrayOutputStream()
    Console.withOut(new PrintStream(out)) {
      ReporterApp.startAnalysersFromString(dump)
    }
    out.toString
  }

  private def outputFromEventHistoryReport(file: String): String = {
    val out = new ByteArrayOutputStream()
    Console.withOut(new PrintStream(out)) {
      new EventHistoryReporter(file)
    }
    out.toString
  }

  private def validateOutput(file1:String, file2:String) = {
    assert(file1.size == file2.size,
      "output size is different between eventlogs report and sparklens.json report")
    assert(file1.lines.zip(file2.lines).filterNot(x => x._1 == x._2).size == 0,
      "Report lines are not matching between eventlogs report and sparklens.json report")
  }
} 
Example 46
Source File: CompatibilitySuite.scala    From sparklens   with Apache License 2.0 5 votes vote down vote up
import java.io.{ByteArrayOutputStream, FileNotFoundException, PrintStream}

import com.qubole.sparklens.TestUtils
import com.qubole.sparklens.app.ReporterApp
import org.scalatest.FunSuite

import scala.util.control.Breaks._

class CompatibilitySuite extends FunSuite {

  test("should be able to report on previously generated sparklens dumps") {

    breakable {

      (1 to 100).foreach(x => { //run for the versions of sparklens output saved
        try {

          val testInput = TestUtils.getFileContents(
            s"${System.getProperty("user.dir")}/src/test/compatibility-files/version-${x}.json")

          val testOut = new ByteArrayOutputStream()
          Console.withOut(new PrintStream(testOut)) {
            ReporterApp.startAnalysersFromString(testInput)
          }
          val testOutput = testOut.toString

          val olderOutput = TestUtils.getFileContents(
            s"${System.getProperty("user.dir")}/src/test/compatibility-files/version-${x}.output")

          
          olderOutput.split("\n").foreach(line => {
            assert(testOutput.contains(line))
          })
        } catch {
          case e: FileNotFoundException => break
        }
      })
    }
  }

} 
Example 47
Source File: RawTextSender.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.{SparkConf, Logging}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam


private[streaming]
object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
} 
Example 48
Source File: RateLimitedOutputStreamSuite.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.ByteArrayOutputStream
import java.util.concurrent.TimeUnit._

import org.apache.spark.SparkFunSuite

class RateLimitedOutputStreamSuite extends SparkFunSuite {

  private def benchmark[U](f: => U): Long = {
    val start = System.nanoTime
    f
    System.nanoTime - start
  }

  test("write") {
    val underlying = new ByteArrayOutputStream
    val data = "X" * 41000
    val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000)
    val elapsedNs = benchmark { stream.write(data.getBytes("UTF-8")) }

    val seconds = SECONDS.convert(elapsedNs, NANOSECONDS)
    assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.")
    assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.")
    assert(underlying.toString("UTF-8") === data)
  }
} 
Example 49
Source File: PythonRDDSuite.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.api.python

import java.io.{ByteArrayOutputStream, DataOutputStream}

import org.apache.spark.SparkFunSuite

class PythonRDDSuite extends SparkFunSuite {

  test("Writing large strings to the worker") {
    val input: List[String] = List("a"*100000)
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    PythonRDD.writeIteratorToStream(input.iterator, buffer)
  }

  test("Handle nulls gracefully") {
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    // Should not throw an NPE when writing an Iterator with null in it
    // The correctness will be tested in Python
    PythonRDD.writeIteratorToStream(Iterator("a", null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer)
    PythonRDD.writeIteratorToStream(Iterator("a".getBytes, null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes), buffer)
    PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer)
    PythonRDD.writeIteratorToStream(
      Iterator((null, null), ("a".getBytes, null), (null, "b".getBytes)), buffer)
  }
} 
Example 50
Source File: Serialization.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.commons.serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

trait Serialization {

  def deserialize[T](bytes: Array[Byte]): T = {
    val bufferIn = new ByteArrayInputStream(bytes)
    val streamIn = new ObjectInputStream(bufferIn)
    try {
      streamIn.readObject().asInstanceOf[T]
    } finally {
      streamIn.close()
    }
  }

  def serialize[T](objectToSerialize: T): Array[Byte] = {
    val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(byteArrayOutputStream)
    try {
      oos.writeObject(objectToSerialize)
      oos.flush()
      byteArrayOutputStream.toByteArray
    } finally {
      oos.close()
    }
  }

  def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj))
}

object Serialization extends Serialization 
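serializeDeserialize is handy in tests for asserting that a value survives Java serialization; a minimal sketch:

import ai.deepsense.commons.serialization.Serialization

object SerializationSketch extends App with Serialization {
  val original = Map("alpha" -> 1, "beta" -> 2) // any Serializable value
  val copy = serializeDeserialize(original)
  assert(copy == original)
  println(copy)
}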
Example 51
Source File: MavenAddManagedDependenciesSubscriberTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven.report

import java.io.{ByteArrayOutputStream, File}

import ch.qos.logback.classic.spi.LoggingEvent
import org.scalatest.{FlatSpecLike, Matchers}


class MavenAddManagedDependenciesSubscriberTest extends FlatSpecLike with Matchers {
  val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile)

  "MavenAddManagedDependenciesSubscriber" should "not accept unexpected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenAddDependenciesSubscriber(projectRoot)

    subscriber.accept("hahah")
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("fake")
    loggingEvent.setMessage("Some random message")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent2 = new LoggingEvent
    loggingEvent2.setLoggerName("MavenAddManagedDependenciesRule")
    loggingEvent2.setMessage("Some random message")
    subscriber.accept(loggingEvent2)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])
  }

  "MavenAddManagedDependenciesSubscriber" should "accept expected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenAddManagedDependenciesSubscriber(projectRoot)

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("MavenAddManagedDependenciesRule")
    loggingEvent.setMessage("{} added managed dependency {} to {}")
    loggingEvent.setArgumentArray(Array("MavenAddManagedDependenciesSubscriber", "xxxx:yyyy:zzzz", new File(projectRoot, "pom.xml")))
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    val result = new String(outputStream.toByteArray)
    result should include ("#### File [pom.xml](pom.xml)")
    result should include ("xxxx:yyyy:zzzz")
  }
} 
Example 52
Source File: MavenExcludeDependenciesSubscriberTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven.report

import java.io.{ByteArrayOutputStream, File}

import ch.qos.logback.classic.spi.LoggingEvent
import org.scalatest.{FlatSpecLike, Matchers}


class MavenExcludeDependenciesSubscriberTest extends FlatSpecLike with Matchers {
  val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile)

  "MavenExcludeDependenciesSubscriber" should "not accept unexpected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenExcludeDependenciesSubscriber(projectRoot)

    subscriber.accept("hahah")
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("fake")
    loggingEvent.setMessage("Some random message")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent2 = new LoggingEvent
    loggingEvent2.setLoggerName("MavenExcludeDependenciesRule")
    loggingEvent2.setMessage("Some random message")
    subscriber.accept(loggingEvent2)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])
  }

  "MavenExcludeDependenciesSubscriber" should "accept expected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenExcludeDependenciesSubscriber(projectRoot)

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("MavenExcludeDependenciesRule")
    loggingEvent.setMessage("{} excluded {} from {} in {}")
    loggingEvent.setArgumentArray(Array(
      "MavenExcludeDependenciesRule",
      Set("zzzz:yyyy:xxxx"),
      "xxxx:yyyy:zzzz",
      new File(projectRoot, "pom.xml")
    ))
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    val result = new String(outputStream.toByteArray)
    result should include ("#### File [pom.xml](pom.xml)")
    result should include ("xxxx:yyyy:zzzz")
    result should include ("zzzz:yyyy:xxxx")
  }
} 
Example 53
Source File: MavenRemoveDependenciesSubscriberTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven.report

import java.io.{ByteArrayOutputStream, File}

import ch.qos.logback.classic.spi.LoggingEvent
import org.scalatest.{FlatSpecLike, Matchers}


class MavenRemoveDependenciesSubscriberTest extends FlatSpecLike with Matchers {
  val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile)

  "MavenRemoveDependenciesSubscriber" should "not accept unexpected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenRemoveDependenciesSubscriber(projectRoot)

    subscriber.accept("hahah")
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("fake")
    loggingEvent.setMessage("Some random message")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent2 = new LoggingEvent
    loggingEvent2.setLoggerName("MavenRemoveDependenciesRule")
    loggingEvent2.setMessage("Some random message")
    subscriber.accept(loggingEvent2)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])
  }

  "MavenRemoveDependenciesSubscriber" should "accept expected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenRemoveDependenciesSubscriber(projectRoot)

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("MavenRemoveDependenciesRule")
    loggingEvent.setMessage("{} removed dependency {} from {}")
    loggingEvent.setArgumentArray(Array("MavenRemoveDependenciesRule", "xxxx:yyyy:zzzz", new File(projectRoot, "pom.xml")))
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    val result = new String(outputStream.toByteArray)
    result should include ("#### File [pom.xml](pom.xml)")
    result should include ("xxxx:yyyy:zzzz")
  }
} 
Example 54
Source File: MavenAddDependenciesSubscriberTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven.report

import java.io.{ByteArrayOutputStream, File}

import ch.qos.logback.classic.spi.LoggingEvent
import org.scalatest.{FlatSpecLike, Matchers}


class MavenAddDependenciesSubscriberTest extends FlatSpecLike with Matchers {
  val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile)

  "MavenAddDependenciesSubscriber" should "not accept unexpected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenAddDependenciesSubscriber(projectRoot)

    subscriber.accept("hahah")
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("fake")
    loggingEvent.setMessage("Some random message")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent2 = new LoggingEvent
    loggingEvent2.setLoggerName("MavenAddDependenciesRule")
    loggingEvent2.setMessage("Some random message")
    subscriber.accept(loggingEvent2)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])
  }

  "MavenAddDependenciesSubscriber" should "accept expected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenAddDependenciesSubscriber(projectRoot)

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("MavenAddDependenciesRule")
    loggingEvent.setMessage("{} added dependency {} to {}")
    loggingEvent.setArgumentArray(Array("MavenAddDependenciesRule", "xxxx:yyyy:zzzz", new File(projectRoot, "pom.xml")))
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    val result = new String(outputStream.toByteArray)
    result should include ("#### File [pom.xml](pom.xml)")
    result should include ("xxxx:yyyy:zzzz")
  }
} 
Example 55
Source File: MavenDependenciesMappingSubscriberTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven.report

import java.io.{ByteArrayOutputStream, File}

import ch.qos.logback.classic.spi.LoggingEvent
import org.scalatest.{FlatSpecLike, Matchers}


class MavenDependenciesMappingSubscriberTest extends FlatSpecLike with Matchers {

  val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile)

  "MavenDependenciesMappingSubscriber" should "not accept unexpected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenDependenciesMappingSubscriber(projectRoot)

    subscriber.accept("hahah")
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("fake")
    loggingEvent.setMessage("Some random message")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent2 = new LoggingEvent
    loggingEvent2.setLoggerName("MavenDependenciesMappingRule")
    loggingEvent2.setMessage("Some random message")
    subscriber.accept(loggingEvent2)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])
  }

  "MavenDependenciesMappingSubscriber" should "accept expected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new MavenDependenciesMappingSubscriber(projectRoot)

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("MavenDependenciesMappingRule")
    loggingEvent.setMessage("{} mapped {} to {} in {}")
    loggingEvent.setArgumentArray(Array(
      "MavenDependenciesMappingRule",
      Set("zzzz:yyyy:xxxx"),
      Set("xxxx:yyyy:zzzz"),
      new File(projectRoot, "pom.xml")
    ))
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    val result = new String(outputStream.toByteArray)
    result should include ("#### File [pom.xml](pom.xml)")
    result should include ("xxxx:yyyy:zzzz")
    result should include ("zzzz:yyyy:xxxx")
  }
} 
Example 56
Source File: ArtifactsSummarySubscriberTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven.report

import java.io.{ByteArrayOutputStream, File}

import ch.qos.logback.classic.spi.LoggingEvent
import org.scalatest.{FlatSpecLike, Matchers}


class ArtifactsSummarySubscriberTest extends FlatSpecLike with Matchers {

  val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile)

  "ArtifactsSummarySubscriber" should "not accept unexpected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new ArtifactsSummarySubscriber(projectRoot)

    subscriber.accept("hahah")
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent = new LoggingEvent
    loggingEvent.setMessage("Some random message")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])
  }

  "ArtifactsSummarySubscriber" should "accept expected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new ArtifactsSummarySubscriber(projectRoot)

    val loggingEvent = new LoggingEvent
    loggingEvent.setMessage("Found maven pom {} for artifact {}")
    loggingEvent.setArgumentArray(Array(new File(projectRoot, "abc/pom.xml"), "abc"))
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    val result = new String(outputStream.toByteArray)
    result should include ("abc/pom.xml")
    result should include ("abc")
  }
} 
Example 57
Source File: UpgradeSummarySubscriberTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.report.impl

import java.io.ByteArrayOutputStream

import ch.qos.logback.classic.spi.LoggingEvent
import org.scalatest.{FlatSpecLike, Matchers}


class UpgradeSummarySubscriberTest extends FlatSpecLike with Matchers {

  "UpgradeSummarySubscriber" should "not accept unexpected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new UpgradeSummarySubscriber

    subscriber.accept("hahah")
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent = new LoggingEvent
    loggingEvent.setLoggerName("fake")
    loggingEvent.setMessage("Some random message")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])
  }

  "UpgradeSummarySubscriber" should "accept expected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new UpgradeSummarySubscriber

    val loggingEvent = new LoggingEvent
    loggingEvent.setMessage("Rule some_rule was applied to 3 files")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    val result = new String(outputStream.toByteArray)
    result should include ("|[some_rule](#some_rule) | impacted 3 file(s) |")
  }
} 
Example 58
Source File: ProjectDetailsSubscriberTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.report.impl

import java.io.ByteArrayOutputStream

import ch.qos.logback.classic.spi.LoggingEvent
import org.scalatest.{FlatSpecLike, Matchers}


class ProjectDetailsSubscriberTest extends FlatSpecLike with Matchers {

  "ProjectDetailsSubscriber" should "not accept unexpected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new ProjectDetailsSubscriber

    subscriber.accept("hahah")
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent = new LoggingEvent
    loggingEvent.setMessage("Some random message")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])
  }

  "ProjectDetailsSubscriber" should "accept expected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new ProjectDetailsSubscriber

    val loggingEvent = new LoggingEvent
    loggingEvent.setMessage("Starting upgrade Scala project to 2.5.4-RELEASE, pom pom.xml with taskId None")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    val result = new String(outputStream.toByteArray)
    result should include ("pom.xml")
    result should include ("2.5.4-RELEASE")
    result should include ("Scala project upgrade report")
    result should include ("Upgrade job ID | None")
    result should include ("Full upgrade log | [link](raptor-upgrade-debug.log)")
    result should include ("Upgrade warnings only log | [link](raptor-upgrade-warn.log)")
    outputStream.reset()

    val loggingEvent2 = new LoggingEvent
    loggingEvent2.setMessage("Starting upgrade Scala project to 2.5.4-RELEASE, pom pom.xml with taskId Some(1234)")
    subscriber.accept(loggingEvent2)
    subscriber.dumpTo(outputStream)
    val result2 = new String(outputStream.toByteArray)
    result2 should include ("pom.xml")
    result2 should include ("2.5.4-RELEASE")
    result2 should include ("Scala project upgrade report")
    result2 should include ("Upgrade job ID | Some(1234)")
    result2 should include ("Full upgrade log | [link](raptor-upgrade-debug-1234.log)")
    result2 should include ("Upgrade warnings only log | [link](raptor-upgrade-warn-1234.log)")
  }
} 
Example 59
Source File: ManualChangesSummarySubscriberTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.report.impl

import java.io.ByteArrayOutputStream

import ch.qos.logback.classic.spi.LoggingEvent
import org.scalatest.{FlatSpecLike, Matchers}


class ManualChangesSummarySubscriberTest extends FlatSpecLike with Matchers {

  "ManualChangesSummarySubscriber" should "not accept unexpected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new ManualChangesSummarySubscriber

    subscriber.accept("hahaha")
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])

    val loggingEvent = new LoggingEvent
    loggingEvent.setMessage("Some random message")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    outputStream.toByteArray should be (Array.empty[Byte])
  }

  "ManualChangesSummarySubscriber" should "accept expected events" in {
    val outputStream = new ByteArrayOutputStream
    val subscriber = new ManualChangesSummarySubscriber

    val loggingEvent = new LoggingEvent
    loggingEvent.setMessage("Rule blahblah requires 1000 manual changes")
    subscriber.accept(loggingEvent)
    subscriber.dumpTo(outputStream)
    val result = new String(outputStream.toByteArray)
    result should include ("|[blahblah](#blahblah) | 1000 manual changes required |")
  }
} 
Example 60
Source File: JavaSerde.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.core.serde

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream}

import akka.actor.ExtendedActorSystem
import akka.serialization.JavaSerializer
import akka.util.ClassLoaderObjectInputStream

class JavaSerde(system: ExtendedActorSystem) extends Serde[AnyRef] {

  override def identifier: Int = 101

  override def close(): Unit = ()

  override def fromBytes(bytes: Array[Byte]): AnyRef = {
    val in = new ClassLoaderObjectInputStream(system.dynamicAccess.classLoader, new ByteArrayInputStream(bytes))
    val obj = JavaSerializer.currentSystem.withValue(system) { in.readObject }
    in.close()
    obj
  }

  override def toBytes(o: AnyRef): Array[Byte] = {
    val bos = new ByteArrayOutputStream
    val out = new ObjectOutputStream(bos)
    JavaSerializer.currentSystem.withValue(system) { out.writeObject(o) }
    out.close()
    bos.toByteArray
  }

} 
Example 61
Source File: SeqSerde.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.core.serde.collection

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import akka.actor.ExtendedActorSystem
import com.typesafe.config.Config
import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes}

class SeqSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Seq[Any]] {

  def this(system: ExtendedActorSystem) = this(Serde.tools(system))
  def this(config: Config) = this(Serde.tools(config))

  override def identifier: Int = 141

  override def close(): Unit = ()

  override protected def fromBytes(bytes: Array[Byte]): Seq[Any] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val numItems = di.readInt()
    val result = ((1 to numItems) map { _ =>
      val len = di.readInt()
      val item = new Array[Byte](len)
      di.read(item)
      fromBinaryWrapped(item)
    }).toList
    di.close()
    result
  }

  override def toBytes(seq: Seq[Any]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d = new DataOutputStream(os)
    d.writeInt(seq.size)
    for (a: Any <- seq) a match {
      case ref: AnyRef =>
        val item = toBinaryWrapped(ref)
        d.writeInt(item.length)
        d.write(item)
    }
    os.close
    os.toByteArray
  }
} 
Example 62
Source File: SetSerde.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.core.serde.collection

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import akka.actor.ExtendedActorSystem
import com.typesafe.config.Config
import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes}

class SetSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Set[Any]] {

  def this(system: ExtendedActorSystem) = this(Serde.tools(system))
  def this(config: Config) = this(Serde.tools(config))

  override def identifier: Int = 142

  override protected def fromBytes(bytes: Array[Byte]): Set[Any] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val numItems = di.readInt()
    val result = ((1 to numItems) map { _ =>
      val len = di.readInt()
      val item = new Array[Byte](len)
      di.read(item)
      fromBinaryWrapped(item)
    }).toSet
    di.close()
    result
  }

  override def toBytes(set: Set[Any]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d = new DataOutputStream(os)
    d.writeInt(set.size)
    for (a: Any <- set) a match {
      case ref: AnyRef =>
        val item = toBinaryWrapped(ref)
        d.writeInt(item.length)
        d.write(item)
    }
    os.close
    os.toByteArray
  }

  override def close() = ()
} 
Example 63
Source File: ResponseHelper.scala    From OUTDATED_ledger-wallet-android   with MIT License 5 votes vote down vote up
package co.ledger.wallet.core.net

import java.io.{ByteArrayOutputStream, BufferedInputStream}

import co.ledger.wallet.core.utils.io.IOUtils
import co.ledger.wallet.core.utils.logs.Logger
import org.json.{JSONArray, JSONObject}
import co.ledger.wallet.core.net.HttpRequestExecutor.defaultExecutionContext
import scala.concurrent.Future
import scala.io.Source
import scala.util.{Failure, Success}

object ResponseHelper {

  implicit class ResponseFuture(f: Future[HttpClient#Response]) {

    def json: Future[(JSONObject, HttpClient#Response)] = {
      f.string.map { case (body, response) =>
        Logger.d("Converting to json")
        (new JSONObject(body), response)
      }
    }

    def jsonArray: Future[(JSONArray, HttpClient#Response)] = {
      f.string.map { case (body, response) =>
        (new JSONArray(body), response)
      }
    }

    def string: Future[(String, HttpClient#Response)] = {
      f.map { response =>
        Logger.d("Converting to string")
        (Source.fromInputStream(response.body).mkString, response)
      }
    }

    def bytes: Future[(Array[Byte], HttpClient#Response)] = {
      f.map { response =>
        val input = new BufferedInputStream(response.body)
        val output = new ByteArrayOutputStream()
        IOUtils.copy(input, output)
        val result = output.toByteArray
        input.close()
        output.close()
        (result, response)
      }
    }

    def noResponseBody: Future[HttpClient#Response] = {
      f.andThen {
        case Success(response) =>
          response.body.close()
          response
        case Failure(cause) =>
          throw cause
      }
    }

  }

} 
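The bytes helper above leans on the project's own IOUtils.copy; the underlying idea is a plain read/write loop into a ByteArrayOutputStream. A hedged standalone sketch of that loop:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

object CopyToBytesSketch extends App {
  // Drain any InputStream into a byte array via a fixed-size buffer.
  def toByteArray(input: InputStream, bufferSize: Int = 4096): Array[Byte] = {
    val output = new ByteArrayOutputStream()
    val buffer = new Array[Byte](bufferSize)
    var read = input.read(buffer)
    while (read != -1) {
      output.write(buffer, 0, read)
      read = input.read(buffer)
    }
    output.toByteArray
  }

  val source = new ByteArrayInputStream("pretend this is an HTTP response body".getBytes("UTF-8"))
  println(new String(toByteArray(source), "UTF-8"))
}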
Example 64
Source File: RawTextSender.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.{SparkConf, Logging}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam


private[streaming]
object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
} 
Example 65
Source File: RateLimitedOutputStreamSuite.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.ByteArrayOutputStream
import java.util.concurrent.TimeUnit._

import org.apache.spark.SparkFunSuite

class RateLimitedOutputStreamSuite extends SparkFunSuite {

  private def benchmark[U](f: => U): Long = {
    val start = System.nanoTime
    f
    System.nanoTime - start
  }

  test("write") {//写
    val underlying = new ByteArrayOutputStream
    val data = "X" * 41000
    // desiredBytesPerSec: the target number of bytes written per second
    val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000)
    val elapsedNs = benchmark { stream.write(data.getBytes("UTF-8")) }

    val seconds = SECONDS.convert(elapsedNs, NANOSECONDS)
    assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.")
    assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.")
    assert(underlying.toString("UTF-8") === data)
  }
} 
Example 66
Source File: PythonRDDSuite.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.api.python

import java.io.{ByteArrayOutputStream, DataOutputStream}

import org.apache.spark.SparkFunSuite

class PythonRDDSuite extends SparkFunSuite {
  test("Writing large strings to the worker") {
    val input: List[String] = List("a"*100000)
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    PythonRDD.writeIteratorToStream(input.iterator, buffer)
  }
  test("Handle nulls gracefully") {
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    // Should not throw an NPE when writing an Iterator with null in it
    // The correctness will be tested in Python
    PythonRDD.writeIteratorToStream(Iterator("a", null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer)
    PythonRDD.writeIteratorToStream(Iterator("a".getBytes, null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes), buffer)
    PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer)
    PythonRDD.writeIteratorToStream(
      Iterator((null, null), ("a".getBytes, null), (null, "b".getBytes)), buffer)
  }
} 
Example 67
Source File: GenericAvroSerializerSuite.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {//模式压缩与解压缩
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {//记录序列化和反序列化
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }
  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {//缓存之前模式
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 68
Source File: ByteBufferOutputStream.scala    From kraps-rpc   with Apache License 2.0 5 votes vote down vote up
package net.neoremind.kraps.util

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer


class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) {

  def this() = this(32)

  def getCount(): Int = count

  private[this] var closed: Boolean = false

  override def write(b: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b)
  }

  override def write(b: Array[Byte], off: Int, len: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b, off, len)
  }

  override def reset(): Unit = {
    require(!closed, "cannot reset a closed ByteBufferOutputStream")
    super.reset()
  }

  override def close(): Unit = {
    if (!closed) {
      super.close()
      closed = true
    }
  }

  def toByteBuffer: ByteBuffer = {
    require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed")
    ByteBuffer.wrap(buf, 0, count)
  }
} 
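A minimal usage sketch for the class above, assuming it is on the classpath under the package shown; the demo object and its assertion are illustrative only.

import java.nio.charset.StandardCharsets

import net.neoremind.kraps.util.ByteBufferOutputStream

object ByteBufferOutputStreamDemo extends App {
  val out = new ByteBufferOutputStream()
  out.write("hello".getBytes(StandardCharsets.UTF_8))
  out.close()                    // toByteBuffer requires the stream to be closed first
  val buffer = out.toByteBuffer  // wraps the internal byte array without copying
  assert(buffer.remaining() == 5)
}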
Example 69
Source File: Unpacker.scala    From haystack-traces   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}
import java.nio.ByteBuffer
import java.util.zip.GZIPInputStream

import com.expedia.open.tracing.buffer.SpanBuffer
import com.github.luben.zstd.ZstdInputStream
import org.apache.commons.io.IOUtils
import org.json4s.jackson.Serialization
import org.xerial.snappy.SnappyInputStream

object Unpacker {
  import PackedMessage._

  private def readMetadata(packedDataBytes: Array[Byte]): Array[Byte] = {
    val byteBuffer = ByteBuffer.wrap(packedDataBytes)
    val magicBytesExist = MAGIC_BYTES.indices forall { idx => byteBuffer.get() == MAGIC_BYTES.apply(idx) }
    if (magicBytesExist) {
      val headerLength = byteBuffer.getInt
      val metadataBytes = new Array[Byte](headerLength)
      byteBuffer.get(metadataBytes, 0, headerLength)
      metadataBytes
    } else {
      null
    }
  }

  private def unpack(compressedStream: InputStream) = {
    val outputStream = new ByteArrayOutputStream()
    IOUtils.copy(compressedStream, outputStream)
    outputStream.toByteArray
  }

  def readSpanBuffer(packedDataBytes: Array[Byte]): SpanBuffer = {
    var parsedDataBytes: Array[Byte] = null
    val metadataBytes = readMetadata(packedDataBytes)
    if (metadataBytes != null) {
      val packedMetadata = Serialization.read[PackedMetadata](new String(metadataBytes))
      val compressedDataOffset = MAGIC_BYTES.length + 4 + metadataBytes.length
      packedMetadata.t match {
        case PackerType.SNAPPY =>
          parsedDataBytes = unpack(
            new SnappyInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.GZIP =>
          parsedDataBytes = unpack(
            new GZIPInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.ZSTD =>
          parsedDataBytes = unpack(
            new ZstdInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case _ =>
          return SpanBuffer.parseFrom(
            new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset))
      }
    } else {
      parsedDataBytes = packedDataBytes
    }
    SpanBuffer.parseFrom(parsedDataBytes)
  }
} 
Example 70
Source File: Packer.scala    From haystack-traces   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStream}
import java.util.zip.GZIPOutputStream

import com.expedia.www.haystack.trace.commons.packer.PackerType.PackerType
import com.github.luben.zstd.ZstdOutputStream
import com.google.protobuf.GeneratedMessageV3
import org.apache.commons.io.IOUtils
import org.xerial.snappy.SnappyOutputStream

object PackerType extends Enumeration {
  type PackerType = Value
  val GZIP, SNAPPY, NONE, ZSTD = Value
}

case class PackedMetadata(t: PackerType)

abstract class Packer[T <: GeneratedMessageV3] {
  val packerType: PackerType

  protected def compressStream(stream: OutputStream): OutputStream

  private def pack(protoObj: T): Array[Byte] = {
    val outStream = new ByteArrayOutputStream
    val compressedStream = compressStream(outStream)
    if (compressedStream != null) {
      IOUtils.copy(new ByteArrayInputStream(protoObj.toByteArray), compressedStream)
      compressedStream.close() // this flushes the data to final outStream
      outStream.toByteArray
    } else {
      protoObj.toByteArray
    }
  }

  def apply(protoObj: T): PackedMessage[T] = {
    PackedMessage(protoObj, pack, PackedMetadata(packerType))
  }
}

class NoopPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.NONE
  override protected def compressStream(stream: OutputStream): OutputStream = null
}

class SnappyPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.SNAPPY
  override protected def compressStream(stream: OutputStream): OutputStream = new SnappyOutputStream(stream)
}


class ZstdPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.ZSTD
  override protected def compressStream(stream: OutputStream): OutputStream = new ZstdOutputStream(stream)
}

class GzipPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.GZIP
  override protected def compressStream(stream: OutputStream): OutputStream = new GZIPOutputStream(stream)
} 
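The packers above delegate the byte handling to a ByteArrayOutputStream plus a compressing wrapper. Here is a minimal sketch of the same GZIP round trip with plain JDK streams, using a raw byte payload instead of a protobuf SpanBuffer; the object name and payload are assumptions for illustration.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object GzipPackRoundTrip extends App {
  val payload = Array.fill[Byte](1024)(7)

  // pack: compress into an in-memory buffer
  val packed = {
    val out = new ByteArrayOutputStream()
    val gzip = new GZIPOutputStream(out)
    gzip.write(payload)
    gzip.close() // closing flushes the remaining compressed data into `out`
    out.toByteArray
  }

  // unpack: inflate back into a byte array
  val unpacked = {
    val in = new GZIPInputStream(new ByteArrayInputStream(packed))
    val out = new ByteArrayOutputStream()
    val chunk = new Array[Byte](4096)
    var n = in.read(chunk)
    while (n >= 0) { out.write(chunk, 0, n); n = in.read(chunk) }
    out.toByteArray
  }

  assert(unpacked.sameElements(payload))
}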
Example 71
Source File: ParquetIOTest.scala    From ratatool   with Apache License 2.0 5 votes vote down vote up
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.nio.file.Files

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import com.spotify.ratatool.scalacheck._
import org.apache.commons.io.FileUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ParquetIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "ParquetIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream(in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(genericData, genericSchema, file)
    val result = ParquetIO.readFromFile(file).toList
    result should equal (genericData)
    FileUtils.deleteDirectory(dir.toFile)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(specificData, specificSchema, file)
    val result = ParquetIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
    FileUtils.deleteDirectory(dir.toFile)
  }

} 
Example 72
Source File: AvroIOTest.scala    From ratatool   with Apache License 2.0 5 votes vote down vote up
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import org.apache.avro.generic.GenericRecord
import com.spotify.ratatool.scalacheck._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class AvroIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "AvroIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    AvroIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = AvroIO.readFromInputStream[GenericRecord](in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val file = File.createTempFile("ratatool-", ".avro")
    file.deleteOnExit()
    AvroIO.writeToFile(genericData, genericSchema, file)
    val result = AvroIO.readFromFile[GenericRecord](file).toList
    result should equal (genericData)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    AvroIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = AvroIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val file = File.createTempFile("ratatool-", ".avro")
    file.deleteOnExit()
    AvroIO.writeToFile(specificData, specificSchema, file)
    val result = AvroIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }
} 
Example 73
Source File: TableRowJsonIOTest.scala    From ratatool   with Apache License 2.0 5 votes vote down vote up
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.scalacheck._
import org.scalacheck.Gen
import scala.jdk.CollectionConverters._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class TableRowJsonIOTest extends AnyFlatSpec with Matchers {

  
  private def floatGen = Gen.choose[Float](0.0F, 1.0F)

  private val schema = Schemas.tableSchema
  private val data = Gen.listOfN(100,
    tableRowOf(schema)
      .amend(Gen.oneOf(
        Gen.const(null),
        floatGen
      ))(_.getRecord("nullable_fields").set("float_field"))
      .amend(floatGen)(_.getRecord("required_fields").set("float_field"))
      .amend(Gen.nonEmptyListOf(floatGen)
        .map(_.asJava)
      )(_.getRecord("repeated_fields").set("float_field"))
  ).sample.get

  "TableRowJsonIO" should "work with stream" in {
    val out = new ByteArrayOutputStream()
    TableRowJsonIO.writeToOutputStream(data, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = TableRowJsonIO.readFromInputStream(in).toList.map(_.toString)
    result should equal (data.map(_.toString))
  }

  it should "work with file" in {
    val file = File.createTempFile("ratatool-", ".json")
    file.deleteOnExit()
    TableRowJsonIO.writeToFile(data, file)
    val result = TableRowJsonIO.readFromFile(file).toList.map(_.toString)
    result should equal (data.map(_.toString))
  }

} 
Example 74
Source File: GenericSerde.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.kafka

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{AvroFormat, AvroInputStream, AvroOutputStream, AvroSchema, BinaryFormat, DataFormat, Decoder, Encoder, JsonFormat, SchemaFor}
import org.apache.avro.Schema
import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer}


class GenericSerde[T >: Null : SchemaFor : Encoder : Decoder](avroFormat: AvroFormat = BinaryFormat) extends Serde[T]
  with Deserializer[T]
  with Serializer[T]
  with Serializable {

  val schema: Schema = AvroSchema[T]

  override def serializer(): Serializer[T] = this

  override def deserializer(): Deserializer[T] = this

  override def deserialize(topic: String, data: Array[Byte]): T = {
    if (data == null) null else {

      val avroInputStream = avroFormat match {
        case BinaryFormat => AvroInputStream.binary[T]
        case JsonFormat => AvroInputStream.json[T]
        case DataFormat => AvroInputStream.data[T]
      }

      val input = avroInputStream.from(data).build(schema)
      val result = input.iterator.next()
      input.close()
      result
    }
  }

  override def close(): Unit = ()

  override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = ()

  override def serialize(topic: String, data: T): Array[Byte] = {
    val baos = new ByteArrayOutputStream()

    val avroOutputStream = avroFormat match {
      case BinaryFormat => AvroOutputStream.binary[T]
      case JsonFormat => AvroOutputStream.json[T]
      case DataFormat => AvroOutputStream.data[T]
    }

    val output = avroOutputStream.to(baos).build()
    output.write(data)
    output.close()
    baos.toByteArray
  }
} 
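A minimal round trip through the Serde above; the Pizza case class and topic name are hypothetical, and avro4s is assumed to derive SchemaFor, Encoder and Decoder for it implicitly.

import com.sksamuel.avro4s.BinaryFormat
import com.sksamuel.avro4s.kafka.GenericSerde

case class Pizza(name: String, vegetarian: Boolean)

object GenericSerdeRoundTrip extends App {
  val serde = new GenericSerde[Pizza](BinaryFormat)

  // serialize to Avro binary and back again; the topic name is not used by this Serde
  val bytes = serde.serializer().serialize("pizzas", Pizza("margherita", vegetarian = true))
  val back  = serde.deserializer().deserialize("pizzas", bytes)

  assert(back == Pizza("margherita", vegetarian = true))
}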
Example 75
Source File: GithubIssue193.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{AvroFixed, AvroInputStream, AvroOutputStream}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

case class Data(uuid: Option[UUID])
case class UUID(@AvroFixed(8) bytes: Array[Byte])

class GithubIssue193 extends AnyFunSuite with Matchers {

  test("Converting data with an optional fixed type field to GenericRecord fails #193") {

    val baos = new ByteArrayOutputStream()

    val output = AvroOutputStream.data[Data].to(baos).build()
    output.write(Data(Some(UUID(Array[Byte](0, 1, 2, 3, 4, 5, 6, 7)))))
    output.write(Data(None))
    output.write(Data(Some(UUID(Array[Byte](7, 6, 5, 4, 3, 2, 1, 0)))))
    output.close()

    val input = AvroInputStream.data[Data].from(baos.toByteArray).build
    val datas = input.iterator.toList
    datas.head.uuid.get.bytes should equal(Array[Byte](0, 1, 2, 3, 4, 5, 6, 7))
    datas(1).uuid shouldBe None
    datas.last.uuid.get.bytes should equal(Array[Byte](7, 6, 5, 4, 3, 2, 1, 0))
    input.close()
  }
} 
Example 76
Source File: GithubIssue191.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{AvroOutputStream, AvroSchema}
import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput}
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

final case class SN(value: String) extends AnyVal
final case class SimpleUser(name: String, sn: Option[SN])

class GithubIssue191 extends AnyFunSuite with Matchers {

  test("writing out AnyVal in an option") {
    implicit val schema = AvroSchema[SimpleUser]
    val bytes = new ByteArrayOutputStream
    val out = AvroOutputStream.data[SimpleUser].to(bytes).build()
    out.write(SimpleUser("Tom", Some(SN("123"))))
    out.close()

    val datumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](new SeekableByteArrayInput(bytes.toByteArray), datumReader)
    val record = new Iterator[GenericRecord] {
      override def hasNext: Boolean = dataFileReader.hasNext
      override def next(): GenericRecord = dataFileReader.next
    }.toList.head
    record.getSchema shouldBe schema
    record.get("name") shouldBe new Utf8("Tom")
    record.get("sn") shouldBe new Utf8("123")
  }
} 
Example 77
Source File: GithubIssue235.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{Decoder, Encoder, RecordFormat, SchemaFor}
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

case class Label(value: String) extends AnyVal
case class Value[A](label: Label, value: A)

sealed trait OneOrTwo[A]
case class One[A](value: Value[A]) extends OneOrTwo[A]
case class Two[A](first: Value[A], second: Value[A]) extends OneOrTwo[A]
case class OneOrTwoWrapper[A](t: OneOrTwo[A])

object Bug {

  def apply[T <: Product](a: T)(
    implicit schemaFor: SchemaFor[T],
    encoder: Encoder[T],
    decoder: Decoder[T]
  ): Unit = {

    val format = RecordFormat[T]
    val schema = schemaFor.schema
    val datumReader = new GenericDatumReader[GenericRecord](schema)
    val datumWriter = new GenericDatumWriter[GenericRecord](schema)

    val stream = new ByteArrayOutputStream()
    val bEncoder = EncoderFactory.get().binaryEncoder(stream, null)

    datumWriter.write(format.to(a), bEncoder)
    bEncoder.flush()

    val bytes = stream.toByteArray
    val bDecoder = DecoderFactory.get().binaryDecoder(bytes, null)
    val record = datumReader.read(null, bDecoder)
    require(format.from(record) == a)
  }

}

class GithubIssue235 extends AnyFunSuite with Matchers {
  test("Broken typeclass derivation upgrading from 1.9.0 to 2.0.1 #235") {
    val o = OneOrTwoWrapper(One(Value(Label("lbl"), "foo")))
    Bug(o)
  }
} 
Example 78
Source File: GithubIssue485.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.record.decoder.CPWrapper
import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper}
import org.apache.avro.generic.GenericData
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import shapeless.Coproduct

class GithubIssue485 extends AnyFunSuite with Matchers {

  test("Serializable Coproduct Decoder #485") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Decoder[CPWrapper])
    oos.close()

    val decoder =
      new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)).readObject().asInstanceOf[Decoder[CPWrapper]]

    val schema = AvroSchema[CPWrapper]
    val record = new GenericData.Record(schema)
    record.put("u", new Utf8("wibble"))
    decoder.decode(record) shouldBe CPWrapper(Coproduct[CPWrapper.ISBG]("wibble"))
  }
} 
Example 79
Source File: GithubIssue484.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.record.decoder.ScalaEnumClass
import com.sksamuel.avro4s.schema.Colours
import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper}
import org.apache.avro.generic.GenericData
import org.apache.avro.generic.GenericData.EnumSymbol
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class GithubIssue484 extends AnyFunSuite with Matchers {

  test("Serializable Scala Enum Decoder #484") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Decoder[ScalaEnumClass])
    oos.close()

    val decoder = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
      .readObject()
      .asInstanceOf[Decoder[ScalaEnumClass]]

    val schema = AvroSchema[ScalaEnumClass]
    val record = new GenericData.Record(schema)
    record.put("colour", new EnumSymbol(schema.getField("colour").schema(), "Green"))
    decoder.decode(record) shouldBe ScalaEnumClass(Colours.Green)
  }
} 
Example 80
Source File: GithubIssue432.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.Encoder
import org.scalatest.{FunSuite, Matchers}

class GithubIssue432 extends FunSuite with Matchers {

  test("Serializable Encoder[BigDecimal] #432") {
    val oos = new ObjectOutputStream(new ByteArrayOutputStream())
    oos.writeObject(Encoder.bigDecimalEncoder)
    oos.close()
  }

  test("Deserialized Encoder[BigDecimal] works") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Encoder.bigDecimalEncoder)
    oos.close()

    val ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
    val encoder = ois.readObject().asInstanceOf[Encoder[BigDecimal]]

    encoder.encode(12.34)
  }
} 
Example 81
Source File: BinaryOutputStreamTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.streams.output

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{AvroInputStream, AvroOutputStream, AvroSchema, Encoder}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

case class Work(name: String, year: Int, style: Style)
case class Composer(name: String, birthplace: String, works: Seq[Work])

class BinaryStreamsTest extends AnyWordSpec with Matchers {

  val ennio = Composer("ennio morricone", "rome", Seq(Work("legend of 1900", 1986, Style.Classical), Work("ecstasy of gold", 1969, Style.Classical)))
  val hans = Composer("hans zimmer", "frankfurt", Seq(Work("batman begins", 2007, Style.Modern), Work("dunkirk", 2017, Style.Modern)))

  "Avro binary streams" should {
    "not write schemas" in {

      implicit val schema = AvroSchema[Composer]
      implicit val encoder = Encoder[Composer]

      val baos = new ByteArrayOutputStream()
      val output = AvroOutputStream.binary[Composer].to(baos).build()
      output.write(ennio)
      output.write(hans)
      output.close()

      // the schema should not be written in a binary stream
      new String(baos.toByteArray) should not include "birthplace"
      new String(baos.toByteArray) should not include "compositions"
      new String(baos.toByteArray) should not include "year"
      new String(baos.toByteArray) should not include "style"
    }
    "read and write" in {

      implicit val schema = AvroSchema[Composer]
      implicit val encoder = Encoder[Composer]

      val baos = new ByteArrayOutputStream()
      val output = AvroOutputStream.binary[Composer].to(baos).build()
      output.write(ennio)
      output.write(hans)
      output.close()

      val in = AvroInputStream.binary[Composer].from(baos.toByteArray).build(schema)
      in.iterator.toList shouldBe List(ennio, hans)
      in.close()
    }
  }
} 
Example 82
Source File: AvroDataOutputStreamCodecTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.streams.output

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{AvroOutputStream, AvroSchema}
import org.apache.avro.file.CodecFactory
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

class AvroDataOutputStreamCodecTest extends AnyWordSpec with Matchers {

  case class Composer(name: String, birthplace: String, compositions: Seq[String])
  val schema = AvroSchema[Composer]
  val ennio = Composer("ennio morricone", "rome", Seq("legend of 1900", "ecstasy of gold"))

  "AvroDataOutputStream" should {
    "include schema" in {
      val baos = new ByteArrayOutputStream()
      val output = AvroOutputStream.data[Composer].to(baos).build()
      output.write(ennio)
      output.close()
      new String(baos.toByteArray) should include("birthplace")
      new String(baos.toByteArray) should include("compositions")
    }

    "include deflate coded in metadata when serialized with deflate" in {
      val baos = new ByteArrayOutputStream()
      val output = AvroOutputStream.data[Composer].to(baos).withCodec(CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL)).build()
      output.write(ennio)
      output.close()
      new String(baos.toByteArray) should include("deflate")
    }

    "include bzip2 coded in metadata when serialized with bzip2" in {
      val baos = new ByteArrayOutputStream()
      val output = AvroOutputStream.data[Composer].to(baos).withCodec(CodecFactory.bzip2Codec).build()
      output.write(ennio)
      output.close()
      new String(baos.toByteArray) should include("bzip2")
    }
  }
} 
Example 83
Source File: OutputStreamTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.streams.output

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s._
import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput}
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

trait OutputStreamTest extends AnyFunSuite with Matchers {

  def readData[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readData(out.toByteArray)
  def readData[T: SchemaFor](bytes: Array[Byte]): GenericRecord = {
    val datumReader = new GenericDatumReader[GenericRecord](AvroSchema[T])
    val dataFileReader = new DataFileReader[GenericRecord](new SeekableByteArrayInput(bytes), datumReader)
    dataFileReader.next
  }

  def writeData[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = {
    val out = new ByteArrayOutputStream
    val avro = AvroOutputStream.data[T].to(out).build()
    avro.write(t)
    avro.close()
    out
  }

  def readBinary[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readBinary(out.toByteArray)
  def readBinary[T: SchemaFor](bytes: Array[Byte]): GenericRecord = {
    val datumReader = new GenericDatumReader[GenericRecord](AvroSchema[T])
    val decoder = DecoderFactory.get().binaryDecoder(new SeekableByteArrayInput(bytes), null)
    datumReader.read(null, decoder)
  }

  def writeBinary[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = {
    val out = new ByteArrayOutputStream
    val avro = AvroOutputStream.binary[T].to(out).build()
    avro.write(t)
    avro.close()
    out
  }

  def readJson[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readJson(out.toByteArray)
  def readJson[T: SchemaFor](bytes: Array[Byte]): GenericRecord = {
    val schema = AvroSchema[T]
    val datumReader = new GenericDatumReader[GenericRecord](schema)
    val decoder = DecoderFactory.get().jsonDecoder(schema, new SeekableByteArrayInput(bytes))
    datumReader.read(null, decoder)
  }

  def writeJson[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = {
    val out = new ByteArrayOutputStream
    val avro = AvroOutputStream.json[T].to(out).build()
    avro.write(t)
    avro.close()
    out
  }

  def writeRead[T: Encoder : SchemaFor](t: T)(fn: GenericRecord => Any): Unit = {
    {
      val out = writeData(t)
      val record = readData(out)
      fn(record)
    }
    {
      val out = writeBinary(t)
      val record = readBinary(out)
      fn(record)
    }
    {
      val out = writeJson(t)
      val record = readJson(out)
      fn(record)
    }
  }
} 
Example 84
Source File: InputStreamTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.streams.input

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s._
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

trait InputStreamTest extends AnyFunSuite with Matchers {

  def readData[T: SchemaFor: Decoder](out: ByteArrayOutputStream): T = readData(out.toByteArray)
  def readData[T: SchemaFor: Decoder](bytes: Array[Byte]): T = {
    AvroInputStream.data.from(bytes).build(implicitly[SchemaFor[T]].schema).iterator.next()
  }

  def writeData[T: Encoder: SchemaFor](t: T): ByteArrayOutputStream = {
    val out = new ByteArrayOutputStream
    val avro = AvroOutputStream.data[T].to(out).build()
    avro.write(t)
    avro.close()
    out
  }

  def readBinary[T: SchemaFor: Decoder](out: ByteArrayOutputStream): T = readBinary(out.toByteArray)
  def readBinary[T: SchemaFor: Decoder](bytes: Array[Byte]): T = {
    AvroInputStream.binary.from(bytes).build(implicitly[SchemaFor[T]].schema).iterator.next()
  }

  def writeBinary[T: Encoder](t: T): ByteArrayOutputStream = {
    val out = new ByteArrayOutputStream
    val avro = AvroOutputStream.binary[T].to(out).build()
    avro.write(t)
    avro.close()
    out
  }

  def writeRead[T: Encoder: Decoder: SchemaFor](t: T): Unit = {
    {
      val out = writeData(t)
      readData(out) shouldBe t
    }
    {
      val out = writeBinary(t)
      readBinary(out) shouldBe t
    }
  }

  def writeRead[T: Encoder: Decoder: SchemaFor](t: T, expected: T): Unit = {
    {
      val out = writeData(t)
      readData(out) shouldBe expected
    }
    {
      val out = writeBinary(t)
      readBinary(out) shouldBe expected
    }
  }
} 
Example 85
Source File: SchemaEvolutionTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.record.decoder

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.sksamuel.avro4s._
import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.GenericData
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class SchemaEvolutionTest extends AnyFunSuite with Matchers {

  case class Version1(original: String)
  case class Version2(@AvroAlias("original") renamed: String)

  case class P1(name: String, age: Int = 18)
  case class P2(name: String)

  case class OptionalStringTest(a: String, b: Option[String])
  case class DefaultStringTest(a: String, b: String = "foo")

  ignore("@AvroAlias should be used when a reader schema has a field missing from the write schema") {

    val v1schema = AvroSchema[Version1]
    val v1 = Version1("hello")
    val baos = new ByteArrayOutputStream()
    val output = AvroOutputStream.data[Version1].to(baos).build()
    output.write(v1)
    output.close()

    // we load using a v2 schema
    val is = new AvroDataInputStream[Version2](new ByteArrayInputStream(baos.toByteArray), Some(v1schema))
    val v2 = is.iterator.toList.head

    v2.renamed shouldBe v1.original
  }

  test("when decoding, if the record and schema are missing a field and the target has a scala default, use that") {

    val f1 = RecordFormat[P1]
    val f2 = RecordFormat[P2]

    f1.from(f2.to(P2("foo"))) shouldBe P1("foo")
  }

  test("when decoding, if the record is missing a field that is present in the schema with a default, use the default from the schema") {
    val schema = SchemaBuilder.record("foo").fields().requiredString("a").endRecord()
    val record = new GenericData.Record(schema)
    record.put("a", new Utf8("hello"))
    Decoder[DefaultStringTest].decode(record) shouldBe DefaultStringTest("hello")
  }

  test("when decoding, if the record is missing a field that is present in the schema and the type is option, then set to None") {
    val schema1 = SchemaBuilder.record("foo").fields().requiredString("a").endRecord()
    val schema2 = SchemaBuilder.record("foo").fields().requiredString("a").optionalString("b").endRecord()
    val record = new GenericData.Record(schema1)
    record.put("a", new Utf8("hello"))
    Decoder[OptionalStringTest].decode(record) shouldBe OptionalStringTest("hello", None)
  }
} 
Example 86
Source File: Encoding.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package benchmarks

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer

import benchmarks.record._
import com.sksamuel.avro4s._
import org.apache.avro.generic.{GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

object Encoding extends BenchmarkHelpers {

  @State(Scope.Thread)
  class Setup {
    val record = RecordWithUnionAndTypeField(AttributeValue.Valid[Int](255, t))

    val specificRecord = {
      import benchmarks.record.generated.AttributeValue._
      import benchmarks.record.generated._
      new RecordWithUnionAndTypeField(new ValidInt(255, t))
    }

    val (avro4sEncoder, avro4sWriter) = {
      val schema = AvroSchema[RecordWithUnionAndTypeField]
      val encoder = Encoder[RecordWithUnionAndTypeField]
      val writer = new GenericDatumWriter[GenericRecord](schema)
      (encoder, writer)
    }

    val (handrolledEncoder, handrolledWriter) = {
      import benchmarks.handrolled_codecs._
      implicit val codec: AttributeValueCodec[Int] = AttributeValueCodec[Int]
      implicit val schemaForValid = codec.schemaForValid
      val schema = AvroSchema[RecordWithUnionAndTypeField]
      val encoder = Encoder[RecordWithUnionAndTypeField]
      val writer = new GenericDatumWriter[GenericRecord](schema)
      (encoder, writer)
    }

  }
}

class Encoding extends CommonParams with BenchmarkHelpers {

  import Encoding._

  def encode[T](value: T, encoder: Encoder[T], writer: GenericDatumWriter[GenericRecord]): ByteBuffer = {
    val outputStream = new ByteArrayOutputStream(512)
    val record = encoder.encode(value).asInstanceOf[GenericRecord]
    val enc = EncoderFactory.get().directBinaryEncoder(outputStream, null)
    writer.write(record, enc)
    ByteBuffer.wrap(outputStream.toByteArray)
  }


  @Benchmark
  def avroSpecificRecord(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(setup.specificRecord.toByteBuffer)

  @Benchmark
  def avro4sGenerated(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(encode(setup.record, setup.avro4sEncoder, setup.avro4sWriter))

  @Benchmark
  def avro4sHandrolled(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(encode(setup.record, setup.handrolledEncoder, setup.handrolledWriter))
} 
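The benchmark above goes through avro4s encoders; the sketch below shows the same binary encode/decode path using only the Avro Java API and a hand-built record. The object and schema names are illustrative assumptions.

import java.io.ByteArrayOutputStream

import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

object GenericRecordRoundTrip extends App {
  val schema = SchemaBuilder.record("testRecord").fields().requiredString("data").endRecord()

  val record = new GenericData.Record(schema)
  record.put("data", "test data")

  // write the record as raw Avro binary into an in-memory buffer
  val out = new ByteArrayOutputStream(512)
  val writer = new GenericDatumWriter[GenericRecord](schema)
  val encoder = EncoderFactory.get().directBinaryEncoder(out, null)
  writer.write(record, encoder)
  encoder.flush()

  // read it back with the same schema
  val reader = new GenericDatumReader[GenericRecord](schema)
  val decoder = DecoderFactory.get().binaryDecoder(out.toByteArray, null)
  val back = reader.read(null, decoder)
  assert(back.get("data").toString == "test data")
}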
Example 87
Source File: Decoding.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package benchmarks

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import java.util.Collections

import benchmarks.record._
import com.sksamuel.avro4s._
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.util.ByteBufferInputStream
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

object Decoding extends BenchmarkHelpers {
  @State(Scope.Thread)
  class Setup {
    val avroBytes = {
      import benchmarks.record.generated.AttributeValue._
      import benchmarks.record.generated._
      new RecordWithUnionAndTypeField(new ValidInt(255, t)).toByteBuffer
    }

    val avro4sBytes = encode(RecordWithUnionAndTypeField(AttributeValue.Valid[Int](255, t)))

    val (handrolledDecoder, handrolledReader) = {
      import benchmarks.handrolled_codecs._
      implicit val codec: Codec[AttributeValue[Int]] = AttributeValueCodec[Int]
      implicit val schemaFor: SchemaFor[AttributeValue[Int]] = SchemaFor[AttributeValue[Int]](codec.schema)
      val recordSchemaFor = SchemaFor[RecordWithUnionAndTypeField]
      val decoder = Decoder[RecordWithUnionAndTypeField].withSchema(recordSchemaFor)
      val reader = new GenericDatumReader[GenericRecord](recordSchemaFor.schema)
      (decoder, reader)
    }

    val (avro4sDecoder, avro4sReader) = {
      val decoder = Decoder[RecordWithUnionAndTypeField]
      val reader = new GenericDatumReader[GenericRecord](decoder.schema)
      (decoder, reader)
    }
  }

  def encode[T: Encoder: SchemaFor](value: T): ByteBuffer = {
    val outputStream = new ByteArrayOutputStream(512)
    val encoder = Encoder[T]
    val schema = AvroSchema[T]
    val record = encoder.encode(value).asInstanceOf[GenericRecord]
    val writer = new GenericDatumWriter[GenericRecord](schema)
    val enc = EncoderFactory.get().directBinaryEncoder(outputStream, null)
    writer.write(record, enc)
    ByteBuffer.wrap(outputStream.toByteArray)
  }
}

class Decoding extends CommonParams with BenchmarkHelpers {

  import Decoding._

  def decode[T](bytes: ByteBuffer, decoder: Decoder[T], reader: GenericDatumReader[GenericRecord]): T = {
    val dec =
      DecoderFactory.get().binaryDecoder(new ByteBufferInputStream(Collections.singletonList(bytes.duplicate)), null)
    val record = reader.read(null, dec)
    decoder.decode(record)
  }


  @Benchmark
  def avroSpecificRecord(setup: Setup, blackhole: Blackhole) = {
    import benchmarks.record.generated._
    blackhole.consume(RecordWithUnionAndTypeField.fromByteBuffer(setup.avroBytes.duplicate))
  }

  @Benchmark
  def avro4sHandrolled(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(decode(setup.avro4sBytes, setup.handrolledDecoder, setup.handrolledReader))

  @Benchmark
  def avro4sGenerated(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(decode(setup.avro4sBytes, setup.avro4sDecoder, setup.avro4sReader))
} 
Example 88
Source File: StyleChecker.scala    From big-data-scala-spark   with MIT License 5 votes vote down vote up
import sbt.File
import java.io.ByteArrayOutputStream
import java.io.PrintStream
import org.scalastyle._
import com.typesafe.config.ConfigFactory

object StyleChecker {
  val maxResult = 100

  class CustomTextOutput[T <: FileSpec](stream: PrintStream) extends Output[T] {
    private val messageHelper = new MessageHelper(ConfigFactory.load())

    var fileCount: Int = _
    override def message(m: Message[T]): Unit = m match {
      case StartWork() =>
      case EndWork() =>
      case StartFile(file) =>
        stream.print("Checking file " + file + "...")
        fileCount = 0
      case EndFile(file) =>
        if (fileCount == 0) stream.println(" OK!")
      case StyleError(file, clazz, key, level, args, line, column, customMessage) =>
        report(line, column, messageHelper.text(level.name),
          Output.findMessage(messageHelper, key, args, customMessage))
      case StyleException(file, clazz, message, stacktrace, line, column) =>
        report(line, column, "error", message)
    }

    private def report(line: Option[Int], column: Option[Int], level: String, message: String) {
      if (fileCount == 0) stream.println("")
      fileCount += 1
      stream.println("  " + fileCount + ". " + level + pos(line, column) + ":")
      stream.println("     " + message)
    }

    private def pos(line: Option[Int], column: Option[Int]): String = line match {
      case Some(lineNumber) => " at line " + lineNumber + (column match {
        case Some(columnNumber) => " character " + columnNumber
        case None => ""
      })
      case None => ""
    }
  }

  def score(outputResult: OutputResult) = {
    val penalties = outputResult.errors + outputResult.warnings
    scala.math.max(maxResult - penalties, 0)
  }

  def assess(sources: Seq[File], styleSheetPath: String): (String, Int) = {
    val configFile = new File(styleSheetPath).getAbsolutePath

    val messages = new ScalastyleChecker().checkFiles(
      ScalastyleConfiguration.readFromXml(configFile),
      Directory.getFiles(None, sources))

    val output = new ByteArrayOutputStream()
    val outputResult = new CustomTextOutput(new PrintStream(output)).output(messages)

    val msg = s"""${output.toString}
                 |Processed ${outputResult.files}  file(s)
                 |Found ${outputResult.errors} errors
                 |Found ${outputResult.warnings} warnings
                 |""".stripMargin

    (msg, score(outputResult))
  }
} 
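The core trick in StyleChecker is routing a PrintStream into an in-memory buffer so the report can be returned as a String. A minimal standalone sketch of that capture pattern follows; the object name and message are illustrative.

import java.io.{ByteArrayOutputStream, PrintStream}

object CaptureOutput extends App {
  val buffer = new ByteArrayOutputStream()
  val stream = new PrintStream(buffer)

  stream.println("Checking file Foo.scala... OK!")
  stream.flush()

  // everything printed so far is now available as text
  println(buffer.toString("UTF-8"))
}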
Example 91
Source File: JsDataSpec.scala    From mist   with Apache License 2.0 5 votes vote down vote up
package mist.api.data

import java.io.{ByteArrayOutputStream, ObjectOutputStream}
import java.util
import mist.api.encoding.defaultEncoders._
import mist.api.encoding.JsSyntax._

import org.scalatest._
import org.scalatest.prop.TableDrivenPropertyChecks._

class JsDataSpec extends FunSpec with Matchers {
  import java.{lang => jl, util => ju}
  val rawToData = Table(
    ("raw", "data"),
    (1, JsNumber(1)),
    ("str", JsString("str")),
    (1.2, JsNumber(1.2)),
    (List(1, 2), JsList(Seq(JsNumber(1), JsNumber(2)))),
    (Array(1, 2), JsList(Seq(JsNumber(1), JsNumber(2)))),
    (Map("key" -> "value"), JsMap(Map("key" -> JsString("value"))))
  )

  val javaMap: ju.Map[String, jl.Integer] = {
    val m = new ju.HashMap[String, jl.Integer](1)
    m.put("test", new jl.Integer(42))
    m
  }

  val javaRawToData = Table(
    ("raw", "data"),
    (new jl.Integer(42), JsNumber(42)),
    (new jl.Double(42.0), JsNumber(42.0)),
    (ju.Arrays.asList(new jl.Integer(42)), JsList(Seq(JsNumber(42)))),
    (javaMap, JsMap(Map("test"-> JsNumber(42))))
  )


  it("should parse raw any structure") {
    forAll(rawToData) { (raw: Any, jsLike: JsData) =>
      JsData.fromScala(raw) shouldBe jsLike
    }
  }
  it("should parse raw any java structure") {
    forAll(javaRawToData){ (raw: Any, jsLike: JsData) =>
      JsData.fromJava(raw) shouldBe jsLike
    }
  }

  describe("JsLikeMap") {

    // problem with MapLike - akka can't serialize it
    // scala.collection.immutable.MapLike$$anon$2
    //    java.io.NotSerializableException: scala.collection.immutable.MapLike$$anon$2
    it("JsLikeMap should be serializable") {
      val map = Map("1" -> 1, "2" -> 2).mapValues(i => JsNumber(i))
      val jslikeMap = JsMap(map)

      val bos = new ByteArrayOutputStream
      val out = new ObjectOutputStream(bos)
      out.writeObject(jslikeMap)
      out.close()
    }
  }

  it("should return untyped map") {
    val js = JsMap(
      "a" -> 1.js,
      "b" -> false.js,
      "c" -> JsList(Seq(
        JsMap("x" -> "y".js)
      ))
    )
    val exp = Map(
      "a" -> 1,
      "b" -> false,
      "c" -> Seq(
        Map("x" -> "y")
      )
    )
    JsData.untyped(js) shouldBe exp
  }

} 
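The serializability check above only writes the object out; here is a minimal sketch of the full Java-serialization round trip, using a plain Map so it stands alone. The object and value names are assumptions.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object JavaSerializationRoundTrip extends App {
  val payload = Map("1" -> 1, "2" -> 2)

  // write the object into an in-memory buffer
  val bos = new ByteArrayOutputStream()
  val oos = new ObjectOutputStream(bos)
  oos.writeObject(payload)
  oos.close()

  // read it back and compare
  val ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray))
  val restored = ois.readObject().asInstanceOf[Map[String, Int]]
  assert(restored == payload)
}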
Example 92
Source File: ProtoMarshaller.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream }
import io.grpc.KnownLength
import akka.annotation.InternalApi
import akka.grpc.ProtobufSerializer


@InternalApi
class ProtoMarshaller[T <: com.google.protobuf.Message](u: ProtobufSerializer[T])
    extends io.grpc.MethodDescriptor.Marshaller[T] {
  override def parse(stream: InputStream): T = {
    val baos = new ByteArrayOutputStream(math.max(64, stream.available()))
    val buffer = new Array[Byte](32 * 1024)

    // Blocking calls underneath...
    // we can't avoid it for the moment because we are relying on the Netty's Channel API
    var bytesRead = stream.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = stream.read(buffer)
    }
    u.deserialize(akka.util.ByteString(baos.toByteArray))
  }

  override def stream(value: T): InputStream =
    new ByteArrayInputStream(value.toByteArray) with KnownLength
} 
Example 93
Source File: Gzip.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }
import java.util.zip.{ GZIPInputStream, GZIPOutputStream }

import akka.util.ByteString

object Gzip extends Codec {
  override val name: String = "gzip"

  override def compress(uncompressed: ByteString): ByteString = {
    val baos = new ByteArrayOutputStream(uncompressed.size)
    val gzos = new GZIPOutputStream(baos)
    gzos.write(uncompressed.toArray)
    gzos.flush()
    gzos.close()
    ByteString(baos.toByteArray)
  }

  override def uncompress(compressed: ByteString): ByteString = {
    val gzis = new GZIPInputStream(new ByteArrayInputStream(compressed.toArray))

    val baos = new ByteArrayOutputStream(compressed.size)
    val buffer = new Array[Byte](32 * 1024)
    var read = gzis.read(buffer)
    while (read != -1) {
      baos.write(buffer, 0, read)
      read = gzis.read(buffer)
    }
    ByteString(baos.toByteArray)
  }
} 
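A minimal round trip through the codec above, assuming the Gzip object is importable from akka.grpc.internal and that akka.util.ByteString is on the classpath; the payload and object name are illustrative.

import akka.grpc.internal.Gzip
import akka.util.ByteString

object GzipRoundTrip extends App {
  val original = ByteString("a" * 10000)

  val compressed = Gzip.compress(original)   // deflate the highly repetitive payload
  val restored   = Gzip.uncompress(compressed)

  assert(restored == original)
  println(s"original=${original.size} bytes, compressed=${compressed.size} bytes")
}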
Example 94
Source File: Marshaller.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream }
import io.grpc.KnownLength
import akka.annotation.InternalApi
import akka.grpc.ProtobufSerializer


@InternalApi
final class Marshaller[T <: scalapb.GeneratedMessage](u: ProtobufSerializer[T])
    extends io.grpc.MethodDescriptor.Marshaller[T] {
  override def parse(stream: InputStream): T = {
    val baos = new ByteArrayOutputStream(math.max(64, stream.available()))
    val buffer = new Array[Byte](32 * 1024)

    // Blocking calls underneath...
    // we can't avoid it for the moment because we are relying on the Netty's Channel API
    var bytesRead = stream.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = stream.read(buffer)
    }
    u.deserialize(akka.util.ByteString(baos.toByteArray))
  }

  override def stream(value: T): InputStream =
    new ByteArrayInputStream(value.toByteArray) with KnownLength
} 
Example 95
Source File: Main.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.gen

import java.io.ByteArrayOutputStream

import com.google.protobuf.compiler.PluginProtos.CodeGeneratorRequest
import akka.grpc.gen.javadsl.{ JavaClientCodeGenerator, JavaInterfaceCodeGenerator, JavaServerCodeGenerator }
import akka.grpc.gen.scaladsl.{ ScalaClientCodeGenerator, ScalaServerCodeGenerator, ScalaTraitCodeGenerator }

// This is the protoc plugin that the gradle plugin uses
object Main extends App {
  val inBytes: Array[Byte] = {
    val baos = new ByteArrayOutputStream(math.max(64, System.in.available()))
    val buffer = new Array[Byte](32 * 1024)

    var bytesRead = System.in.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = System.in.read(buffer)
    }
    baos.toByteArray
  }

  val req = CodeGeneratorRequest.parseFrom(inBytes)
  val KeyValueRegex = """([^=]+)=(.*)""".r
  val parameters = req.getParameter
    .split(",")
    .flatMap {
      case KeyValueRegex(key, value) => Some((key.toLowerCase, value))
      case _                         => None
    }
    .toMap

  private val languageScala: Boolean = parameters.get("language").map(_.equalsIgnoreCase("scala")).getOrElse(false)

  private val generateClient: Boolean =
    parameters.get("generate_client").map(!_.equalsIgnoreCase("false")).getOrElse(true)

  private val generateServer: Boolean =
    parameters.get("generate_server").map(!_.equalsIgnoreCase("false")).getOrElse(true)

  private val extraGenerators: List[String] =
    parameters.getOrElse("extra_generators", "").split(";").toList.filter(_ != "")

  private val logger = parameters.get("logfile").map(new FileLogger(_)).getOrElse(SilencedLogger)

  val out = {
    val codeGenerators =
      if (languageScala) {
        // Scala
        if (generateClient && generateServer)
          Seq(ScalaTraitCodeGenerator, ScalaClientCodeGenerator, ScalaServerCodeGenerator)
        else if (generateClient) Seq(ScalaTraitCodeGenerator, ScalaClientCodeGenerator)
        else if (generateServer) Seq(ScalaTraitCodeGenerator, ScalaServerCodeGenerator)
        else throw new IllegalArgumentException("At least one of generateClient or generateServer must be enabled")
      } else {
        // Java
        if (generateClient && generateServer)
          Seq(JavaInterfaceCodeGenerator, JavaClientCodeGenerator, JavaServerCodeGenerator)
        else if (generateClient) Seq(JavaInterfaceCodeGenerator, JavaClientCodeGenerator)
        else if (generateServer) Seq(JavaInterfaceCodeGenerator, JavaServerCodeGenerator)
        else throw new IllegalArgumentException("At least one of generateClient or generateServer must be enabled")
      }
    val loadedExtraGenerators =
      extraGenerators.map(cls => Class.forName(cls).getDeclaredConstructor().newInstance().asInstanceOf[CodeGenerator])

    (codeGenerators ++ loadedExtraGenerators).foreach { g =>
      val gout = g.run(req, logger)
      System.out.write(gout.toByteArray)
      System.out.flush()
    }
  }
} 
Example 96
Source File: Main.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.scalapb

import java.io.ByteArrayOutputStream

import scalapb.ScalaPbCodeGenerator

object Main extends App {
  val inBytes: Array[Byte] = {
    val baos = new ByteArrayOutputStream(math.max(64, System.in.available()))
    val buffer = Array.ofDim[Byte](32 * 1024)

    var bytesRead = System.in.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = System.in.read(buffer)
    }
    baos.toByteArray
  }

  val outBytes = ScalaPbCodeGenerator.run(inBytes)

  System.out.write(outBytes)
  System.out.flush()
} 
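Both Main objects use the same drain-an-InputStream loop; below is a generic standalone version of that pattern. The helper and object names are assumptions for illustration.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

object ReadAllBytes extends App {
  // Drain any InputStream into a byte array via an in-memory buffer.
  def readAll(in: InputStream): Array[Byte] = {
    val baos = new ByteArrayOutputStream(math.max(64, in.available()))
    val buffer = new Array[Byte](32 * 1024)
    var bytesRead = in.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = in.read(buffer)
    }
    baos.toByteArray
  }

  val bytes = readAll(new ByteArrayInputStream("hello".getBytes("UTF-8")))
  assert(new String(bytes, "UTF-8") == "hello")
}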
Example 97
Source File: TestingTypedCount.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
import org.apache.spark.sql.hive.execution.TestingTypedCount.State
import org.apache.spark.sql.types._

@ExpressionDescription(
  usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " +
          "but implements ObjectAggregateFunction.")
case class TestingTypedCount(
    child: Expression,
    mutableAggBufferOffset: Int = 0,
    inputAggBufferOffset: Int = 0)
  extends TypedImperativeAggregate[TestingTypedCount.State] {

  def this(child: Expression) = this(child, 0, 0)

  override def children: Seq[Expression] = child :: Nil

  override def dataType: DataType = LongType

  override def nullable: Boolean = false

  override def createAggregationBuffer(): State = TestingTypedCount.State(0L)

  override def update(buffer: State, input: InternalRow): State = {
    if (child.eval(input) != null) {
      buffer.count += 1
    }
    buffer
  }

  override def merge(buffer: State, input: State): State = {
    buffer.count += input.count
    buffer
  }

  override def eval(buffer: State): Any = buffer.count

  override def serialize(buffer: State): Array[Byte] = {
    val byteStream = new ByteArrayOutputStream()
    val dataStream = new DataOutputStream(byteStream)
    dataStream.writeLong(buffer.count)
    byteStream.toByteArray
  }

  override def deserialize(storageFormat: Array[Byte]): State = {
    val byteStream = new ByteArrayInputStream(storageFormat)
    val dataStream = new DataInputStream(byteStream)
    TestingTypedCount.State(dataStream.readLong())
  }

  override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
    copy(mutableAggBufferOffset = newMutableAggBufferOffset)

  override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate =
    copy(inputAggBufferOffset = newInputAggBufferOffset)

  override val prettyName: String = "typed_count"
}

object TestingTypedCount {
  case class State(var count: Long)
} 
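The serialize/deserialize pair above is simply a Long round-tripped through DataOutputStream and DataInputStream over in-memory buffers. A standalone sketch of that step (object name invented for illustration):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

object LongRoundTripSketch extends App {
  // Write a Long into an in-memory buffer, then read it back.
  val byteStream = new ByteArrayOutputStream()
  val dataStream = new DataOutputStream(byteStream)
  dataStream.writeLong(42L)
  dataStream.flush()

  val in = new DataInputStream(new ByteArrayInputStream(byteStream.toByteArray))
  assert(in.readLong() == 42L)
  println("round-trip ok")
}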
Example 98
Source File: RawTextSender.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam


private[streaming]
object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
} 
Example 99
Source File: RateLimitedOutputStreamSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.ByteArrayOutputStream
import java.nio.charset.StandardCharsets
import java.util.concurrent.TimeUnit._

import org.apache.spark.SparkFunSuite

class RateLimitedOutputStreamSuite extends SparkFunSuite {

  private def benchmark[U](f: => U): Long = {
    val start = System.nanoTime
    f
    System.nanoTime - start
  }

  test("write") {
    val underlying = new ByteArrayOutputStream
    val data = "X" * 41000
    val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000)
    val elapsedNs = benchmark { stream.write(data.getBytes(StandardCharsets.UTF_8)) }

    val seconds = SECONDS.convert(elapsedNs, NANOSECONDS)
    assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.")
    assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.")
    assert(underlying.toString("UTF-8") === data)
  }
} 
Example 100
Source File: RBackendAuthHandler.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.api.r

import java.io.{ByteArrayOutputStream, DataOutputStream}
import java.nio.charset.StandardCharsets.UTF_8

import io.netty.channel.{Channel, ChannelHandlerContext, SimpleChannelInboundHandler}

import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils


private class RBackendAuthHandler(secret: String)
  extends SimpleChannelInboundHandler[Array[Byte]] with Logging {

  override def channelRead0(ctx: ChannelHandlerContext, msg: Array[Byte]): Unit = {
    // The R code adds a null terminator to serialized strings, so ignore it here.
    val clientSecret = new String(msg, 0, msg.length - 1, UTF_8)
    try {
      require(secret == clientSecret, "Auth secret mismatch.")
      ctx.pipeline().remove(this)
      writeReply("ok", ctx.channel())
    } catch {
      case e: Exception =>
        logInfo("Authentication failure.", e)
        writeReply("err", ctx.channel())
        ctx.close()
    }
  }

  private def writeReply(reply: String, chan: Channel): Unit = {
    val out = new ByteArrayOutputStream()
    SerDe.writeString(new DataOutputStream(out), reply)
    chan.writeAndFlush(out.toByteArray())
  }

} 
Example 101
Source File: ByteBufferOutputStream.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer


private[spark] class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) {

  def this() = this(32)

  def getCount(): Int = count

  private[this] var closed: Boolean = false

  override def write(b: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b)
  }

  override def write(b: Array[Byte], off: Int, len: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b, off, len)
  }

  override def reset(): Unit = {
    require(!closed, "cannot reset a closed ByteBufferOutputStream")
    super.reset()
  }

  override def close(): Unit = {
    if (!closed) {
      super.close()
      closed = true
    }
  }

  def toByteBuffer: ByteBuffer = {
    require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed")
    ByteBuffer.wrap(buf, 0, count)
  }
} 
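ByteBufferOutputStream above avoids an extra copy by wrapping its internal buf directly; since the class is private[spark], the plain-JDK route outside Spark is to copy once via toByteArray and wrap the result. A minimal sketch (object name invented for illustration):

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer

object ToByteBufferSketch extends App {
  // Accumulate bytes in memory, then expose them as a ByteBuffer via ByteBuffer.wrap.
  val out = new ByteArrayOutputStream()
  out.write(Array[Byte](1, 2, 3, 4))
  val buffer: ByteBuffer = ByteBuffer.wrap(out.toByteArray)
  println(buffer.remaining()) // 4
}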
Example 102
Source File: PythonRDDSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.api.python

import java.io.{ByteArrayOutputStream, DataOutputStream}
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite

class PythonRDDSuite extends SparkFunSuite {

  test("Writing large strings to the worker") {
    val input: List[String] = List("a"*100000)
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    PythonRDD.writeIteratorToStream(input.iterator, buffer)
  }

  test("Handle nulls gracefully") {
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    // Should not have NPE when write an Iterator with null in it
    // The correctness will be tested in Python
    PythonRDD.writeIteratorToStream(Iterator("a", null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer)
    PythonRDD.writeIteratorToStream(Iterator("a".getBytes(StandardCharsets.UTF_8), null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes(StandardCharsets.UTF_8)), buffer)
    PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer)
    PythonRDD.writeIteratorToStream(Iterator(
      (null, null),
      ("a".getBytes(StandardCharsets.UTF_8), null),
      (null, "b".getBytes(StandardCharsets.UTF_8))), buffer)
  }
} 
Example 103
Source File: GenericAvroSerializerSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 104
Source File: TaskDescriptionSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import java.io.{ByteArrayOutputStream, DataOutputStream, UTFDataFormatException}
import java.nio.ByteBuffer
import java.util.Properties

import scala.collection.mutable.HashMap

import org.apache.spark.SparkFunSuite

class TaskDescriptionSuite extends SparkFunSuite {
  test("encoding and then decoding a TaskDescription results in the same TaskDescription") {
    val originalFiles = new HashMap[String, Long]()
    originalFiles.put("fileUrl1", 1824)
    originalFiles.put("fileUrl2", 2)

    val originalJars = new HashMap[String, Long]()
    originalJars.put("jar1", 3)

    val originalProperties = new Properties()
    originalProperties.put("property1", "18")
    originalProperties.put("property2", "test value")
    // SPARK-19796 -- large property values (like a large job description for a long sql query)
    // can cause problems for DataOutputStream, make sure we handle correctly
    val sb = new StringBuilder()
    (0 to 10000).foreach(_ => sb.append("1234567890"))
    val largeString = sb.toString()
    originalProperties.put("property3", largeString)
    // make sure we've got a good test case
    intercept[UTFDataFormatException] {
      val out = new DataOutputStream(new ByteArrayOutputStream())
      try {
        out.writeUTF(largeString)
      } finally {
        out.close()
      }
    }

    // Create a dummy byte buffer for the task.
    val taskBuffer = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4))

    val originalTaskDescription = new TaskDescription(
      taskId = 1520589,
      attemptNumber = 2,
      executorId = "testExecutor",
      name = "task for test",
      index = 19,
      originalFiles,
      originalJars,
      originalProperties,
      taskBuffer
    )

    val serializedTaskDescription = TaskDescription.encode(originalTaskDescription)
    val decodedTaskDescription = TaskDescription.decode(serializedTaskDescription)

    // Make sure that all of the fields in the decoded task description match the original.
    assert(decodedTaskDescription.taskId === originalTaskDescription.taskId)
    assert(decodedTaskDescription.attemptNumber === originalTaskDescription.attemptNumber)
    assert(decodedTaskDescription.executorId === originalTaskDescription.executorId)
    assert(decodedTaskDescription.name === originalTaskDescription.name)
    assert(decodedTaskDescription.index === originalTaskDescription.index)
    assert(decodedTaskDescription.addedFiles.equals(originalFiles))
    assert(decodedTaskDescription.addedJars.equals(originalJars))
    assert(decodedTaskDescription.properties.equals(originalTaskDescription.properties))
    assert(decodedTaskDescription.serializedTask.equals(taskBuffer))
  }
} 
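The intercept[UTFDataFormatException] above works because DataOutputStream.writeUTF prefixes the string with a 2-byte length and therefore rejects any string whose encoded form exceeds 65535 bytes. A standalone sketch of that limit (object name invented for illustration):

import java.io.{ByteArrayOutputStream, DataOutputStream, UTFDataFormatException}

object WriteUtfLimitSketch extends App {
  val out = new DataOutputStream(new ByteArrayOutputStream())
  try {
    // ~100 KB of ASCII encodes to far more than the 65535-byte writeUTF limit.
    out.writeUTF("x" * 100000)
    println("unexpectedly succeeded")
  } catch {
    case _: UTFDataFormatException => println("oversized string rejected as expected")
  } finally {
    out.close()
  }
}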
Example 105
Source File: TestResults.scala    From mimir   with Apache License 2.0 5 votes vote down vote up
package mimir.util

import java.nio.file.Files
import java.nio.file.Paths
import java.nio.charset.Charset
import java.nio.charset.StandardCharsets
import java.io.ByteArrayOutputStream
import java.io.PrintWriter
import org.rogach.scallop.ScallopConf

object TestResults {
  def main(args: Array[String]) {
    val config = new TestResultConfig(args)
    println("running tests....")
    parseTestResults(config.sbtPath(),config.sbtCmd())
  }
  
  def parseTestResults(sbtPath:String = "/opt/local/bin/sbt", sbtCmd:String = "test") = {
    val procOutput = runCommand(Seq(sbtPath,sbtCmd))._2.replaceAll("""\x1b\[[0-9;]*[a-zA-Z]""", "")
    
    val pattern = """(?m)^.*\[info\] Total.*$|^.*\[info\] Finished.*$|^.*\[info\] [\d]+ examp.*$""".r
    
    val header = "test_name,seconds,examples,expectations,failures,errors,skipped\n"
    
    val pattern2 = """\[info\] Total for specification (\w+)\s+\[info\] Finished in (.+)\R\[info\] (.+)\R""".r
    val pattern3 = """([a-zA-Z]+): (?:(\d+) minutes? )?(?:(\d+) seconds?[,:] )?(?:(\d+) ms[,:] )?(\d+) examples?, (?:(\d+) expectations?, )?(\d+) failures?, (\d+) errors?(?:, (\d+) skipped)?""".r
    val string = pattern2.findAllMatchIn(procOutput).map(mat => s"${mat.group(1)}: ${mat.group(2)}: ${mat.group(3)}")
      .map(nline => nline match {
        case pattern3(test_name,minutes,seconds,ms,examples,expectations,failures,errors,skipped) => {
          val allseconds = (minutes match {
            case "" => 0
            case null => 0
            case x => x.toInt*60
          }) + (seconds match {
            case "" => 0
            case null => 0
            case x => x.toInt
          }) +  (ms match {
            case "" => 0.0
            case null => 0.0
            case x => x.toDouble/1000.0
          })
          s"$test_name,$allseconds,$examples,$expectations,$failures,$errors,$skipped"
        }
      }).mkString("\n")
    
    val outStr = header + string
      
    println(outStr)
    Files.write(Paths.get("test_output.csv"), outStr.getBytes(StandardCharsets.UTF_8))
  }
  
  import sys.process._
  def runCommand(cmd: Seq[String]): (Int, String, String) = {
    val stdoutStream = new ByteArrayOutputStream
    val stderrStream = new ByteArrayOutputStream
    val stdoutWriter = new PrintWriter(stdoutStream)
    val stderrWriter = new PrintWriter(stderrStream)
    val exitValue = cmd.!(ProcessLogger(stdoutWriter.println, stderrWriter.println))
    stdoutWriter.close()
    stderrWriter.close()
    (exitValue, stdoutStream.toString, stderrStream.toString)
  }
  
  
}

class TestResultConfig(arguments: Seq[String]) extends ScallopConf(arguments)
{
  val experimental = opt[List[String]]("X", default = Some(List[String]()))
  val sparkHost = opt[String]("sparkHost", descr = "The IP or hostname of the spark master",
    default = Some("spark-master.local"))
  val sparkPort = opt[String]("sparkPort", descr = "The port of the spark master",
    default = Some("7077"))
  val sbtPath = opt[String]("sbtPath", descr = "The path to sbt binary",
    default = Some("/opt/local/bin/sbt"))
  val sbtCmd = opt[String]("sbtCmd", descr = "The sbt command to run",
    default = Some("test"))
} 
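runCommand above captures a child process's stdout and stderr in memory by pointing ProcessLogger at PrintWriters backed by ByteArrayOutputStreams. A stripped-down sketch of the same idea (object name invented for illustration; assumes an echo binary on the PATH):

import java.io.{ByteArrayOutputStream, PrintWriter}
import scala.sys.process._

object CaptureProcessOutputSketch extends App {
  val stdout = new ByteArrayOutputStream
  val stderr = new ByteArrayOutputStream
  val outWriter = new PrintWriter(stdout)
  val errWriter = new PrintWriter(stderr)
  // ProcessLogger routes each output line to the corresponding writer.
  val exitValue = Seq("echo", "hello").!(ProcessLogger(outWriter.println, errWriter.println))
  outWriter.close()
  errWriter.close()
  println(s"exit=$exitValue stdout=${stdout.toString.trim}")
}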
Example 106
Source File: SageMakerProtobufWriter.scala    From sagemaker-spark   with Apache License 2.0 5 votes vote down vote up
package com.amazonaws.services.sagemaker.sparksdk.protobuf

import java.io.ByteArrayOutputStream

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, NullWritable}
import org.apache.hadoop.mapreduce.{RecordWriter, TaskAttemptContext}

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types.StructType


  def write(row: Row): Unit = {
    val labelColumnName = options.getOrElse("labelColumnName", "label")
    val featuresColumnName = options.getOrElse("featuresColumnName", "features")

    val record = ProtobufConverter.rowToProtobuf(row, featuresColumnName, Some(labelColumnName))
    record.writeTo(byteArrayOutputStream)

    recordWriter.write(NullWritable.get(), new BytesWritable(byteArrayOutputStream.toByteArray))
    byteArrayOutputStream.reset()
  }

  override def close(): Unit = {
    recordWriter.close(context)
  }
} 
Example 107
Source File: RecordIOOutputFormatTests.scala    From sagemaker-spark   with Apache License 2.0 5 votes vote down vote up
package com.amazonaws.services.sagemaker.sparksdk.protobuf

import java.io.ByteArrayOutputStream

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path}
import org.apache.hadoop.io.{BytesWritable, NullWritable}
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.mockito.Matchers.any
import org.mockito.Mockito.{verify, when}
import org.scalatest.{BeforeAndAfter, FlatSpec}
import org.scalatest.mock.MockitoSugar

import com.amazonaws.services.sagemaker.sparksdk.protobuf.RecordIOOutputFormat.SageMakerProtobufRecordWriter


class RecordIOOutputFormatTests extends FlatSpec with MockitoSugar with BeforeAndAfter {

  var sagemakerProtobufRecordWriter: SageMakerProtobufRecordWriter = _
  var mockOutputStream : FSDataOutputStream = _
  var byteArrayOutputStream: ByteArrayOutputStream = _
  var mockTaskAttemptContext: TaskAttemptContext = _
  var mockPath: Path = _
  var mockFileSystem: FileSystem = _

  before {
    byteArrayOutputStream = new ByteArrayOutputStream()
    mockOutputStream = mock[FSDataOutputStream]
    sagemakerProtobufRecordWriter = new SageMakerProtobufRecordWriter(mockOutputStream)
    mockTaskAttemptContext = mock[TaskAttemptContext]
    mockPath = mock[Path]
    mockFileSystem = mock[FileSystem]
  }

  it should "write an empty array of bytes" in {
    val bytesWritable = new BytesWritable(byteArrayOutputStream.toByteArray)

    val bytes = ProtobufConverter.byteArrayToRecordIOEncodedByteArray(bytesWritable.getBytes)
    sagemakerProtobufRecordWriter.write(NullWritable.get(), bytesWritable)

    verify(mockOutputStream).write(bytes, 0, bytes.length)
  }


  it should "write an array of bytes" in {
    val byteArray = Array[Byte](0, 0, 0, 0)
    byteArrayOutputStream.write(byteArray)
    val bytesWritable = new BytesWritable(byteArrayOutputStream.toByteArray)
    val bytes = ProtobufConverter.byteArrayToRecordIOEncodedByteArray(bytesWritable.getBytes)

    sagemakerProtobufRecordWriter.write(NullWritable.get(), bytesWritable)

    verify(mockOutputStream).write(bytes, 0, bytes.length)
  }

  it should "write an array of bytes, padding as necessary" in {
    byteArrayOutputStream.write(5)
    val bytesWritable = new BytesWritable(byteArrayOutputStream.toByteArray)
    val bytes = ProtobufConverter.byteArrayToRecordIOEncodedByteArray(bytesWritable.getBytes)

    sagemakerProtobufRecordWriter.write(NullWritable.get(), bytesWritable)

    verify(mockOutputStream).write(bytes, 0, bytes.length)
  }

  it should "write an array of bytes, padding only as much as necessary" in {
    byteArrayOutputStream.write(Array[Byte](0, 0, 0, 0, 0))
    val bytesWritable = new BytesWritable(byteArrayOutputStream.toByteArray)
    val bytes = ProtobufConverter.byteArrayToRecordIOEncodedByteArray(bytesWritable.getBytes)

    sagemakerProtobufRecordWriter.write(NullWritable.get(), bytesWritable)

    verify(mockOutputStream).write(bytes, 0, bytes.length)
  }

  it should "create a record writer from a FSDataOutputStream created by the filesystem" in {
    val mockTaskAttemptContext = mock[TaskAttemptContext]
    val mockPath = mock[Path]
    val mockFileSystem = mock[FileSystem]
    when(mockPath.getFileSystem(any[Configuration])).thenReturn(mockFileSystem)
    new RecordIOOutputFormat() {
      override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = {
        mockPath
      }
    }.getRecordWriter(mockTaskAttemptContext)
    verify(mockFileSystem).create(mockPath, true)

  }

} 
Example 108
Source File: ProcessSpawner.scala    From akka-http-health   with MIT License 5 votes vote down vote up
package io.github.lhotari.akka.http.health

import java.io.ByteArrayOutputStream
import java.lang.System.getProperty
import java.net.{URL, URLClassLoader}

import org.apache.commons.io.IOUtils

import scala.collection.JavaConverters._
import scala.reflect.runtime.universe._

case class ProcessResult(retval: Integer, output: String)

trait ProcessSpawner {
  lazy val classpath = resolveClassPath()
  val sep = getProperty("file.separator")
  val javaExecutablePath = getProperty("java.home") + sep + "bin" + sep + "java"

  private def resolveClassPath() = {
    getClass.getClassLoader match {
      case urlClassLoader: URLClassLoader =>
        urlClassLoader.getURLs.collect {
          case url: URL => url.getFile
        }.mkString(getProperty("path.separator"))
      case _ =>
        getProperty("java.class.path")
    }
  }

  def executeInSeparateProcess[T](mainClassType: T, maxMemoryMB: Integer = 100, extraJvmOpts: Seq[String] = Nil, args: Seq[String] = Nil)(implicit tag: WeakTypeTag[T]): ProcessResult = {
    val className = tag.tpe.termSymbol.fullName
    val processBuilder = new ProcessBuilder(javaExecutablePath).redirectErrorStream(true)
    val commands = processBuilder.command()
    commands.add(s"-Xmx${maxMemoryMB}m")
    commands.addAll(extraJvmOpts.asJava)
    commands.add("-cp")
    commands.add(classpath)
    commands.add(className)
    commands.addAll(args.asJava)
    println(String.join(" ", commands))
    val process = processBuilder.start()
    val output = new ByteArrayOutputStream()
    IOUtils.copy(process.getInputStream, output)
    ProcessResult(process.waitFor(), output.toString())
  }
} 
Example 109
Source File: package.scala    From pulsar4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.pulsar4s

import java.io.ByteArrayOutputStream
import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import com.sksamuel.avro4s.AvroSchema
import com.sksamuel.avro4s.AvroInputStream
import com.sksamuel.avro4s.AvroOutputStream
import com.sksamuel.avro4s.Decoder
import com.sksamuel.avro4s.Encoder
import com.sksamuel.avro4s.SchemaFor
import org.apache.pulsar.client.api.Schema
import org.apache.pulsar.common.schema.{SchemaInfo, SchemaType}

import scala.annotation.implicitNotFound


package object avro {

  @implicitNotFound("No Avro Schema for type ${T} found.")
  implicit def avroSchema[T: Manifest: SchemaFor: Encoder: Decoder]: Schema[T] = new Schema[T] {

    val schema: org.apache.avro.Schema = AvroSchema[T]

    override def clone(): Schema[T] = this

    override def encode(t: T): Array[Byte] = {
      val baos = new ByteArrayOutputStream
      val aos = AvroOutputStream.binary[T].to(baos).build(schema)
      aos.write(t)
      aos.flush()
      aos.close()
      baos.toByteArray()
    }

    override def decode(bytes: Array[Byte]): T = {
      val bais = new ByteArrayInputStream(bytes)
      val ais = AvroInputStream.binary[T].from(bais).build(schema)
      val first = ais.iterator.next()
      ais.close()
      first
    }

    override def getSchemaInfo: SchemaInfo =
      new SchemaInfo()
        .setName(manifest[T].runtimeClass.getCanonicalName)
        .setType(SchemaType.AVRO)
        .setSchema(schema.toString.getBytes(StandardCharsets.UTF_8))
  }
} 
Example 110
Source File: RawTextSender.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.{SparkConf, Logging}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam


private[streaming]
object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
} 
Example 111
Source File: RateLimitedOutputStreamSuite.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.ByteArrayOutputStream
import java.util.concurrent.TimeUnit._

import org.apache.spark.SparkFunSuite

class RateLimitedOutputStreamSuite extends SparkFunSuite {

  private def benchmark[U](f: => U): Long = {
    val start = System.nanoTime
    f
    System.nanoTime - start
  }

  test("write") {
    val underlying = new ByteArrayOutputStream
    val data = "X" * 41000
    val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000)
    val elapsedNs = benchmark { stream.write(data.getBytes("UTF-8")) }

    val seconds = SECONDS.convert(elapsedNs, NANOSECONDS)
    assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.")
    assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.")
    assert(underlying.toString("UTF-8") === data)
  }
} 
Example 112
Source File: PythonRDDSuite.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.api.python

import java.io.{ByteArrayOutputStream, DataOutputStream}

import org.apache.spark.SparkFunSuite

class PythonRDDSuite extends SparkFunSuite {

  test("Writing large strings to the worker") {
    val input: List[String] = List("a"*100000)
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    PythonRDD.writeIteratorToStream(input.iterator, buffer)
  }

  test("Handle nulls gracefully") {
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    // Should not have NPE when write an Iterator with null in it
    // The correctness will be tested in Python
    PythonRDD.writeIteratorToStream(Iterator("a", null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer)
    PythonRDD.writeIteratorToStream(Iterator("a".getBytes, null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes), buffer)
    PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer)
    PythonRDD.writeIteratorToStream(
      Iterator((null, null), ("a".getBytes, null), (null, "b".getBytes)), buffer)
  }
} 
Example 113
Source File: GenericAvroSerializerSuite.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 114
Source File: DecoratorTest.scala    From Elysium   with MIT License 5 votes vote down vote up
package nz.daved.elysium.core

import java.io.{ByteArrayOutputStream, PrintStream}

import org.scalatest.{FlatSpec, Matchers}

class DecoratorTest extends FlatSpec with Matchers {

  "@before" should "deal with anonymous functions passed in" in {
    val out: ByteArrayOutputStream = new ByteArrayOutputStream()
    Console.withOut(new PrintStream(out)) {
      DecoratorTestObject.world()
    }
    out.toString.stripLineEnd shouldBe "hello world"
  }

  "@after" should "deal with anonymous functions passed in" in {
    val out: ByteArrayOutputStream = new ByteArrayOutputStream()
    Console.withOut(new PrintStream(out)) {
      DecoratorTestObject.hello()
    }
    out.toString.stripLineEnd shouldBe "hello world"
  }
} 
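Both tests above rely on Console.withOut to redirect anything printed inside the block into an in-memory buffer. A minimal standalone sketch of that capture pattern (object name invented for illustration):

import java.io.{ByteArrayOutputStream, PrintStream}

object CaptureConsoleSketch extends App {
  val out = new ByteArrayOutputStream()
  // Everything printed inside the block goes to the buffer instead of stdout.
  Console.withOut(new PrintStream(out)) {
    println("hello world")
  }
  assert(out.toString.stripLineEnd == "hello world")
  println("captured: " + out.toString.stripLineEnd)
}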
Example 115
Source File: SerializationTestHelper.scala    From xmlconfect   with Apache License 2.0 5 votes vote down vote up
package com.mthaler.xmlconfect

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream }

object SerializationTestHelper {

  
  def serializeDeserialize[T](obj: T): T = {
    val bout = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(bout)
    out.writeObject(obj)
    val bin = new ByteArrayInputStream(bout.toByteArray)
    val in = new ObjectInputStream(bin)
    in.readObject().asInstanceOf[T]
  }
} 
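A quick usage sketch of the helper above (the Payload case class is invented for illustration; case classes are Serializable by default, and the helper is assumed to be on the classpath):

import com.mthaler.xmlconfect.SerializationTestHelper

object SerializationRoundTripSketch extends App {
  case class Payload(name: String, count: Int)

  // Round-trip through Java serialization backed by in-memory streams.
  val copy = SerializationTestHelper.serializeDeserialize(Payload("example", 3))
  assert(copy == Payload("example", 3))
  println(copy)
}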
Example 116
Source File: LandmarkIOTests.scala    From scalismo   with Apache License 2.0 5 votes vote down vote up
package scalismo.io

import java.io.{ByteArrayOutputStream, File, InputStream}
import java.net.URLDecoder

import breeze.linalg.DenseVector
import scalismo.ScalismoTestSuite
import scalismo.geometry._
import scalismo.statisticalmodel.MultivariateNormalDistribution

import scala.io.Source
import scala.language.implicitConversions
import scala.collection.immutable.Seq

class LandmarkIOTests extends ScalismoTestSuite {

  implicit def doubleToFloat(d: Double): Float = d.toFloat

  implicit def inputStreamToSource(s: InputStream): Source = Source.fromInputStream(s)

  describe("Spray LandmarkIO") {

    val csvName = "/landmarks.csv"
    def csvStream() = getClass.getResourceAsStream(csvName)

    val jsonName = "/landmarks.json"
    def jsonStream() = getClass.getResourceAsStream(jsonName)

    

    def distWithDefaultVectors(d1: Double, d2: Double, d3: Double): MultivariateNormalDistribution = {
      val axes = List(DenseVector[Double](1, 0, 0), DenseVector[Double](0, 1, 0), DenseVector[Double](0, 0, 1))
      val devs = List(d1, d2, d3)
      val data = axes zip devs
      MultivariateNormalDistribution(DenseVector[Double](0, 0, 0), data)
    }

    val jsonLm1 = Landmark("one", Point(1, 2, 3))
    val jsonLm2 = Landmark("two", Point(2, 3, 4), Some("Landmark two"), Some(distWithDefaultVectors(1, 4, 9)))
    val jsonLms = List(jsonLm1, jsonLm2)

    it("can serialize and deserialize simple landmarks using JSON") {
      val out = new ByteArrayOutputStream()
      LandmarkIO.writeLandmarksJsonToStream(jsonLms, out)
      val written = new String(out.toByteArray)
      val read = LandmarkIO.readLandmarksJsonFromSource[_3D](Source.fromString(written)).get
      read should equal(jsonLms)
    }

    it("can read simple landmarks from a JSON Stream") {
      val read = LandmarkIO.readLandmarksJsonFromSource[_3D](jsonStream()).get
      read should equal(jsonLms)
    }

  }
} 
Example 117
Source File: RichSparkFunctionsSpec.scala    From lighthouse   with Apache License 2.0 5 votes vote down vote up
package be.dataminded.lighthouse.pipeline

import java.io.ByteArrayOutputStream

import be.dataminded.lighthouse.testing.SharedSparkSession
import better.files._
import org.apache.spark.sql.Dataset
import org.apache.spark.storage.StorageLevel
import org.scalatest.BeforeAndAfter
import org.scalatest.funspec.AnyFunSpec
import org.scalatest.matchers.should.Matchers

class RichSparkFunctionsSpec extends AnyFunSpec with Matchers with SharedSparkSession with BeforeAndAfter {

  import spark.implicits._

  describe("SparkFunctions with a DataSet inside should have extra functionality") {

    val function = SparkFunction.of(Seq(1, 2, 3, 4, 5).toDS())

    it("can cache") {
      function.cache().run(spark).storageLevel should equal(StorageLevel.MEMORY_ONLY)
    }

    it("can drop the cache") {
      function.cache().dropCache().run(spark).storageLevel should equal(StorageLevel.NONE)
    }

    it("can be written to a sink") {
      function.write(OrcSink("target/output/orc")).run(spark)

      file"target/output/orc".exists should be(true)
    }

    it("can be written to multiple sinks") {
      function.write(OrcSink("target/output/orc"), OrcSink("target/output/orc2")).run(spark)

      file"target/output/orc".exists should be(true)
      file"target/output/orc2".exists should be(true)
    }

    it("is being cached when writing to multiple sinks for performance") {
      val result = function.write(OrcSink("target/output/orc"), OrcSink("target/output/orc2")).run(spark)

      result.storageLevel should equal(StorageLevel.MEMORY_ONLY)
    }

    it("can easily be counted") {
      function.count().run(spark) should equal(5)
    }

    it("can print the schema") {
      val stream = new ByteArrayOutputStream()
      Console.withOut(stream) {
        function.printSchema().run(spark)
      }
      stream.toString() should include("value: integer (nullable = false)")
    }

    it("can be be used as a Dataset") {
      function.as[Int].run(spark) shouldBe a[Dataset[_]]
    }
  }

  after {
    file"target/output/orc".delete(true)
    file"target/output/orc2".delete(true)
  }
} 
Example 118
Source File: SparkAvroDecoder.scala    From cloudflow   with Apache License 2.0 5 votes vote down vote up
package cloudflow.spark.avro

import org.apache.log4j.Logger

import java.io.ByteArrayOutputStream

import scala.reflect.runtime.universe._

import org.apache.avro.generic.{ GenericDatumReader, GenericDatumWriter, GenericRecord }
import org.apache.avro.io.{ DecoderFactory, EncoderFactory }
import org.apache.spark.sql.{ Dataset, Encoder, Row }
import org.apache.spark.sql.catalyst.encoders.{ encoderFor, ExpressionEncoder, RowEncoder }
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.types.StructType
import org.apache.avro.Schema

import cloudflow.spark.sql.SQLImplicits._

case class EncodedKV(key: String, value: Array[Byte])

case class SparkAvroDecoder[T: Encoder: TypeTag](avroSchema: String) {

  val encoder: Encoder[T]                           = implicitly[Encoder[T]]
  val sqlSchema: StructType                         = encoder.schema
  val encoderForDataColumns: ExpressionEncoder[Row] = RowEncoder(sqlSchema)
  @transient lazy val _avroSchema                   = new Schema.Parser().parse(avroSchema)
  @transient lazy val rowConverter                  = SchemaConverters.createConverterToSQL(_avroSchema, sqlSchema)
  @transient lazy val datumReader                   = new GenericDatumReader[GenericRecord](_avroSchema)
  @transient lazy val decoder                       = DecoderFactory.get
  def decode(bytes: Array[Byte]): Row = {
    val binaryDecoder = decoder.binaryDecoder(bytes, null)
    val record        = datumReader.read(null, binaryDecoder)
    rowConverter(record).asInstanceOf[GenericRow]
  }

}


case class SparkAvroEncoder[T: Encoder: TypeTag](avroSchema: String) {

  @transient lazy val log = Logger.getLogger(getClass.getName)

  val BufferSize = 5 * 1024 // 5 Kb

  val encoder                     = implicitly[Encoder[T]]
  val sqlSchema                   = encoder.schema
  @transient lazy val _avroSchema = new Schema.Parser().parse(avroSchema)

  val recordName                = "topLevelRecord" // ???
  val recordNamespace           = "recordNamespace" // ???
  @transient lazy val converter = AvroConverter.createConverterToAvro(sqlSchema, recordName, recordNamespace)

  // Risk: This process is memory intensive. Might require thread-level buffers to optimize memory usage
  def rowToBytes(row: Row): Array[Byte] = {
    val genRecord = converter(row).asInstanceOf[GenericRecord]
    if (log.isDebugEnabled) log.debug(s"genRecord = $genRecord")
    val datumWriter   = new GenericDatumWriter[GenericRecord](_avroSchema)
    val avroEncoder   = EncoderFactory.get
    val byteArrOS     = new ByteArrayOutputStream(BufferSize)
    val binaryEncoder = avroEncoder.binaryEncoder(byteArrOS, null)
    datumWriter.write(genRecord, binaryEncoder)
    binaryEncoder.flush()
    byteArrOS.toByteArray
  }

  def encode(dataset: Dataset[T]): Dataset[Array[Byte]] =
    dataset.toDF().mapPartitions(rows ⇒ rows.map(rowToBytes)).as[Array[Byte]]

  // Note to self: I'm not sure how heavy this chain of transformations is
  def encodeWithKey(dataset: Dataset[T], keyFun: T ⇒ String): Dataset[EncodedKV] = {
    val encoder             = encoderFor[T]
    implicit val rowEncoder = RowEncoder(encoder.schema).resolveAndBind()
    dataset.map { value ⇒
      val key         = keyFun(value)
      val internalRow = encoder.toRow(value)
      val row         = rowEncoder.fromRow(internalRow)
      val bytes       = rowToBytes(row)
      EncodedKV(key, bytes)
    }
  }

} 
Example 119
Source File: BooApp.scala    From boopickle   with Apache License 2.0 5 votes vote down vote up
package boopickle.perftests

import java.io.ByteArrayOutputStream
import java.util.zip.GZIPOutputStream

import boopickle.BufferPool

object BooApp {

  def main(args: Array[String]): Unit = {
    runTests()
  }

  def runTests(): Unit = {
    Tests.suites.zipWithIndex.foreach {
      case (suite, idx) =>
        val header = s"${1 + idx}/${Tests.suites.size} : ${suite.name}"
        println(header)
        println("=" * header.length)
        println(f"${"Library"}%-10s ${"ops/s"}%-10s ${"%"}%-10s ${"size"}%-10s ${"%"}%-10s ${"size.gz"}%-10s ${"%"}%-10s")
        val tester = new PerfTester(suite)
        val res    = tester.runSuite
        // zip result data to see how small it gets
        val resSizes = res.results.map { r =>
          val rawSize = r.data.length
          val bs      = new ByteArrayOutputStream()
          val gs      = new GZIPOutputStream(bs)
          gs.write(r.data)
          gs.finish()
          bs.flush()
          val gzipped = bs.toByteArray.length
          (r, rawSize, gzipped)
        }
        val maxCount  = resSizes.map(_._1.count).max
        val minSize   = resSizes.map(_._2).min
        val minGZSize = resSizes.map(_._3).min
        resSizes.foreach { r =>
          println(
            f"${r._1.name}%-10s ${r._1.count}%-10d ${f"${r._1.count * 100.0 / maxCount}%.1f%%"}%-10s ${r._2}%-10d ${f"${r._2 * 100.0 / minSize}%.0f%%"}%-10s ${r._3}%-10d ${f"${r._3 * 100.0 / minGZSize}%.0f%%"}%-10s")
        }
        println()
        // print out buffer pool usage
        println(s"""BufferPool:
           |  allocations = ${BufferPool.allocOk}
           |  misses      = ${BufferPool.allocMiss}
           """.stripMargin)
    }
  }
} 
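Example 119 gzips each serialized payload only to report how small it compresses. A minimal helper doing just that step (names invented for illustration):

import java.io.ByteArrayOutputStream
import java.util.zip.GZIPOutputStream

object GzippedSizeSketch extends App {
  // Compress a byte array entirely in memory and return the gzipped size.
  def gzippedSize(data: Array[Byte]): Int = {
    val bs = new ByteArrayOutputStream()
    val gs = new GZIPOutputStream(bs)
    gs.write(data)
    gs.finish()
    bs.flush()
    bs.toByteArray.length
  }

  val payload = Array.fill[Byte](10000)(42.toByte)
  println(s"raw=${payload.length} gzipped=${gzippedSize(payload)}")
}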
Example 120
Source File: package.scala    From sttp   with Apache License 2.0 5 votes vote down vote up
package sttp.client

import java.io.{ByteArrayOutputStream, InputStream, OutputStream}
import java.nio.{Buffer, ByteBuffer}

import scala.annotation.{implicitNotFound, tailrec}

package object internal {
  private[client] def contentTypeWithCharset(ct: String, charset: String): String =
    s"$ct; charset=$charset"

  private[client] def charsetFromContentType(ct: String): Option[String] =
    ct.split(";").map(_.trim.toLowerCase).collectFirst {
      case s if s.startsWith("charset=") && s.substring(8).trim != "" => s.substring(8).trim
    }

  private[client] def transfer(is: InputStream, os: OutputStream): Unit = {
    var read = 0
    val buf = new Array[Byte](1024)

    @tailrec
    def transfer(): Unit = {
      read = is.read(buf, 0, buf.length)
      if (read != -1) {
        os.write(buf, 0, read)
        transfer()
      }
    }

    transfer()
  }

  private[client] def toByteArray(is: InputStream): Array[Byte] = {
    val os = new ByteArrayOutputStream
    transfer(is, os)
    os.toByteArray
  }

  private[client] def concatByteBuffers(bb1: ByteBuffer, bb2: ByteBuffer): ByteBuffer = {
    val buf = ByteBuffer
      .allocate(bb1.array().length + bb2.array().length)
      .put(bb1)
      .put(bb2)
    // rewind() returns Buffer in Java8, and ByteBuffer in Java11
    // calling the method from the base class to avoid NoSuchMethodError
    (buf: Buffer).rewind()
    buf
  }

  
  private[client] def sanitizeCharset(charset: String): String = {
    val c2 = charset.trim()
    val c3 = if (c2.startsWith("\"")) c2.substring(1) else c2
    if (c3.endsWith("\"")) c3.substring(0, c3.length - 1) else c3
  }

  @implicitNotFound(
    "This is a partial request, the method & url are not specified. Use " +
      ".get(...), .post(...) etc. to obtain a non-partial request."
  )
  private[client] type IsIdInRequest[U[_]] = U[Unit] =:= Identity[Unit]

  private[client] val Utf8 = "utf-8"
  private[client] val Iso88591 = "iso-8859-1"
  private[client] val CrLf = "\r\n"
} 
Example 121
Source File: CuModule.scala    From neuroflow   with Apache License 2.0 5 votes vote down vote up
package neuroflow.cuda

import jcuda.driver.{CUfunction, CUmodule}
import jcuda.driver.JCudaDriver._
import breeze.macros.arityize
import java.io.{ByteArrayOutputStream, InputStream}
import jcuda.{CudaException, Pointer}


  private def loadData(inputStream: InputStream): Array[Byte] = {
    val baos: ByteArrayOutputStream = new ByteArrayOutputStream
    try {
      val buffer = new Array[Byte](8192)
      var done = false
      while (!done) {
        val read: Int = inputStream.read(buffer)
        if (read == -1) {
          done = true
        } else {
          baos.write(buffer, 0, read)
        }
      }
      baos.write('\0')
      baos.flush()
      baos.toByteArray
    } finally {
      baos.close()
    }
  }
} 
Example 122
Source File: NumPyTest.scala    From featran   with Apache License 2.0 5 votes vote down vote up
package com.spotify.featran.numpy

import java.io.{ByteArrayOutputStream, OutputStream}

import org.scalatest._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class NumPyTest extends AnyFlatSpec with Matchers {
  private def test(f: OutputStream => Unit)(expectedFile: String): Unit = {
    val actual = {
      val baos = new ByteArrayOutputStream()
      f(baos)
      baos.toByteArray
    }

    val expected = {
      val in = this.getClass.getResourceAsStream(expectedFile)
      val out = new ByteArrayOutputStream(math.max(32, in.available()))
      val buf = new Array[Byte](8192)
      var r = in.read(buf)
      while (r != -1) {
        out.write(buf, 0, r)
        r = in.read(buf)
      }
      out.toByteArray
    }

    actual shouldBe expected
  }

  "NumPy" should "work with 1-dimensional arrays" in {
    val a1d = (0 until 10).toArray
    test(NumPy.write(_, a1d))("/a1d-int.npy")
    test(NumPy.write(_, a1d.map(_.toLong)))("/a1d-long.npy")
    test(NumPy.write(_, a1d.map(_.toFloat)))("/a1d-float.npy")
    test(NumPy.write(_, a1d.map(_.toDouble)))("/a1d-double.npy")

    // scalastyle:off no.whitespace.before.left.bracket
    the[IllegalArgumentException] thrownBy {
      test(NumPy.write(_, a1d, Seq(20)))("/a1d-int.npy")
    } should have message "requirement failed: Invalid shape, 20 != 10"
    // scalastyle:on no.whitespace.before.left.bracket
  }

  it should "work with 2-dimensional arrays" in {
    val a2d = (for {
      i <- 0 until 10
      j <- 0 until 5
    } yield i * 10 + j).toArray
    test(NumPy.write(_, a2d, Seq(10, 5)))("/a2d-int.npy")
    test(NumPy.write(_, a2d.map(_.toLong), Seq(10, 5)))("/a2d-long.npy")
    test(NumPy.write(_, a2d.map(_.toFloat), Seq(10, 5)))("/a2d-float.npy")
    test(NumPy.write(_, a2d.map(_.toDouble), Seq(10, 5)))("/a2d-double.npy")

    // scalastyle:off no.whitespace.before.left.bracket
    the[IllegalArgumentException] thrownBy {
      test(NumPy.write(_, a2d, Seq(20, 5)))("/a1d-int.npy")
    } should have message "requirement failed: Invalid shape, 20 * 5 != 50"
    // scalastyle:on no.whitespace.before.left.bracket
  }

  it should "work with iterators" in {
    val a2d = (0 until 10).map(i => (0 until 5).map(j => i * 10 + j).toArray)
    test(NumPy.write(_, a2d.iterator, 10, 5))("/a2d-int.npy")
    test(NumPy.write(_, a2d.iterator.map(_.map(_.toLong)), 10, 5))("/a2d-long.npy")
    test(NumPy.write(_, a2d.iterator.map(_.map(_.toFloat)), 10, 5))("/a2d-float.npy")
    test(NumPy.write(_, a2d.iterator.map(_.map(_.toDouble)), 10, 5))("/a2d-double.npy")

    // scalastyle:off no.whitespace.before.left.bracket
    the[IllegalArgumentException] thrownBy {
      test(NumPy.write(_, a2d.iterator, 10, 10))("/a2d-int.npy")
    } should have message "requirement failed: Invalid row size, expected: 10, actual: 5"

    the[IllegalArgumentException] thrownBy {
      test(NumPy.write(_, a2d.iterator, 20, 5))("/a2d-int.npy")
    } should have message "requirement failed: Invalid number of rows, expected: 20, actual: 10"

    // hit the header.length % 16 == 0 condition
    the[IllegalArgumentException] thrownBy {
      test(NumPy.write(_, a2d.iterator, 1000000000, 50))("/a2d-int.npy")
    } should have message "requirement failed: Invalid row size, expected: 50, actual: 5"
    // scalastyle:on no.whitespace.before.left.bracket
  }
} 
Example 123
Source File: GZip.scala    From polynote   with Apache License 2.0 5 votes vote down vote up
package polynote.util

import java.io.ByteArrayOutputStream
import java.util.zip.GZIPOutputStream

import zio.RIO
import zio.blocking.{Blocking, effectBlocking}
import zio.ZIO.effectTotal

object GZip {
  def apply(bytes: => Array[Byte]): RIO[Blocking, Array[Byte]] = effectTotal(new ByteArrayOutputStream()).bracket(os => effectTotal(os.close())) {
    bos => effectBlocking {
      val os = new GZIPOutputStream(bos, true)
      os.write(bytes)
      os.flush()
      os.close()
      bos.toByteArray
    }
  }
} 
Example 124
Source File: StatsController.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package controllers.document.stats

import com.mohiva.play.silhouette.api.Silhouette
import controllers.{BaseOptAuthController, Security, HasVisitLogging, HasPrettyPrintJSON}
import java.io.{ByteArrayOutputStream, PrintWriter}
import javax.inject.{Inject, Singleton}
import kantan.csv._
import kantan.csv.ops._
import kantan.csv.CsvConfiguration.{Header, QuotePolicy}
import kantan.csv.engine.commons._
import services.annotation.AnnotationService
import services.document.DocumentService
import services.user.UserService
import services.user.Roles._
import services.visit.VisitService
import org.webjars.play.WebJarsUtil
import play.api.Configuration
import play.api.mvc.{AnyContent, Request, Result, ControllerComponents}
import play.api.libs.json._
import play.api.libs.functional.syntax._
import play.api.i18n.I18nSupport
import plugins.PluginRegistry
import scala.concurrent.{ExecutionContext, Future}

@Singleton
class StatsController @Inject() (
  val components: ControllerComponents,
  val config: Configuration,
  val documents: DocumentService,
  val annotations: AnnotationService,
  val users: UserService,
  val silhouette: Silhouette[Security.Env],
  implicit val visitService: VisitService,
  implicit val webjars: WebJarsUtil,
  implicit val ctx: ExecutionContext
) extends BaseOptAuthController(components, config, documents, users) 
    with HasVisitLogging 
    with HasPrettyPrintJSON 
    with I18nSupport {
  
  private val CSV_CONFIG = CsvConfiguration(',', '"', QuotePolicy.WhenNeeded, Header.None)
    
  implicit val tuple2Writes: Writes[Tuple2[String, Long]] = (
    (JsPath \ "value").write[String] and
    (JsPath \ "count").write[Long]
  )(t => (t._1, t._2))
  
  private def toCSV(stats: Seq[(String, Long)]): String = {
    val out = new ByteArrayOutputStream()
    val writer = out.asCsvWriter[(String, Long)](CSV_CONFIG)
    stats.foreach(writer.write(_))
    writer.close()
    new String(out.toByteArray, "UTF-8")
  }
  
  def showDocumentStats(documentId: String, tab: Option[String]) = silhouette.UserAwareAction.async { implicit request =>
    documentReadResponse(documentId, request.identity,  { case (doc, accesslevel) =>
      logDocumentView(doc.document, None, accesslevel)      
      tab.map(_.toLowerCase) match {
        case Some(t) if t == "activity" =>  
          val plugins = PluginRegistry.listConfigs("document.stats.activity")
          Future.successful(Ok(views.html.document.stats.activity(doc, request.identity, accesslevel, plugins)))
          
        case Some(t) if t == "entities" =>
          val plugins = PluginRegistry.listConfigs("document.stats.entities")
          Future.successful(Ok(views.html.document.stats.entities(doc, request.identity, accesslevel, plugins)))
          
        case Some(t) if t == "tags" =>
          val plugins = PluginRegistry.listConfigs("document.stats.tags")
          Future.successful(Ok(views.html.document.stats.tags(doc, request.identity, accesslevel, plugins)))
          
        case _ =>
          val plugins = PluginRegistry.listConfigs("document.stats.activity")
          Future.successful(Ok(views.html.document.stats.activity(doc, request.identity, accesslevel, plugins)))
      }
    })
  }
  
  private def getTags(documentId: String)(action: (Seq[(String, Long)], Request[AnyContent]) => Result) =
    silhouette.UserAwareAction.async { implicit request =>
      documentReadResponse(documentId, request.identity,  { case (doc, accesslevel) =>
          annotations.getTagStats(documentId).map { buckets =>
            action(buckets, request.request)
          }
        }
      )
    }
  
  def getTagsAsJSON(documentId: String) = getTags(documentId) { case (buckets, request) =>
    jsonOk(Json.toJson(buckets))(request)
  }
  
  def getTagsAsCSV(documentId: String) = getTags(documentId) { case(buckets, request) =>
    Ok(toCSV(buckets)).withHeaders(CONTENT_DISPOSITION -> { s"attachment; filename=${documentId}_tags.csv" })
  }

} 
Example 125
Source File: DefineMacroCmd.scala    From piglet   with Apache License 2.0 5 votes vote down vote up
package dbis.piglet.op.cmd

import java.io.{ObjectInputStream, ByteArrayInputStream, ObjectOutputStream, ByteArrayOutputStream}
import dbis.piglet.plan.DataflowPlan
import scala.collection.mutable.ListBuffer
import dbis.piglet.op.{Pipe,PigOperator}


case class DefineMacroCmd(
    out: Pipe, 
    macroName: String, 
    params: Option[List[String]], 
    stmts: List[PigOperator]
  ) extends PigOperator(out) {

  var subPlan: Option[DataflowPlan] = None
  var inPipes = List[Pipe]()

  def deepClone(): DefineMacroCmd = {
      val baos = new ByteArrayOutputStream()
      val oos = new ObjectOutputStream(baos)
      oos.writeObject(this)
      val bais = new ByteArrayInputStream(baos.toByteArray())
      val ois = new ObjectInputStream(bais)
      ois.readObject().asInstanceOf[DefineMacroCmd]
  }

  override def preparePlan: Unit = {
    
  def pipeParamPositions(): List[Int] = {
    val l = ListBuffer[Int]()
    inPipes.foreach(i => {
      val pos = params.get.indexOf(i.name.substring(1))
      if (pos >= 0) l += pos
    })
    l.toList
  }
} 
Example 126
Source File: package.scala    From pbdirect   with MIT License 5 votes vote down vote up
import java.io.ByteArrayOutputStream
import java.util

import cats.data.{NonEmptyList => NEL}
import com.google.protobuf.{CodedInputStream, CodedOutputStream}

package object pbdirect {
  implicit class PBWriterOps[A <: AnyRef](private val a: A) extends AnyVal {

    def toPB(implicit writer: PBWriter[A]): Array[Byte] = {
      val out = new ByteArrayOutputStream()
      val pbOut = CodedOutputStream.newInstance(out)
      val sizes = IdentityMaps.emptyJavaIdentityMap[Any, Int]
      writer.writeTo(NEL.one(1), a, pbOut, sizes)
      pbOut.flush()
      val bytes = out.toByteArray
      // remove the tag and return the content
      val input = CodedInputStream.newInstance(bytes)
      input.readTag()
      input.readByteArray()
    }
  }
  implicit class PBParserOps(private val bytes: Array[Byte]) extends AnyVal {

    def pbTo[A](implicit reader: PBParser[A]): A = {
      // wraps the bytes into a protobuf single field message
      val out = new ByteArrayOutputStream()
      val pbOut = CodedOutputStream.newInstance(out)
      pbOut.writeByteArray(1, bytes)
      pbOut.flush()
      reader.parse(NEL.one(1), out.toByteArray)
    }
  }
} 
Example 127
Source File: Avro4sJsonSupport.scala    From kafka-serde-scala   with Apache License 2.0 5 votes vote down vote up
package io.github.azhur.kafkaserdeavro4s

import java.io.ByteArrayOutputStream
import java.util

import com.sksamuel.avro4s.{
  AvroJsonInputStream,
  AvroOutputStream,
  FromRecord,
  SchemaFor,
  ToRecord
}
import org.apache.avro.file.SeekableByteArrayInput
import org.apache.kafka.common.errors.SerializationException
import org.apache.kafka.common.serialization.{ Deserializer, Serde, Serializer }

import scala.language.implicitConversions
import scala.util.control.NonFatal
import scala.util.{ Failure, Success }

trait Avro4sJsonSupport {
  implicit def toSerializer[T >: Null](implicit schemaFor: SchemaFor[T],
                                       toRecord: ToRecord[T]): Serializer[T] =
    new Serializer[T] {
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
      override def close(): Unit                                                 = {}
      override def serialize(topic: String, data: T): Array[Byte] =
        if (data == null) null
        else {
          val baos = new ByteArrayOutputStream()
          try {
            val output = AvroOutputStream.json[T](baos)
            try {
              output.write(data)
            } finally {
              output.close()
            }
            baos.toByteArray
          } catch {
            case NonFatal(e) => throw new SerializationException(e)
          } finally {
            baos.close()
          }
        }
    }

  implicit def toDeserializer[T >: Null](
      implicit schemaFor: SchemaFor[T],
      fromRecord: FromRecord[T],
      schemas: WriterReaderSchemas = WriterReaderSchemas()
  ): Deserializer[T] =
    new Deserializer[T] {
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
      override def close(): Unit                                                 = {}
      override def deserialize(topic: String, data: Array[Byte]): T =
        if (data == null) null
        else
          new AvroJsonInputStream[T](new SeekableByteArrayInput(data),
                                     schemas.writerSchema,
                                     schemas.readerSchema).singleEntity match {
            case Success(json)  => json
            case Failure(error) => throw new SerializationException(error)
          }
    }

  implicit def toSerde[T >: Null](
      implicit schemaFor: SchemaFor[T],
      toRecord: ToRecord[T],
      fromRecord: FromRecord[T],
      schemas: WriterReaderSchemas = WriterReaderSchemas()
  ): Serde[T] =
    new Serde[T] {
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
      override def close(): Unit                                                 = {}
      override def serializer(): Serializer[T]                                   = toSerializer[T]
      override def deserializer(): Deserializer[T]                               = toDeserializer[T]
    }
}

object Avro4sJsonSupport extends Avro4sJsonSupport 
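
With these implicits imported, and assuming avro4s can derive SchemaFor, ToRecord and FromRecord for the type (the case class and topic below are illustrative), a Kafka Serde is obtained and used like this:

import io.github.azhur.kafkaserdeavro4s.Avro4sJsonSupport._

case class User(id: Long, name: String)

val serde = toSerde[User]
val bytes = serde.serializer().serialize("users", User(1L, "Ada"))   // Avro JSON bytes
val user  = serde.deserializer().deserialize("users", bytes)         // back to a User
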
Example 128
Source File: Avro4sDataSupport.scala    From kafka-serde-scala   with Apache License 2.0 5 votes vote down vote up
package io.github.azhur.kafkaserdeavro4s

import java.io.ByteArrayOutputStream
import java.util

import com.sksamuel.avro4s.{
  AvroDataInputStream,
  AvroDataOutputStream,
  FromRecord,
  SchemaFor,
  ToRecord
}
import org.apache.avro.file.{ CodecFactory, SeekableByteArrayInput }
import org.apache.kafka.common.errors.SerializationException
import org.apache.kafka.common.serialization.{ Deserializer, Serde, Serializer }

import scala.language.implicitConversions
import scala.util.control.NonFatal
import scala.util.{ Failure, Success }

trait Avro4sDataSupport {
  implicit def toSerializer[T >: Null](
      implicit schemaFor: SchemaFor[T],
      toRecord: ToRecord[T],
      codec: CodecFactory = CodecFactory.nullCodec()
  ): Serializer[T] =
    new Serializer[T] {
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
      override def close(): Unit                                                 = {}
      override def serialize(topic: String, data: T): Array[Byte] =
        if (data == null) null
        else {
          val baos = new ByteArrayOutputStream()
          try {
            val output = AvroDataOutputStream[T](baos, codec)
            try {
              output.write(data)
            } finally {
              output.close()
            }
            baos.toByteArray
          } catch {
            case NonFatal(e) => throw new SerializationException(e)
          } finally {
            baos.close()
          }
        }
    }

  implicit def toDeserializer[T >: Null](
      implicit schemaFor: SchemaFor[T],
      fromRecord: FromRecord[T],
      schemas: WriterReaderSchemas = WriterReaderSchemas()
  ): Deserializer[T] =
    new Deserializer[T] {
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
      override def close(): Unit                                                 = {}
      override def deserialize(topic: String, data: Array[Byte]): T =
        if (data == null) null
        else {
          val it = new AvroDataInputStream[T](new SeekableByteArrayInput(data),
                                              schemas.writerSchema,
                                              schemas.readerSchema).tryIterator
          if (it.hasNext) {
            it.next() match {
              case Success(record) => record
              case Failure(err)    => throw new SerializationException(err)
            }
          } else {
            throw new SerializationException("Empty avro4s data iterator")
          }
        }

    }

  implicit def toSerde[T >: Null](implicit schemaFor: SchemaFor[T],
                                  toRecord: ToRecord[T],
                                  fromRecord: FromRecord[T],
                                  codec: CodecFactory = CodecFactory.nullCodec()): Serde[T] =
    new Serde[T] {
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
      override def close(): Unit                                                 = {}
      override def serializer(): Serializer[T]                                   = toSerializer[T]
      override def deserializer(): Deserializer[T]                               = toDeserializer[T]
    }
}

object Avro4sDataSupport extends Avro4sDataSupport 
Example 129
Source File: Avro4sBinarySupport.scala    From kafka-serde-scala   with Apache License 2.0 5 votes vote down vote up
package io.github.azhur.kafkaserdeavro4s

import java.io.ByteArrayOutputStream
import java.util

import com.sksamuel.avro4s.{
  AvroBinaryInputStream,
  AvroOutputStream,
  FromRecord,
  SchemaFor,
  ToRecord
}
import org.apache.avro.file.SeekableByteArrayInput
import org.apache.kafka.common.errors.SerializationException
import org.apache.kafka.common.serialization.{ Deserializer, Serde, Serializer }

import scala.language.implicitConversions
import scala.util.{ Failure, Success }
import scala.util.control.NonFatal

trait Avro4sBinarySupport {
  implicit def toSerializer[T >: Null](implicit schemaFor: SchemaFor[T],
                                       toRecord: ToRecord[T]): Serializer[T] =
    new Serializer[T] {
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
      override def close(): Unit                                                 = {}
      override def serialize(topic: String, data: T): Array[Byte] =
        if (data == null) null
        else {
          val baos = new ByteArrayOutputStream()
          try {
            val output = AvroOutputStream.binary[T](baos)
            try {
              output.write(data)
            } finally {
              output.close()
            }
            baos.toByteArray
          } catch {
            case NonFatal(e) => throw new SerializationException(e)
          } finally {
            baos.close()
          }
        }
    }

  implicit def toDeserializer[T >: Null](
      implicit schemaFor: SchemaFor[T],
      fromRecord: FromRecord[T],
      schemas: WriterReaderSchemas = WriterReaderSchemas()
  ): Deserializer[T] =
    new Deserializer[T] {
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
      override def close(): Unit                                                 = {}
      override def deserialize(topic: String, data: Array[Byte]): T =
        if (data == null) null
        else {
          val it = new AvroBinaryInputStream[T](new SeekableByteArrayInput(data),
                                                schemas.writerSchema,
                                                schemas.readerSchema).tryIterator
          if (it.hasNext) {
            it.next() match {
              case Success(record) => record
              case Failure(err)    => throw new SerializationException(err)
            }
          } else {
            throw new SerializationException("Empty avro4s binary iterator")
          }
        }

    }

  implicit def toSerde[T >: Null](
      implicit schemaFor: SchemaFor[T],
      toRecord: ToRecord[T],
      fromRecord: FromRecord[T],
      schemas: WriterReaderSchemas = WriterReaderSchemas()
  ): Serde[T] =
    new Serde[T] {
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
      override def close(): Unit                                                 = {}
      override def serializer(): Serializer[T]                                   = toSerializer[T]
      override def deserializer(): Deserializer[T]                               = toDeserializer[T]
    }
}

object Avro4sBinarySupport extends Avro4sBinarySupport 
Example 130
Source File: Serdes.scala    From tamer   with MIT License 5 votes vote down vote up
package tamer

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer

import com.sksamuel.avro4s._
import org.apache.avro.Schema
import tamer.registry._
import zio.{RIO, Task}
import zio.kafka.client.serde.{Deserializer, Serializer}

sealed trait Serde[A] extends Any {
  def isKey: Boolean
  def schema: Schema
  def deserializer: Deserializer[Registry with Topic, A]
  def serializer: Serializer[Registry with Topic, A]
  final def serde: ZSerde[Registry with Topic, A] = ZSerde(deserializer)(serializer)
}

object Serde {
  private[this] final val Magic: Byte = 0x0
  private[this] final val intByteSize = 4

  final def apply[A <: Product: Decoder: Encoder: SchemaFor](isKey: Boolean = false) =
    new RecordSerde[A](isKey, SchemaFor[A].schema(DefaultFieldMapper))

  final class RecordSerde[A: Decoder: Encoder](override final val isKey: Boolean, override final val schema: Schema) extends Serde[A] {
    private[this] def subject(topic: String): String = s"$topic-${if (isKey) "key" else "value"}"
    override final val deserializer: Deserializer[Registry with Topic, A] = Deserializer.byteArray.mapM { ba =>
      val buffer = ByteBuffer.wrap(ba)
      if (buffer.get() != Magic) RIO.fail(SerializationError("Unknown magic byte!"))
      else {
        val id = buffer.getInt()
        for {
          env <- RIO.environment[Registry]
          _   <- env.registry.verifySchema(id, schema)
          res <- RIO.fromTry {
            val length  = buffer.limit() - 1 - intByteSize
            val payload = new Array[Byte](length)
            buffer.get(payload, 0, length)
            AvroInputStream.binary[A].from(payload).build(schema).tryIterator.next
          }
        } yield res
      }
    }
    override final val serializer: Serializer[Registry with Topic, A] = Serializer.byteArray.contramapM { a =>
      for {
        env <- RIO.environment[Registry with Topic]
        id  <- env.registry.getOrRegisterId(subject(env.topic), schema)
        arr <- Task {
          val baos = new ByteArrayOutputStream
          baos.write(Magic.toInt)
          baos.write(ByteBuffer.allocate(intByteSize).putInt(id).array())
          val ser = AvroOutputStream.binary[A].to(baos).build(schema)
          ser.write(a)
          ser.close()
          baos.toByteArray
        }
      } yield arr
    }
  }
} 
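
For illustration only, the envelope written by the serializer above is a single magic byte, a 4-byte big-endian schema id, and then the Avro binary payload; it can be split apart like this:

import java.nio.ByteBuffer

def splitEnvelope(bytes: Array[Byte]): (Byte, Int, Array[Byte]) = {
  val buf      = ByteBuffer.wrap(bytes)
  val magic    = buf.get()                       // expected to be 0x0
  val schemaId = buf.getInt()                    // 4-byte big-endian id
  val payload  = new Array[Byte](buf.remaining())
  buf.get(payload)                               // remaining bytes: the Avro-encoded record
  (magic, schemaId, payload)
}
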
Example 131
Source File: TestUtils.scala    From cats-effect   with Apache License 2.0 5 votes vote down vote up
package cats.effect.internals

import java.io.{ByteArrayOutputStream, OutputStream, PrintStream}
import java.nio.charset.StandardCharsets

import scala.util.control.NonFatal

trait TestUtils {
  def catchSystemErrInto[T](outStream: OutputStream)(thunk: => T): T = synchronized {
    val oldErr = System.err
    val fakeErr = new PrintStream(outStream)
    System.setErr(fakeErr)
    try {
      thunk
    } finally {
      System.setErr(oldErr)
      fakeErr.close()
    }
  }
} 
Example 132
Source File: CancelUtilsTests.scala    From cats-effect   with Apache License 2.0 5 votes vote down vote up
package cats.effect.internals

import java.io.ByteArrayOutputStream
import cats.effect.IO
import org.scalatest.matchers.should.Matchers
import org.scalatest.funsuite.AnyFunSuite
import scala.util.control.NonFatal

class CancelUtilsTests extends AnyFunSuite with Matchers with TestUtils {
  test("cancelAll works for zero references") {
    CancelUtils.cancelAll().unsafeRunSync()
  }

  test("cancelAll works for one reference") {
    var wasCanceled = false
    CancelUtils.cancelAll(IO { wasCanceled = true }).unsafeRunSync()
    wasCanceled shouldBe true
  }

  test("cancelAll catches error from one reference") {
    val dummy = new RuntimeException("dummy")
    var wasCanceled1 = false
    var wasCanceled2 = false

    val io = CancelUtils.cancelAll(
      IO { wasCanceled1 = true },
      IO(throw dummy),
      IO { wasCanceled2 = true }
    )

    try {
      io.unsafeRunSync()
      fail("should have throw exception")
    } catch {
      case `dummy` =>
        wasCanceled1 shouldBe true
        wasCanceled2 shouldBe true
    }
  }

  test("cancelAll catches the first error and logs the rest") {
    val dummy1 = new RuntimeException("dummy1")
    val dummy2 = new RuntimeException("dummy2")
    var wasCanceled1 = false
    var wasCanceled2 = false

    val io = CancelUtils.cancelAll(
      IO { wasCanceled1 = true },
      IO(throw dummy1),
      IO(throw dummy2),
      IO { wasCanceled2 = true }
    )

    val sysErr = new ByteArrayOutputStream()
    try {
      catchSystemErrInto(sysErr) {
        io.unsafeRunSync()
      }
      fail("should have throw exception")
    } catch {
      case NonFatal(error) =>
        error shouldBe dummy1
        sysErr.toString("utf-8") should include("dummy2")
        dummy1.getSuppressed shouldBe empty // ensure memory isn't leaked with addSuppressed
        dummy2.getSuppressed shouldBe empty // ensure memory isn't leaked with addSuppressed
    }
  }
} 
Example 133
Source File: JVMReprSpec.scala    From incubator-toree   with Apache License 2.0 5 votes vote down vote up
package integration.interpreter.scala

import java.util
import java.io.ByteArrayOutputStream
import jupyter.{Displayer, Displayers, MIMETypes}
import org.apache.toree.global.StreamState
import org.apache.toree.interpreter.Interpreter
import org.apache.toree.interpreter.Results.Success
import org.apache.toree.kernel.api.{DisplayMethodsLike, KernelLike}
import org.apache.toree.kernel.interpreter.scala.ScalaInterpreter
import org.mockito.Mockito.doReturn
import org.scalatest.{BeforeAndAfter, FunSpec, Matchers}
import org.scalatest.mock.MockitoSugar
import scala.util.Random

class JVMReprSpec extends FunSpec with Matchers with MockitoSugar with BeforeAndAfter {

  private val outputResult = new ByteArrayOutputStream()
  private var interpreter: Interpreter = _

  before {
    val mockKernel = mock[KernelLike]
    val mockDisplayMethods = mock[DisplayMethodsLike]
    doReturn(mockDisplayMethods).when(mockKernel).display

    interpreter = new ScalaInterpreter().init(mockKernel)

    StreamState.setStreams(outputStream = outputResult)
  }

  after {
    interpreter.stop()
    outputResult.reset()
  }

  describe("ScalaInterpreter") {
    describe("#interpret") {
      it("should display Scala int as a text representation") {
        val (result, outputOrError) = interpreter.interpret("val a = 12")

        result should be(Success)
        outputOrError.isLeft should be(true)
        outputOrError.left.get should be(Map(MIMETypes.TEXT -> "12"))
      }

      it("should display Scala Some(str) as a text representation") {
        val (result, outputOrError) = interpreter.interpret("""val a = Some("str")""")

        result should be(Success)
        outputOrError.isLeft should be(true)
        outputOrError.left.get should be(Map(MIMETypes.TEXT -> "Some(str)"))
      }

      ignore("should use the Jupyter REPR API for display representation") {
        Displayers.register(classOf[DisplayerTest], new Displayer[DisplayerTest] {
          override def display(t: DisplayerTest): util.Map[String, String] = {
            val output = new util.HashMap[String, String]()
            output.put("text/plain", s"test object: ${t.id}")
            output.put("application/json", s"""{"id": ${t.id}""")
            output
          }
        })

        val inst = DisplayerTest()
        interpreter.bind("inst", classOf[DisplayerTest].getName, inst, List())

        val (result, outputOrError) = interpreter.interpret("""inst""")

        result should be(Success)
        outputOrError.isLeft should be(true)
        outputOrError.left.get should be(Map(
          MIMETypes.TEXT -> s"test object: ${inst.id}",
          "application/json" -> s"""{"id": ${inst.id}"""
        ))
      }
    }
  }
}

case class DisplayerTest(id: Long = new Random().nextLong()) 
Example 134
Source File: AvroMessageConverter.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.jms.sink.converters

import java.io.ByteArrayOutputStream

import com.datamountaineer.streamreactor.connect.jms.config.JMSSetting
import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil
import com.datamountaineer.streamreactor.connect.serialization.AvroSerializer
import javax.jms.{BytesMessage, Session}
import org.apache.kafka.connect.sink.SinkRecord

class AvroMessageConverter extends JMSMessageConverter with ConverterUtil {

  override def convert(record: SinkRecord, session: Session, setting: JMSSetting): (String, BytesMessage) = {
    val converted = super[ConverterUtil].convert(record, setting.fields, setting.ignoreField)
    val avroRecord = convertValueToGenericAvro(converted)
    val avroSchema = avroData.fromConnectSchema(converted.valueSchema())

    implicit val os = new ByteArrayOutputStream()
    AvroSerializer.write(avroRecord, avroSchema)

    val message = session.createBytesMessage()
    message.writeBytes(os.toByteArray)
    (setting.source, message)
  }
} 
Example 135
Source File: AvroSerializer.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.bloomberg.avro

import java.io.ByteArrayOutputStream

import com.datamountaineer.streamreactor.connect.bloomberg.BloombergData
import com.datamountaineer.streamreactor.connect.bloomberg.avro.AvroSchemaGenerator._
import org.apache.avro.Schema
import org.apache.avro.generic.GenericData.Record
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory

import scala.collection.JavaConverters._

object AvroSerializer {

  
    private def recursive(record: GenericData.Record, schema: Schema, fieldName: String, value: Any): Unit = {
      value match {
        case _: Boolean => record.put(fieldName, value)
        case _: Int => record.put(fieldName, value)
        case _: Long => record.put(fieldName, value)
        case _: Double => record.put(fieldName, value)
        case _: Char => record.put(fieldName, value)
        case _: Float => record.put(fieldName, value)
        case _: String =>
          record.put(fieldName, value)
        case list: java.util.List[_] =>
          val tmpSchema = schema.getField(fieldName).schema()
          val itemSchema = if (tmpSchema.getType == Schema.Type.UNION) tmpSchema.getTypes.get(1) else tmpSchema
          require(itemSchema.getType == Schema.Type.ARRAY)
          //we might have a record not a primitive
          if (itemSchema.getElementType.getType == Schema.Type.RECORD) {
            val items = new GenericData.Array[GenericData.Record](list.size(), itemSchema)
            list.asScala.foreach { i =>
              //only map is allowed
              val m = i.asInstanceOf[java.util.Map[String, Any]]
              items.add(m.toAvroRecord(itemSchema.getElementType))
            }
            record.put(fieldName, items)
          } else {
            val items = new GenericData.Array[Any](list.size(), itemSchema)
            items.addAll(list)
            record.put(fieldName, items)
          }

        case map: java.util.LinkedHashMap[String @unchecked, _] =>
          //record schema
          val fieldSchema = schema.getField(fieldName).schema()
          val nestedSchema = if (fieldSchema.getType == Schema.Type.UNION) fieldSchema.getTypes.get(1) else fieldSchema
          val nestedRecord = new Record(nestedSchema)
          map.entrySet().asScala.foreach(e =>
            recursive(nestedRecord, nestedSchema, e.getKey, e.getValue))
          record.put(fieldName, nestedRecord)
      }
    }
}
Example 136
Source File: CallableAction.scala    From Waves   with MIT License 5 votes vote down vote up
package com.wavesplatform.lang.v1.traits.domain

import com.wavesplatform.common.state.ByteStr
import com.wavesplatform.lang.v1.traits.domain.Recipient.Address

sealed trait CallableAction

case class AssetTransfer(
    recipient: Address,
    amount: Long,
    assetId: Option[ByteStr]
) extends CallableAction

case class Issue(
    id: ByteStr,
    compiledScript: Option[ByteStr],
    decimals: Int,
    description: String,
    isReissuable: Boolean,
    name: String,
    quantity: Long,
    nonce: Long
) extends CallableAction

object Issue {
  import java.io.ByteArrayOutputStream

  import com.wavesplatform.lang.utils.Serialize._
  import com.wavesplatform.lang.v1.BaseGlobal
  private val Global: BaseGlobal = com.wavesplatform.lang.Global // Hack for IDEA

  def create(
      compiledScript: Option[ByteStr],
      decimals: Int,
      description: String,
      isReissuable: Boolean,
      name: String,
      quantity: Long,
      nonce: Long,
      parent: ByteStr
  ): Issue = {
    val id = calculateId(decimals, description, isReissuable, name, quantity, nonce, parent)
    Issue(id, compiledScript, decimals, description, isReissuable, name, quantity, nonce)
  }

  def calculateId(
      decimals: Int,
      description: String,
      isReissuable: Boolean,
      name: String,
      quantity: Long,
      nonce: Long,
      parent: ByteStr
  ): ByteStr = {
    val out = new ByteArrayOutputStream()
    out.writeString(name)
    out.writeString(description)
    out.writeInt(decimals)
    out.writeLong(quantity)
    out.writeShort(if (isReissuable) 1 else 0)
    out.writeLong(nonce)
    out.write(parent.arr)
    ByteStr(Global.blake2b256(out.toByteArray))
  }
}

case class Reissue(
    assetId: ByteStr,
    isReissuable: Boolean,
    quantity: Long
) extends CallableAction

case class Burn(
    assetId: ByteStr,
    quantity: Long
) extends CallableAction

case class SponsorFee(
    assetId: ByteStr,
    minSponsoredAssetFee: Option[Long]
) extends CallableAction

sealed trait DataOp extends CallableAction {
  val key: String
}

sealed trait DataItem[T] extends DataOp {
  val value: T
}

object DataItem {
  case class Lng(k: String, v: Long)     extends DataItem[Long]    { val key = k; val value = v    }
  case class Bool(k: String, v: Boolean) extends DataItem[Boolean] { val key = k; val value = v    }
  case class Bin(k: String, v: ByteStr)  extends DataItem[ByteStr] { val key = k; val value = v    }
  case class Str(k: String, v: String)   extends DataItem[String]  { val key = k; val value = v    }
  case class Delete(key: String)         extends DataOp
} 
Example 137
Source File: Serialize.scala    From Waves   with MIT License 5 votes vote down vote up
package com.wavesplatform.lang.utils

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets

import com.wavesplatform.lang.v1.FunctionHeader
import com.wavesplatform.lang.v1.FunctionHeader.{Native, User}
import com.wavesplatform.lang.v1.Serde.{FH_NATIVE, FH_USER}

object Serialize {
  implicit class ByteBufferOps(val self: ByteBuffer) extends AnyVal {
    def getBytes: Array[Byte] = {
      val len = self.getInt
      if (self.limit() < len || len < 0) {
        throw new Exception(s"Invalid array size ($len)")
      }
      val bytes = new Array[Byte](len)
      self.get(bytes)
      bytes
    }

    def getString: String = new String(getBytes, StandardCharsets.UTF_8)

    def getFunctionHeader: FunctionHeader = self.get() match {
      case FH_NATIVE => Native(self.getShort)
      case FH_USER   => User(getString)
      case x         => throw new RuntimeException(s"Unknown function header type: $x")
    }
  }

  implicit class ByteArrayOutputStreamOps(val self: ByteArrayOutputStream) extends AnyVal {
    def writeShort(value: Short): ByteArrayOutputStream = writeNumber(value, 2)
    def writeInt(value: Int): ByteArrayOutputStream     = writeNumber(value, 4)
    def writeLong(value: Long): ByteArrayOutputStream   = writeNumber(value, 8)

    def writeNumber(n: Long, byteCount: Int): ByteArrayOutputStream = {
      (byteCount - 1 to 0 by -1).foreach { i =>
        self.write((n >> (8 * i) & 0xffL).toInt)
      }
      self
    }

    def writeString(x: String): ByteArrayOutputStream = {
      val bytes = x.getBytes(StandardCharsets.UTF_8)
      self.writeInt(bytes.length)
      self.write(bytes)
      self
    }

    def writeFunctionHeader(h: FunctionHeader): ByteArrayOutputStream = h match {
      case FunctionHeader.Native(id) =>
        self.write(FH_NATIVE)
        self.writeShort(id)
      case FunctionHeader.User(internalName, _) =>
        self.write(FH_USER)
        self.writeString(internalName)
    }
  }
} 
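
As a quick illustration (values made up): numbers are written big-endian and strings are length-prefixed UTF-8, so the writer ops pair directly with the ByteBufferOps reader above:

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import com.wavesplatform.lang.utils.Serialize._

val out = new ByteArrayOutputStream()
out.writeInt(42).writeString("hello")            // 4-byte big-endian int, then length-prefixed UTF-8 string

val in = ByteBuffer.wrap(out.toByteArray)
val n  = in.getInt                               // 42
val s  = in.getString                            // "hello"
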
Example 138
Source File: BasicMessagesRepoSpec.scala    From Waves   with MIT License 5 votes vote down vote up
package com.wavesplatform.network

import java.io.ByteArrayOutputStream

import com.google.protobuf.{ByteString, CodedOutputStream, WireFormat}
import com.wavesplatform.TransactionGen
import com.wavesplatform.common.state.ByteStr
import com.wavesplatform.common.utils.EitherExt2
import com.wavesplatform.mining.MiningConstraints
import com.wavesplatform.protobuf.block._
import com.wavesplatform.protobuf.transaction._
import com.wavesplatform.transaction.Asset.IssuedAsset
import com.wavesplatform.transaction.smart.SetScriptTransaction
import com.wavesplatform.transaction.{DataTransaction, Proofs, TxVersion}
import org.scalatest._

class BasicMessagesRepoSpec extends FreeSpec with Matchers with TransactionGen {
  "PBBlockSpec max length" in {
    val maxSizedHeader = PBBlock.Header(
      Byte.MaxValue,
      ByteString.copyFrom(bytes64gen.sample.get),
      Long.MaxValue,
      ByteString.copyFrom(byteArrayGen(VanillaBlock.GenerationVRFSignatureLength).sample.get),
      Seq.fill(VanillaBlock.MaxFeaturesInBlock)(Short.MaxValue),
      Long.MaxValue,
      Byte.MaxValue,
      ByteString.copyFrom(bytes32gen.sample.get),
      Long.MaxValue,
      ByteString.copyFrom(bytes32gen.sample.get)
    )
    val maxSignature = ByteString.copyFrom(bytes64gen.sample.get)

    val headerSize    = maxSizedHeader.serializedSize
    val signatureSize = maxSignature.toByteArray.length

    val headerPBPrefix      = new ByteArrayOutputStream()
    val codedHeaderPBPrefix = CodedOutputStream.newInstance(headerPBPrefix)
    codedHeaderPBPrefix.writeTag(PBBlock.HEADER_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED)
    codedHeaderPBPrefix.writeUInt32NoTag(headerSize)
    codedHeaderPBPrefix.flush()

    val signaturePBPrefix      = new ByteArrayOutputStream()
    val codedSignaturePBPrefix = CodedOutputStream.newInstance(signaturePBPrefix)
    codedSignaturePBPrefix.writeTag(PBBlock.SIGNATURE_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED)
    codedSignaturePBPrefix.writeUInt32NoTag(maxSignature.toByteArray.length)
    codedSignaturePBPrefix.flush()

    val transactionPBPrefix               = new ByteArrayOutputStream()
    val codedTransactionMaxLengthPBPrefix = CodedOutputStream.newInstance(transactionPBPrefix)
    codedTransactionMaxLengthPBPrefix.writeTag(PBBlock.TRANSACTIONS_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED)
    codedTransactionMaxLengthPBPrefix.writeUInt32NoTag(MiningConstraints.MaxTxsSizeInBytes)
    codedTransactionMaxLengthPBPrefix.flush()

    val minPossibleTransactionSize = PBTransactions
      .protobuf(
        SetScriptTransaction
          .selfSigned(
            TxVersion.V2,
            accountGen.sample.get,
            None,
            1L,
            0L
          )
          .explicitGet()
      )
      .serializedSize

    val maxSize =
      headerPBPrefix.toByteArray.length + headerSize +
        signaturePBPrefix.toByteArray.length + signatureSize +
        MiningConstraints.MaxTxsSizeInBytes +
        (transactionPBPrefix.toByteArray.length * MiningConstraints.MaxTxsSizeInBytes / minPossibleTransactionSize)

    maxSize should be <= PBBlockSpec.maxLength
  }

  "PBTransactionSpec max length" in {
    val maxSizeTransaction = PBSignedTransaction(
      Some(
        PBTransaction(
          Byte.MaxValue,
          ByteString.copyFrom(bytes32gen.sample.get),
          Some(PBAmounts.fromAssetAndAmount(IssuedAsset(ByteStr(bytes32gen.sample.get)), Long.MaxValue)),
          Long.MaxValue,
          Byte.MaxValue
        )
      ),
      Seq.fill(Proofs.MaxProofs)(ByteString.copyFrom(byteArrayGen(Proofs.MaxProofSize).sample.get))
    )

    val dataPBPrefix      = new ByteArrayOutputStream()
    val codedDataPBPrefix = CodedOutputStream.newInstance(dataPBPrefix)
    codedDataPBPrefix.writeTag(Transaction.DATA_TRANSACTION_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED)
    codedDataPBPrefix.writeUInt32NoTag(DataTransaction.MaxProtoBytes)
    codedDataPBPrefix.flush()

    val size = maxSizeTransaction.serializedSize + dataPBPrefix.toByteArray.length + DataTransaction.MaxProtoBytes

    size should be <= PBTransactionSpec.maxLength
  }
} 
Example 139
Source File: TextDisplay.scala    From almond   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package almond.display

import java.io.{ByteArrayOutputStream, InputStream}
import java.net.{HttpURLConnection, URL, URLConnection}
import java.nio.charset.{Charset, StandardCharsets}

import scala.util.Try

abstract class TextDisplay extends UpdatableDisplay {

  def contentOrUrl: Either[URL, String]

  def content: Option[String] = contentOrUrl.right.toOption
  def url: Option[URL] = contentOrUrl.left.toOption

  def finalContent: String =
    contentOrUrl match {
      case Left(url) =>
        TextDisplay.urlContent(url)
      case Right(c) => c
    }

  def withContent(code: String): UpdatableDisplay
  def withUrl(url: String): UpdatableDisplay

}

object TextDisplay {

  type Builder[T] = Display.Builder[String, T]

  private[almond] def readFully(is: InputStream): Array[Byte] = {

    val buffer = new ByteArrayOutputStream
    val data = Array.ofDim[Byte](16384)

    var nRead = 0
    while ( {
      nRead = is.read(data, 0, data.length)
      nRead != -1
    })
      buffer.write(data, 0, nRead)

    buffer.flush()
    buffer.toByteArray
  }

  def urlContent(url: URL): String = {

    var conn: URLConnection = null
    val (rawContent, charsetOpt) = try {
      conn = url.openConnection()
      conn.setConnectTimeout(5000) // allow users to tweak that?
      val b = readFully(conn.getInputStream)
      val charsetOpt0 = conn match {
        case conn0: HttpURLConnection =>
          conn0
            .getContentType
            .split(';')
            .map(_.trim)
            .find(_.startsWith("charset="))
            .map(_.stripPrefix("charset="))
            .filter(Charset.isSupported)
            .map(Charset.forName)
        case _ =>
          None
      }
      (b, charsetOpt0)
    } finally {
      if (conn != null) {
        Try(conn.getInputStream.close())
        conn match {
          case conn0: HttpURLConnection =>
            Try(conn0.getErrorStream.close())
            Try(conn0.disconnect())
          case _ =>
        }
      }
    }

    new String(rawContent, charsetOpt.getOrElse(StandardCharsets.UTF_8))
  }
} 
Example 140
Source File: JupyterApiImpl.scala    From almond   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package almond

import java.io.ByteArrayOutputStream
import java.nio.charset.StandardCharsets

import almond.api.{FullJupyterApi, JupyterApi}
import almond.internals.HtmlAnsiOutputStream
import almond.interpreter.api.CommHandler
import ammonite.util.Ref
import pprint.{TPrint, TPrintColors}

import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag


final class JupyterApiImpl(
  execute: Execute,
  commHandlerOpt: => Option[CommHandler],
  replApi: ReplApiImpl,
  silent0: Ref[Boolean]
) extends FullJupyterApi {

  protected def printOnChange[T](
    value: => T,
    ident: String,
    custom: Option[String],
    onChange: Option[(T => Unit) => Unit],
    onChangeOrError: Option[(Either[Throwable, T] => Unit) => Unit]
  )(implicit
    tprint: TPrint[T],
    tcolors: TPrintColors,
    classTagT: ClassTag[T]
  ): Iterator[String] =
    replApi.printSpecial(value, ident, custom, onChange, onChangeOrError, replApi.pprinter, Some(updatableResults))(tprint, tcolors, classTagT).getOrElse {
      replApi.Internal.print(value, ident, custom)(tprint, tcolors, classTagT)
    }

  override def silent(s: Boolean): Unit = silent0.update(s)
  override def silent: Boolean = silent0.apply()

  protected def ansiTextToHtml(text: String): String = {
    val baos = new ByteArrayOutputStream
    val haos = new HtmlAnsiOutputStream(baos)
    haos.write(text.getBytes(StandardCharsets.UTF_8))
    haos.close()
    baos.toString("UTF-8")
  }

  def stdinOpt(prompt: String, password: Boolean): Option[String] =
    for (m <- execute.currentInputManagerOpt)
      yield Await.result(m.readInput(prompt, password), Duration.Inf)

  override def changingPublish =
    execute.currentPublishOpt.getOrElse(super.changingPublish)
  override def commHandler =
    commHandlerOpt.getOrElse(super.commHandler)

  protected def updatableResults0: JupyterApi.UpdatableResults =
    execute.updatableResults
} 
Example 141
Source File: ByteBufferOutputStream.scala    From aloha   with Apache License 2.0 5 votes vote down vote up
package me.jrwang.aloha.rpc.serializer

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer


class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) {

  def this() = this(32)

  def getCount(): Int = count

  private[this] var closed: Boolean = false

  override def write(b: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b)
  }

  override def write(b: Array[Byte], off: Int, len: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b, off, len)
  }

  override def reset(): Unit = {
    require(!closed, "cannot reset a closed ByteBufferOutputStream")
    super.reset()
  }

  override def close(): Unit = {
    if (!closed) {
      super.close()
      closed = true
    }
  }

  def toByteBuffer: ByteBuffer = {
    require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed")
    ByteBuffer.wrap(buf, 0, count)
  }
} 
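
A minimal usage sketch (not from the aloha sources): the stream must be closed before calling toByteBuffer, which then wraps the internal array without copying:

val out = new ByteBufferOutputStream()
out.write(Array[Byte](1, 2, 3), 0, 3)
out.close()                                      // required before toByteBuffer
val buf = out.toByteBuffer                       // view over the internal buffer, no copy
assert(buf.remaining() == 3)
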
Example 142
Source File: MarkdownReporterTest.scala    From drunken-data-quality   with Apache License 2.0 5 votes vote down vote up
package de.frosner.ddq.reporters

import java.io.{ByteArrayOutputStream, PrintStream}

import de.frosner.ddq.constraints._
import de.frosner.ddq.core._
import de.frosner.ddq.testutils.{DummyConstraint, DummyConstraintResult}
import org.apache.spark.sql.DataFrame
import org.mockito.Mockito._
import org.scalatest.mock.MockitoSugar
import org.scalatest.{FlatSpec, Matchers}

class MarkdownReporterTest extends FlatSpec with Matchers with MockitoSugar {

  "A Markdown reporter" should "produce correct output for a check with constraints" in {
    val baos = new ByteArrayOutputStream()
    val markdownReporter = new MarkdownReporter(new PrintStream(baos))

    val df = mock[DataFrame]
    val dfName = "myDf"
    val dfColumns = Array("1", "2")
    val dfCount = 5
    when(df.columns).thenReturn(dfColumns)

    val header = s"Checking $dfName"
    val prologue = s"It has a total number of ${dfColumns.size} columns and $dfCount rows."
    val message1 = "1"
    val status1 = ConstraintSuccess
    val constraint1 = DummyConstraint(message1, status1)
    val result1 = constraint1.fun(df)

    val message2 = "2"
    val status2 = ConstraintFailure
    val constraint2 = DummyConstraint(message2, status2)
    val result2 = constraint2.fun(df)

    val message3 = "3"
    val status3 = ConstraintError(new IllegalArgumentException())
    val constraint3 = DummyConstraint(message3, status3)
    val result3 = DummyConstraintResult(constraint3, message3, status3)

    val constraints = Map[Constraint, ConstraintResult[Constraint]](
      constraint1 -> result1,
      constraint2 -> result2,
      constraint3 -> result3
    )

    val check = Check(df, Some(dfName), Option.empty, constraints.keys.toSeq)

    markdownReporter.report(CheckResult(constraints, check, dfCount))
    val expectedOutput = s"""**$header**

$prologue

- *SUCCESS*: ${result1.message}
- *FAILURE*: ${result2.message}
- *ERROR*: ${result3.message}

"""

    baos.toString shouldBe expectedOutput
  }

  it should "produce correct output for a check without constraint" in {
    val baos = new ByteArrayOutputStream()
    val markdownReporter = new MarkdownReporter(new PrintStream(baos))

    val df = mock[DataFrame]
    val dfName = "myDf"
    val dfColumns = Array("1", "2")
    val dfCount = 5
    when(df.columns).thenReturn(dfColumns)

    val header = s"Checking $dfName"
    val prologue = s"It has a total number of ${dfColumns.size} columns and $dfCount rows."
    val check = Check(df, Some(dfName), Option.empty, Seq.empty)

    markdownReporter.report(CheckResult(Map.empty, check, dfCount))
    val expectedOutput = s"""**$header**

$prologue

Nothing to check!

"""

    baos.toString shouldBe expectedOutput
  }

} 
Example 143
Source File: ConsoleReporterTest.scala    From drunken-data-quality   with Apache License 2.0 5 votes vote down vote up
package de.frosner.ddq.reporters

import java.io.{ByteArrayOutputStream, PrintStream}

import de.frosner.ddq.constraints._
import de.frosner.ddq.core._
import de.frosner.ddq.testutils.{DummyConstraint, DummyConstraintResult}
import org.apache.spark.sql.DataFrame
import org.mockito.Mockito._
import org.scalatest.mock.MockitoSugar
import org.scalatest.{FlatSpec, Matchers}

class ConsoleReporterTest extends FlatSpec with Matchers with MockitoSugar {

  "A Console reporter" should "produce correct output for a check with constraints" in {
    val baos = new ByteArrayOutputStream()
    val consoleReporter = new ConsoleReporter(new PrintStream(baos))

    val df = mock[DataFrame]
    val displayName = "myDf"
    val dfColumns = Array("1", "2")
    val dfCount = 5
    when(df.columns).thenReturn(dfColumns)

    val header = s"Checking $displayName"
    val prologue = s"It has a total number of ${dfColumns.size} columns and $dfCount rows."

    val message1 = "1"
    val status1 = ConstraintSuccess
    val constraint1 = DummyConstraint(message1, status1)
    val result1 = constraint1.fun(df)

    val message2 = "2"
    val status2 = ConstraintFailure
    val constraint2 = DummyConstraint(message2, status2)
    val result2 = constraint2.fun(df)

    val message3 = "3"
    val status3 = ConstraintError(new IllegalArgumentException())
    val constraint3 = DummyConstraint(message3, status3)
    val result3 = DummyConstraintResult(constraint3, message3, status3)

    val constraints = Map[Constraint, ConstraintResult[Constraint]](
      constraint1 -> result1,
      constraint2 -> result2,
      constraint3 -> result3
    )
    val check = Check(df, Some(displayName), Option.empty, constraints.keys.toSeq)

    consoleReporter.report(CheckResult(constraints, check, dfCount))
    val expectedOutput = s"""${Console.BLUE}$header${Console.RESET}
${Console.BLUE}$prologue${Console.RESET}
${Console.GREEN}- ${result1.message}${Console.RESET}
${Console.RED}- ${result2.message}${Console.RESET}
${Console.YELLOW}- ${result3.message}${Console.RESET}

"""

    baos.toString shouldBe expectedOutput
  }

  it should "produce correct output for a check without constraint" in {
    val baos = new ByteArrayOutputStream()
    val consoleReporter = new ConsoleReporter(new PrintStream(baos))

    val df = mock[DataFrame]
    val displayName = "myDf"
    val dfColumns = Array("1", "2")
    val dfCount = 5
    when(df.columns).thenReturn(dfColumns)

    val header = s"Checking $displayName"
    val prologue = s"It has a total number of ${dfColumns.size} columns and $dfCount rows."
    val check = Check(df, Some(displayName), Option.empty, Seq.empty)

    consoleReporter.report(CheckResult(Map.empty, check, dfCount))
    val expectedOutput = s"""${Console.BLUE}$header${Console.RESET}
${Console.BLUE}$prologue${Console.RESET}
${Console.BLUE}Nothing to check!${Console.RESET}

"""

    baos.toString shouldBe expectedOutput
  }

} 
Example 144
Source File: ClientSpec.scala    From scala-ipfs-api   with MIT License 5 votes vote down vote up
package io.ipfs.api

import java.io.{ByteArrayOutputStream, InputStream}
import java.nio.file.{Files, Paths, StandardOpenOption}
import java.util

import io.ipfs.api.ClientSpec._
import org.specs2.mutable._

import scala.util.Random

class ClientSpec extends Specification {
  isolated

  val client = new Client("localhost")
  "IPFS client" should {

    "show the version" in  {
      client.version mustEqual "0.4.2"
    }

    "have an ID" in {
      client.id.ID.length mustNotEqual 0
    }

    "store data" in {
      val name = randomName
      val add = store(name = name)
      add.length mustEqual 1
      val added = add(0)
      added.Name mustEqual name
      added.Hash.length mustNotEqual 0
    }

    "cat data" in {
      val data = randomBytes
      val added = store(data = data)(0)

      val in: InputStream = client.cat(added.Hash)
      util.Arrays.equals(toArray(in), data) mustEqual true
    }

    "dht put and get" in {
      val (key, value) = (random.nextString(10), random.nextString(10))
      val puts: Array[DHTResponse] = client.dhtPut(key, value)
      puts.length mustNotEqual 0

      client.dhtGet(key).Extra mustEqual value
    }
  }

  private def randomBytes = {
    val buffer = new Array[Byte](0x1500)
    random.nextBytes(buffer)
    buffer
  }

  private def store(name: String = randomName, data: Array[Byte] = randomBytes): Array[Add] = {
    val storePath = Paths.get(name)
    Files.write(storePath, data, StandardOpenOption.CREATE)
    client.add(Array(storePath))
  }
}

object ClientSpec {
  val random = new Random(666)
  def randomName: String = random.nextInt()+".test.dat"

  def toArray(in: InputStream): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    try {
      val buff  = new Array[Byte](0x1000)
      var nRead = 0
      while ( {nRead = in.read(buff);nRead} != -1)
        out.write(buff, 0, nRead)
    } finally {
      in.close()
    }
    out.toByteArray
  }
} 
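
On Java 9 and later the manual copy loop in toArray can be replaced by InputStream.readAllBytes (a standard-library method, not part of this project), e.g.:

def toArrayJdk9(in: InputStream): Array[Byte] =
  try in.readAllBytes()
  finally in.close()
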
Example 145
Source File: BigBgenDatasource.scala    From glow   with Apache License 2.0 5 votes vote down vote up
package io.projectglow.bgen

import java.io.ByteArrayOutputStream

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLUtils}
import org.apache.spark.sql.sources.DataSourceRegister

import io.projectglow.common.logging.{HlsEventRecorder, HlsTagValues}
import io.projectglow.sql.BigFileDatasource
import io.projectglow.sql.util.ComDatabricksDataSource

class BigBgenDatasource extends BigFileDatasource with DataSourceRegister {

  override def shortName(): String = "bigbgen"

  override def serializeDataFrame(
      options: Map[String, String],
      data: DataFrame): RDD[Array[Byte]] = {
    BigBgenDatasource.serializeDataFrame(options, data)
  }

}

class ComDatabricksBigBgenDatasource extends BigBgenDatasource with ComDatabricksDataSource

object BigBgenDatasource extends HlsEventRecorder {

  import io.projectglow.common.BgenOptions._

  private def parseOptions(options: Map[String, String]): BigBgenOptions = {
    val bitsPerProb = options.getOrElse(BITS_PER_PROB_KEY, BITS_PER_PROB_DEFAULT_VALUE).toInt
    val maxPloidy = options.getOrElse(MAX_PLOIDY_KEY, MAX_PLOIDY_VALUE).toInt
    val defaultPloidy = options.getOrElse(DEFAULT_PLOIDY_KEY, DEFAULT_PLOIDY_VALUE).toInt
    val defaultPhasing = options.getOrElse(DEFAULT_PHASING_KEY, DEFAULT_PHASING_VALUE).toBoolean
    BigBgenOptions(bitsPerProb, maxPloidy, defaultPloidy, defaultPhasing)
  }

  private def logBgenWrite(parsedOptions: BigBgenOptions): Unit = {
    val logOptions = Map(
      BITS_PER_PROB_KEY -> parsedOptions.bitsPerProb,
      MAX_PLOIDY_KEY -> parsedOptions.maxPloidy,
      DEFAULT_PLOIDY_KEY -> parsedOptions.defaultPloidy,
      DEFAULT_PHASING_KEY -> parsedOptions.defaultPhasing
    )
    recordHlsEvent(HlsTagValues.EVENT_BGEN_WRITE, logOptions)
  }

  def serializeDataFrame(options: Map[String, String], data: DataFrame): RDD[Array[Byte]] = {

    val parsedOptions = parseOptions(options)
    logBgenWrite(parsedOptions)

    val dSchema = data.schema
    val numVariants = data.count
    val rawRdd = data.queryExecution.toRdd

    val inputRdd = if (rawRdd.getNumPartitions == 0) {
      logger.warn("Writing BGEN header only as the input DataFrame has zero partitions.")
      SQLUtils.createEmptyRDD(data.sparkSession)
    } else {
      rawRdd
    }

    inputRdd.mapPartitionsWithIndex {
      case (idx, it) =>
        val baos = new ByteArrayOutputStream()

        val writeHeader = idx == 0
        val writer = new BgenRecordWriter(
          baos,
          dSchema,
          writeHeader,
          numVariants,
          parsedOptions.bitsPerProb,
          parsedOptions.maxPloidy,
          parsedOptions.defaultPloidy,
          parsedOptions.defaultPhasing
        )

        it.foreach { row =>
          writer.write(row)
        }

        writer.close()
        Iterator(baos.toByteArray)
    }
  }
}

case class BigBgenOptions(
    bitsPerProb: Int,
    maxPloidy: Int,
    defaultPloidy: Int,
    defaultPhasing: Boolean) 
Example 146
Source File: ModelStateSerde.scala    From kafka-with-akka-streams-kafka-streams-tutorial   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.scala.kafkastreams.store.store

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.util

import com.lightbend.model.modeldescriptor.ModelDescriptor
import com.lightbend.scala.modelServer.model.PMML.PMMLModel
import com.lightbend.scala.modelServer.model.tensorflow.TensorFlowModel
import com.lightbend.scala.modelServer.model.{ModelToServeStats, ModelWithDescriptor}
import com.lightbend.scala.kafkastreams.store.StoreState
import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer}


class ModelStateSerde extends Serde[StoreState] {

  private val mserializer = new ModelStateSerializer()
  private val mdeserializer = new ModelStateDeserializer()

  override def deserializer() = mdeserializer

  override def serializer() = mserializer

  override def configure(configs: util.Map[String, _], isKey: Boolean) = {}

  override def close() = {}
}

object ModelStateDeserializer {
  val factories = Map(
    ModelDescriptor.ModelType.PMML.index -> PMMLModel,
    ModelDescriptor.ModelType.TENSORFLOW.index -> TensorFlowModel
  )
}

class ModelStateDeserializer extends Deserializer[StoreState] {

  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}

  override def deserialize(topic: String, data: Array[Byte]): StoreState = {
    if(data != null) {
      val input = new DataInputStream(new ByteArrayInputStream(data))
      new StoreState(ModelWithDescriptor.readModel(input), ModelWithDescriptor.readModel(input),
        ModelToServeStats.readServingInfo(input), ModelToServeStats.readServingInfo(input))
    }
    else new StoreState()
  }

  override def close(): Unit = {}

}

class ModelStateSerializer extends Serializer[StoreState] {

  private val bos = new ByteArrayOutputStream()

  override def serialize(topic: String, state: StoreState): Array[Byte] = {
    bos.reset()
    val output = new DataOutputStream(bos)
    ModelWithDescriptor.writeModel(output, state.currentModel.orNull)
    ModelWithDescriptor.writeModel(output, state.newModel.orNull)
    ModelToServeStats.writeServingInfo(output, state.currentState.orNull)
    ModelToServeStats.writeServingInfo(output, state.newState.orNull)
    try {
      output.flush()
      output.close()
    } catch {
      case t: Throwable =>
    }
    bos.toByteArray
  }

  override def close(): Unit = {}

  override def configure(configs: util.Map[String, _], isKey: Boolean) = {}
} 
Example 147
Source File: VwSparseMultilabelPredictorTest.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.models.vw.jni.multilabel

import java.io.{ByteArrayOutputStream, File, FileInputStream}

import com.eharmony.aloha.ModelSerializationTestHelper
import com.eharmony.aloha.io.sources.{Base64StringSource, ExternalSource, ModelSource}
import org.apache.commons.codec.binary.Base64
import org.apache.commons.io.IOUtils
import org.junit.Assert._
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner
import vowpalWabbit.learner.{VWActionScoresLearner, VWLearners}


@RunWith(classOf[BlockJUnit4ClassRunner])
class VwSparseMultilabelPredictorTest extends ModelSerializationTestHelper {
  import VwSparseMultilabelPredictorTest._

  @Test def testSerializability(): Unit = {
    val predictor = getPredictor(getModelSource(), 3)
    val ds = serializeDeserializeRoundTrip(predictor)
    assertEquals(predictor, ds)
    assertEquals(predictor.vwParams(), ds.vwParams())
    assertNotNull(ds.vwModel)
  }

  @Test def testVwParameters(): Unit = {
    val numLabelsInTrainingSet = 3
    val predictor = getPredictor(getModelSource(), numLabelsInTrainingSet)

    predictor.vwParams() match {
      case Data(vwBinFilePath, ringSize) =>
        checkVwBinFile(vwBinFilePath)
        checkVwRingSize(numLabelsInTrainingSet, ringSize.toInt)
      case ps => fail(s"Unexpected VW parameters format.  Found string: $ps")
    }
  }
}

object VwSparseMultilabelPredictorTest {
  private val Data = """\s*-i\s+(\S+)\s+--ring_size\s+(\d+)\s+--testonly\s+--quiet""".r

  private def getModelSource(): ModelSource = {
    val f = File.createTempFile("i_dont", "care")
    f.deleteOnExit()
    val learner = VWLearners.create[VWActionScoresLearner](s"--quiet --csoaa_ldf mc --csoaa_rank -f ${f.getCanonicalPath}")
    learner.close()
    val baos = new ByteArrayOutputStream()
    IOUtils.copy(new FileInputStream(f), baos)
    val src = Base64StringSource(Base64.encodeBase64URLSafeString(baos.toByteArray))
    ExternalSource(src.localVfs)
  }

  private def getPredictor(modelSrc: ModelSource, numLabelsInTrainingSet: Int) =
    VwSparseMultilabelPredictor[Any](modelSrc, Nil, Nil, numLabelsInTrainingSet)

  private def checkVwBinFile(vwBinFilePath: String): Unit = {
    val vwBinFile = new File(vwBinFilePath)
    assertTrue("VW binary file should have been written to disk", vwBinFile.exists())
    vwBinFile.deleteOnExit()
  }

  private def checkVwRingSize(numLabelsInTrainingSet: Int, ringSize: Int): Unit = {
    assertEquals(
      "vw --ring_size parameter is incorrect:",
      numLabelsInTrainingSet + VwSparseMultilabelPredictor.AddlVwRingSize,
      ringSize
    )
  }
} 
Example 148
Source File: PrintProtosTest.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.cli.dataset

import java.io.{ByteArrayOutputStream, IOException}
import java.util.Arrays

import com.eharmony.aloha.test.proto.Testing.{PhotoProto, UserProto}
import com.eharmony.aloha.test.proto.Testing.GenderProto.{FEMALE, MALE}
import com.google.protobuf.GeneratedMessage
import org.apache.commons.codec.binary.Base64
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner
import org.junit.{Ignore, Test}


@RunWith(classOf[BlockJUnit4ClassRunner])
@Ignore
class PrintProtosTest {
    @Test def testPrintProtos(): Unit = {
        System.out.println(alan)
        System.out.println(kate)
    }

    @throws(classOf[IOException])
    def alan: String = {
        val t = UserProto.newBuilder.
            setId(1).
            setName("Alan").
            setGender(MALE).
            setBmi(23).
            addAllPhotos(Arrays.asList(
                PhotoProto.newBuilder.
                    setId(1).
                    setAspectRatio(1).
                    setHeight(1).
                    build,
                PhotoProto.newBuilder.
                    setId(2).
                    setAspectRatio(2).
                    setHeight(2).build
            )).build
        b64(t)
    }

    def kate: String = {
        val t = UserProto.newBuilder.
            setId(1).
            setName("Kate").
            setGender(FEMALE).
            addAllPhotos(Arrays.asList(
                PhotoProto.newBuilder.
                    setId(3).
                    setAspectRatio(3).
                    setHeight(3).
                    build
            )).build
        b64(t)
    }

    def b64[M <: GeneratedMessage](p: M): String = {
        val baos: ByteArrayOutputStream = new ByteArrayOutputStream
        p.writeTo(baos)
        new String(Base64.encodeBase64(baos.toByteArray))
    }
} 
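
Since generated protobuf messages already expose toByteArray, the intermediate ByteArrayOutputStream in b64 is optional; an equivalent sketch using the same imports:

def b64Direct[M <: GeneratedMessage](p: M): String =
  new String(Base64.encodeBase64(p.toByteArray))
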
Example 149
Source File: ReadableByString.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.io

import java.io.{InputStreamReader, ByteArrayOutputStream, Reader, InputStream}
import org.apache.commons.io.IOUtils

trait ReadableByString[A] {
    final def fromReader(r: Reader): A = {
        try {
            val baos = new ByteArrayOutputStream  // Don't need to close.
            IOUtils.copy(r, baos, inputCharset)
            fromString(new String(baos.toByteArray))
        }
        finally {
            IOUtils.closeQuietly(r)
        }
    }
} 
Example 150
Source File: ContainerReadableByString.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.io

import scala.language.higherKinds
import org.apache.commons.io.IOUtils
import java.lang.String
import java.io.{ByteArrayOutputStream, Reader, InputStreamReader, InputStream}

trait ContainerReadableByString[C[_]] {
    def fromReader[A](r: Reader): C[A] = {
        try {
            val baos = new ByteArrayOutputStream  // Don't need to close.
            IOUtils.copy(r, baos, inputCharset)
            fromString[A](new String(baos.toByteArray))
        }
        finally {
            IOUtils.closeQuietly(r)
        }
    }
} 
Example 151
Source File: SchrodingerExceptionTest.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.ex

import org.junit.{Before, Test}
import org.junit.Assert._
import java.io.{PrintWriter, OutputStreamWriter, ByteArrayOutputStream, PrintStream}

class SchrodingerExceptionTest {

    
    private[this] var ex: SchrodingerException = _

    @Before def before() {
        ex = new SchrodingerException
    }

    @Test def testFillInStackTrace() {
        assertTrue(new SchrodingerException().fillInStackTrace().isInstanceOf[SchrodingerException])
    }

    @Test(expected = classOf[SchrodingerException]) def testGetMessage() {
        ex.getMessage()
    }

    @Test(expected = classOf[SchrodingerException]) def testGetStackTrace() {
        ex.getStackTrace()
    }

    @Test(expected = classOf[SchrodingerException]) def testGetCause() {
        ex.getCause()
    }

    @Test(expected = classOf[SchrodingerException]) def testSetStackTrace() {
        ex.setStackTrace(Array.empty)
    }

    @Test(expected = classOf[SchrodingerException]) def testGetLocalizedMessage() {
        ex.getLocalizedMessage()
    }

    @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceEmpty() {
        ex.printStackTrace()
    }

    @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceStream() {
        val baos = new ByteArrayOutputStream()
        val ps = new PrintStream(baos)
        ex.printStackTrace(ps)
    }

    @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceWriter() {
        val baos = new ByteArrayOutputStream()
        val osw = new OutputStreamWriter(baos)
        val ps = new PrintWriter(osw)
        ex.printStackTrace(ps)
    }

    @Test(expected = classOf[SchrodingerException]) def testInitCause() {
        ex.initCause(new Throwable)
    }

    @Test(expected = classOf[SchrodingerException]) def testToString() {
        ex.toString()
    }

    @Test def testNoThrowForSchrodingerExceptionWithSchrodingerExceptionCause() {
        new SchrodingerException(new SchrodingerException)
    }

    @Test def testNoThrowForSchrodingerExceptionWithExceptionCause() {
        new SchrodingerException(new Exception)
    }

    @Test(expected = classOf[SchrodingerException]) def testThrowForThrowableWithSchrodingerExceptionCause() {
        new Throwable(ex)
    }

    @Test(expected = classOf[SchrodingerException]) def testThrowForExceptionWithSchrodingerExceptionCause() {
        new Exception(ex)
    }

    @Test(expected = classOf[SchrodingerException]) def testThrowForRuntimeExceptionWithSchrodingerExceptionCause() {
        new RuntimeException(ex)
    }
} 
Example 152
Source File: FileBasedLedgerDataExportSpec.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.export

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.time.Instant

import com.daml.ledger.participant.state.v1
import com.google.protobuf.ByteString
import org.scalatest.mockito.MockitoSugar
import org.scalatest.{Matchers, WordSpec}

class FileBasedLedgerDataExportSpec extends WordSpec with Matchers with MockitoSugar {
  // XXX SC remove in Scala 2.13; see notes in ConfSpec
  import scala.collection.GenTraversable, org.scalatest.enablers.Containing
  private[this] implicit def `fixed sig containingNatureOfGenTraversable`[
      E: org.scalactic.Equality,
      TRAV]: Containing[TRAV with GenTraversable[E]] =
    Containing.containingNatureOfGenTraversable[E, GenTraversable]

  "addParentChild" should {
    "add entry to correlation ID mapping" in {
      val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream])
      instance.addParentChild("parent", "child")

      instance.correlationIdMapping should contain("child" -> "parent")
    }
  }

  "addToWriteSet" should {
    "append to existing data" in {
      val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream])
      instance.addParentChild("parent", "child")
      instance.addToWriteSet("child", Seq(keyValuePairOf("a", "b")))
      instance.addToWriteSet("child", Seq(keyValuePairOf("c", "d")))

      instance.bufferedKeyValueDataPerCorrelationId should contain(
        "parent" ->
          Seq(keyValuePairOf("a", "b"), keyValuePairOf("c", "d")))
    }
  }

  "finishedProcessing" should {
    "remove all data such as submission info, write-set and child correlation IDs" in {
      val dataOutputStream = new DataOutputStream(new ByteArrayOutputStream())
      val instance = new FileBasedLedgerDataExporter(dataOutputStream)
      instance.addSubmission(
        ByteString.copyFromUtf8("an envelope"),
        "parent",
        Instant.now(),
        v1.ParticipantId.assertFromString("id"))
      instance.addParentChild("parent", "parent")
      instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b")))

      instance.finishedProcessing("parent")

      instance.inProgressSubmissions shouldBe empty
      instance.bufferedKeyValueDataPerCorrelationId shouldBe empty
      instance.correlationIdMapping shouldBe empty
    }
  }

  "serialized submission" should {
    "be readable back" in {
      val baos = new ByteArrayOutputStream()
      val dataOutputStream = new DataOutputStream(baos)
      val instance = new FileBasedLedgerDataExporter(dataOutputStream)
      val expectedRecordTimeInstant = Instant.now()
      val expectedParticipantId = v1.ParticipantId.assertFromString("id")
      instance.addSubmission(
        ByteString.copyFromUtf8("an envelope"),
        "parent",
        expectedRecordTimeInstant,
        v1.ParticipantId.assertFromString("id"))
      instance.addParentChild("parent", "parent")
      instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b")))

      instance.finishedProcessing("parent")

      val dataInputStream = new DataInputStream(new ByteArrayInputStream(baos.toByteArray))
      val (actualSubmissionInfo, actualWriteSet) = Serialization.readEntry(dataInputStream)
      actualSubmissionInfo.submissionEnvelope should be(ByteString.copyFromUtf8("an envelope"))
      actualSubmissionInfo.correlationId should be("parent")
      actualSubmissionInfo.recordTimeInstant should be(expectedRecordTimeInstant)
      actualSubmissionInfo.participantId should be(expectedParticipantId)
      actualWriteSet should be(Seq(keyValuePairOf("a", "b")))
    }
  }

  private def keyValuePairOf(key: String, value: String): (ByteString, ByteString) =
    ByteString.copyFromUtf8(key) -> ByteString.copyFromUtf8(value)
} 
Example 153
Source File: DevboxSetupMain.scala    From devbox   with Apache License 2.0 5 votes vote down vote up
package devbox.agent

import java.io.ByteArrayOutputStream

import scala.util.control.NonFatal


object DevboxSetupMain {

  def main(args: Array[String]): Unit = {
    val baos = new ByteArrayOutputStream()
    os.Internals.transfer(System.in, baos)
    val buffer = baos.toByteArray
    val allSetupFilesAndCommands =
      upickle.default.readBinary[Seq[Either[(String, Array[Byte]), String]]](buffer)

    val userName = sys.env.getOrElse("DEVBOX_USER", os.proc("whoami").call().out.trim)

    allSetupFilesAndCommands.foreach{
      case Left((destination, bytes)) =>

        // we run as root, so we need to expand ~ to DEVBOX_USER here
        val expandedDestination = destination match{
          case s"~/$rest" => os.root / "home" / userName / os.SubPath(rest)
          case dest => os.Path(dest)
        }
        try {
          os.write.over(expandedDestination, bytes, createFolders = true)
          os.perms.set(expandedDestination, "rwxrwxrwx")
        } catch {
          case NonFatal(e) =>
            println(s"Error writing file $destination: ${e.getMessage}")
        }
      case Right(cmd) =>
        println("Running remote command: " + cmd)
        os.proc("bash", "-c", cmd).call()
    }
  }
} 
Example 154
Source File: BytecodeUtils.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.graphx.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable.HashSet
import scala.language.existentials

import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor}
import org.apache.xbean.asm5.Opcodes._

import org.apache.spark.util.Utils


// Enclosing object and skipClass reconstructed for this excerpt (an assumption,
// simplified from Spark's BytecodeUtils): skipClass filters out JDK/Scala library
// classes before they are loaded reflectively below.
private[graphx] object BytecodeUtils {

  private def skipClass(owner: String): Boolean = {
    val className = owner.replace('/', '.')
    className.startsWith("scala.") || className.startsWith("java.") || className.startsWith("sun.")
  }

  private class MethodInvocationFinder(className: String, methodName: String)
    extends ClassVisitor(ASM5) {

    val methodsInvoked = new HashSet[(Class[_], String)]

    override def visitMethod(access: Int, name: String, desc: String,
                             sig: String, exceptions: Array[String]): MethodVisitor = {
      if (name == methodName) {
        new MethodVisitor(ASM5) {
          override def visitMethodInsn(
              op: Int, owner: String, name: String, desc: String, itf: Boolean) {
            if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
              if (!skipClass(owner)) {
                methodsInvoked.add((Utils.classForName(owner.replace("/", ".")), name))
              }
            }
          }
        }
      } else {
        null
      }
    }
  }
} 
Example 155
Source File: RawTextSender.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam


private[streaming]
object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
} 
Example 156
Source File: RateLimitedOutputStreamSuite.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.ByteArrayOutputStream
import java.nio.charset.StandardCharsets
import java.util.concurrent.TimeUnit._

import org.apache.spark.SparkFunSuite

class RateLimitedOutputStreamSuite extends SparkFunSuite {

  private def benchmark[U](f: => U): Long = {
    val start = System.nanoTime
    f
    System.nanoTime - start
  }

  test("write") {
    val underlying = new ByteArrayOutputStream
    val data = "X" * 41000
    val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000)
    val elapsedNs = benchmark { stream.write(data.getBytes(StandardCharsets.UTF_8)) }

    val seconds = SECONDS.convert(elapsedNs, NANOSECONDS)
    assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.")
    assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.")
    assert(underlying.toString("UTF-8") === data)
  }
} 
Example 157
Source File: ByteBufferOutputStream.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer


private[spark] class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) {

  def this() = this(32)

  def getCount(): Int = count

  private[this] var closed: Boolean = false

  override def write(b: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b)
  }

  override def write(b: Array[Byte], off: Int, len: Int): Unit = {
    require(!closed, "cannot write to a closed ByteBufferOutputStream")
    super.write(b, off, len)
  }

  override def reset(): Unit = {
    require(!closed, "cannot reset a closed ByteBufferOutputStream")
    super.reset()
  }

  override def close(): Unit = {
    if (!closed) {
      super.close()
      closed = true
    }
  }

  def toByteBuffer: ByteBuffer = {
    require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed")
    ByteBuffer.wrap(buf, 0, count)
  }
} 
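A short usage sketch for the ByteBufferOutputStream above (hypothetical, and only callable from within the spark package because of the private[spark] modifier): toByteBuffer may only be invoked after close(), which the require checks enforce, and the returned buffer wraps the internal array without copying.

val out = new ByteBufferOutputStream()
out.write("hello".getBytes(java.nio.charset.StandardCharsets.UTF_8))
out.close()
val buffer: ByteBuffer = out.toByteBuffer
assert(buffer.remaining() == 5)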
Example 158
Source File: PortableDataStream.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.input

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import scala.collection.JavaConverters._

import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit}


// Enclosing class reconstructed for this excerpt (a simplified sketch of Spark's
// PortableDataStream): `path` and `open()` are derived from the Hadoop combine split.
class PortableDataStream(isplit: CombineFileSplit, context: TaskAttemptContext, index: Integer) {

  private val path: String = isplit.getPath(index).toString

  def open(): DataInputStream =
    isplit.getPath(index).getFileSystem(context.getConfiguration).open(isplit.getPath(index))

  def toArray(): Array[Byte] = {
    val stream = open()
    try {
      ByteStreams.toByteArray(stream)
    } finally {
      Closeables.close(stream, true)
    }
  }

  def getPath(): String = path
} 
Example 159
Source File: PythonRDDSuite.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.api.python

import java.io.{ByteArrayOutputStream, DataOutputStream}
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite

class PythonRDDSuite extends SparkFunSuite {

  test("Writing large strings to the worker") {
    val input: List[String] = List("a"*100000)
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    PythonRDD.writeIteratorToStream(input.iterator, buffer)
  }

  test("Handle nulls gracefully") {
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    // Should not have NPE when write an Iterator with null in it
    // The correctness will be tested in Python
    PythonRDD.writeIteratorToStream(Iterator("a", null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer)
    PythonRDD.writeIteratorToStream(Iterator("a".getBytes(StandardCharsets.UTF_8), null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes(StandardCharsets.UTF_8)), buffer)
    PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer)
    PythonRDD.writeIteratorToStream(Iterator(
      (null, null),
      ("a".getBytes(StandardCharsets.UTF_8), null),
      (null, "b".getBytes(StandardCharsets.UTF_8))), buffer)
  }
} 
Example 160
Source File: GenericAvroSerializerSuite.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 161
Source File: SerializerPropertiesSuite.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.util.Random

import org.scalatest.Assertions

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.serializer.KryoTest.RegistratorWithoutAutoReset


class SerializerPropertiesSuite extends SparkFunSuite {

  import SerializerPropertiesSuite._

  test("JavaSerializer does not support relocation") {
    // Per a comment on the SPARK-4550 JIRA ticket, Java serialization appears to write out the
    // full class name the first time an object is written to an output stream, but subsequent
    // references to the class write a more compact identifier; this prevents relocation.
    val ser = new JavaSerializer(new SparkConf())
    testSupportsRelocationOfSerializedObjects(ser, generateRandomItem)
  }

  test("KryoSerializer supports relocation when auto-reset is enabled") {
    val ser = new KryoSerializer(new SparkConf)
    assert(ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset())
    testSupportsRelocationOfSerializedObjects(ser, generateRandomItem)
  }

  test("KryoSerializer does not support relocation when auto-reset is disabled") {
    val conf = new SparkConf().set("spark.kryo.registrator",
      classOf[RegistratorWithoutAutoReset].getName)
    val ser = new KryoSerializer(conf)
    assert(!ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset())
    testSupportsRelocationOfSerializedObjects(ser, generateRandomItem)
  }

}

object SerializerPropertiesSuite extends Assertions {

  def generateRandomItem(rand: Random): Any = {
    val randomFunctions: Seq[() => Any] = Seq(
      () => rand.nextInt(),
      () => rand.nextString(rand.nextInt(10)),
      () => rand.nextDouble(),
      () => rand.nextBoolean(),
      () => (rand.nextInt(), rand.nextString(rand.nextInt(10))),
      () => MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10))),
      () => {
        val x = MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10)))
        (x, x)
      }
    )
    randomFunctions(rand.nextInt(randomFunctions.size)).apply()
  }

  def testSupportsRelocationOfSerializedObjects(
      serializer: Serializer,
      generateRandomItem: Random => Any): Unit = {
    if (!serializer.supportsRelocationOfSerializedObjects) {
      return
    }
    val NUM_TRIALS = 5
    val rand = new Random(42)
    for (_ <- 1 to NUM_TRIALS) {
      val items = {
        // Make sure that we have duplicate occurrences of the same object in the stream:
        val randomItems = Seq.fill(10)(generateRandomItem(rand))
        randomItems ++ randomItems.take(5)
      }
      val baos = new ByteArrayOutputStream()
      val serStream = serializer.newInstance().serializeStream(baos)
      def serializeItem(item: Any): Array[Byte] = {
        val itemStartOffset = baos.toByteArray.length
        serStream.writeObject(item)
        serStream.flush()
        val itemEndOffset = baos.toByteArray.length
        baos.toByteArray.slice(itemStartOffset, itemEndOffset).clone()
      }
      val itemsAndSerializedItems: Seq[(Any, Array[Byte])] = {
        val serItems = items.map {
          item => (item, serializeItem(item))
        }
        serStream.close()
        rand.shuffle(serItems)
      }
      val reorderedSerializedData: Array[Byte] = itemsAndSerializedItems.flatMap(_._2).toArray
      val deserializedItemsStream = serializer.newInstance().deserializeStream(
        new ByteArrayInputStream(reorderedSerializedData))
      assert(deserializedItemsStream.asIterator.toSeq === itemsAndSerializedItems.map(_._1))
      deserializedItemsStream.close()
    }
  }
}

private case class MyCaseClass(foo: Int, bar: String) 
Example 162
Source File: BookSerializer.scala    From akka-serialization-test   with Apache License 2.0 5 votes vote down vote up
package com.github.dnvriend.serializer.avro

import java.io.ByteArrayOutputStream

import com.github.dnvriend.domain.BookStore.{ ChangedBookV3, ChangedBookV2, ChangedBookV1 }
import com.sksamuel.avro4s.{ AvroBinaryOutputStream, AvroInputStream, AvroOutputStream }

abstract class BookSerializer[T] extends AvroSerializer[T] {

  final val Manifest = "ChangedBook"

}

class BookSerializerV1 extends BookSerializer[ChangedBookV1] {
  override def identifier: Int = 200011

  override def toBinary(o: AnyRef): Array[Byte] = {
    val output = new ByteArrayOutputStream
    val avro = AvroOutputStream[ChangedBookV1](output)
    avro.write(o.asInstanceOf[ChangedBookV1])
    avro.close()
    output.toByteArray
  }

  override def fromBinary(bytes: Array[Byte], manifest: String): AnyRef = {
    if (Manifest == manifest) {

      val is = AvroInputStream[ChangedBookV1](bytes)
      val events = is.iterator.toList
      is.close()

      events(0)

    } else throw new IllegalArgumentException(s"Unable to handle manifest $manifest, required $Manifest")
  }
}

class BookSerializerV2 extends BookSerializer[ChangedBookV2] {
  override def identifier: Int = 200012

  override def toBinary(o: AnyRef): Array[Byte] = {
    val output = new ByteArrayOutputStream
    val avro = AvroBinaryOutputStream[ChangedBookV2](output)
    avro.write(o.asInstanceOf[ChangedBookV2])
    avro.close()
    output.toByteArray
  }

  override def fromBinary(bytes: Array[Byte], manifest: String): AnyRef = {
    // if (Manifest == manifest) {
    println("Manifest " + manifest)
    val is = AvroInputStream[ChangedBookV2](bytes)
    val events = is.iterator.toList
    is.close()

    events(0)

    // } else throw new IllegalArgumentException(s"Unable to handle manifest $manifest, required $Manifest")
  }
}

class BookSerializerV3 extends BookSerializer[ChangedBookV3] {
  override def identifier: Int = 200013

  override def toBinary(o: AnyRef): Array[Byte] = {
    val output = new ByteArrayOutputStream
    val avro = AvroOutputStream[ChangedBookV3](output)
    avro.write(o.asInstanceOf[ChangedBookV3])
    avro.close()
    output.toByteArray
  }

  override def fromBinary(bytes: Array[Byte], manifest: String): AnyRef = {
    // if (Manifest == manifest) {
    println("Manifest " + manifest)
    val is = AvroInputStream[ChangedBookV3](bytes)
    val events = is.iterator.toList
    is.close()

    events(0)

    // } else throw new IllegalArgumentException(s"Unable to handle manifest $manifest, required $Manifest")
  }
} 
Example 163
Source File: MovieChangedSerializer.scala    From akka-serialization-test   with Apache License 2.0 5 votes vote down vote up
package com.github.dnvriend.serializer.avro

import java.io.ByteArrayOutputStream

import com.github.dnvriend.domain.Movie.MovieChanged
import com.sksamuel.avro4s.{ AvroInputStream, AvroOutputStream }

class MovieChangedSerializer extends AvroSerializer[MovieChanged] {
  override def identifier: Int = 100011
  final val Manifest = classOf[MovieChanged].getName

  override def toBinary(o: AnyRef): Array[Byte] = {
    val output = new ByteArrayOutputStream
    val avro = AvroOutputStream[MovieChanged](output)
    avro.write(o.asInstanceOf[MovieChanged])
    avro.close()
    output.toByteArray
  }

  override def fromBinary(bytes: Array[Byte], manifest: String): AnyRef = {
    if (Manifest == manifest) {

      val is = AvroInputStream[MovieChanged](bytes)
      val events = is.iterator.toList
      is.close()

      events(0)

    } else throw new IllegalArgumentException(s"Unable to handle manifest $manifest, required $Manifest")
  }
} 
Example 164
Source File: PLYReadWriteTests.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStreamWriter}
import java.nio.ByteOrder
import java.util.Scanner

import scalismo.faces.FacesTestSuite
import scalismo.faces.io.ply._

class PLYReadWriteTests extends FacesTestSuite {

  describe("Write-read cycles to string, big- and little endian") {

    def testRWEndianCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A], bo: ByteOrder): Unit = {
      val N = toWrite.size
      val os = new ByteArrayOutputStream()
      val writer = new SequenceWriter[A]
      writer.write(toWrite, os, bo)

      val ba = os.toByteArray

      val is = new ByteArrayInputStream(ba)
      val reader = new FixedLengthSequenceReader[A]
      val read = reader.read(N, is, bo)

      read.zip(toWrite).foreach { p =>
        p._1 shouldBe p._2
      }
    }

    def testRWStringCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = {
      val N = toWrite.size
      val os = new ByteArrayOutputStream()
      val osw = new OutputStreamWriter(os)
      val writer = new SequenceWriter[A]
      writer.write(toWrite, osw)
      osw.flush()

      val is = new ByteArrayInputStream(os.toByteArray)
      val isr = new Scanner(is)
      val reader = new FixedLengthSequenceReader[A]
      val read = reader.read(N, isr)

      read.zip(toWrite).foreach { p =>
        p._1 shouldBe p._2
      }
    }

    def testAllThreeCycles[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = {
      testRWStringCycle(toWrite)
      testRWEndianCycle(toWrite, ByteOrder.BIG_ENDIAN)
      testRWEndianCycle(toWrite, ByteOrder.LITTLE_ENDIAN)
    }

    it("should result in the same sequence of bytes") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toByte
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of char") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toChar
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of short") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toShort
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of int") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toInt
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of long") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toLong
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of float") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toFloat
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of double") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255)
      testAllThreeCycles(toWrite)
    }

  }

} 
Example 165
Source File: RewriteSwaggerConfigPlugin.scala    From matcher   with MIT License 5 votes vote down vote up
import java.io.{BufferedInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets
import java.nio.file.Files

import Dependencies.Version
import org.apache.commons.compress.archivers.ArchiveStreamFactory
import org.apache.commons.io.IOUtils
import sbt.Keys._
import sbt._

// See https://github.com/swagger-api/swagger-ui/issues/5710
object RewriteSwaggerConfigPlugin extends AutoPlugin {
  override val trigger = PluginTrigger.NoTrigger
  override def projectSettings: Seq[Def.Setting[_]] =
    inConfig(Compile)(
      Seq(
        resourceGenerators += Def.task {
          val jarName       = s"swagger-ui-${Version.swaggerUi}.jar"
          val indexHtmlPath = s"META-INF/resources/webjars/swagger-ui/${Version.swaggerUi}/index.html"
          val outputFile    = resourceManaged.value / indexHtmlPath

          val html = (Compile / dependencyClasspath).value
            .find(_.data.getName == jarName)
            .flatMap(jar => fileContentFromJar(jar.data, indexHtmlPath))
            .map { new String(_, StandardCharsets.UTF_8) }

          val resource = s"$jarName:$indexHtmlPath"
          html match {
            case None => throw new RuntimeException(s"Can't find $resource")
            case Some(html) =>
              val doc = org.jsoup.parser.Parser.parse(html, "127.0.0.1")
              import scala.collection.JavaConverters._
              doc
                .body()
                .children()
                .asScala
                .find { el =>
                  el.tagName() == "script" && el.html().contains("SwaggerUIBundle")
                } match {
                case None => throw new RuntimeException("Can't patch script in index.html")
                case Some(el) =>
                  val update =
                    """
const ui = SwaggerUIBundle({
    url: "/api-docs/swagger.json",
    dom_id: '#swagger-ui',
    deepLinking: true,
    presets: [ SwaggerUIBundle.presets.apis ],
    plugins: [ SwaggerUIBundle.plugins.DownloadUrl ],
    layout: "BaseLayout",
    operationsSorter: "alpha"
});
window.ui = ui;
"""
                  // Careful! ^ will be inserted as one-liner
                  el.text(update)
              }

              Files.createDirectories(outputFile.getParentFile.toPath)
              IO.write(outputFile, doc.outerHtml())
          }

          Seq(outputFile)
        }.taskValue
      ))

  private def fileContentFromJar(jar: File, fileName: String): Option[Array[Byte]] = {
    val fs      = new BufferedInputStream(Files.newInputStream(jar.toPath))
    val factory = new ArchiveStreamFactory()
    val ais     = factory.createArchiveInputStream(fs)

    try Iterator
      .continually(ais.getNextEntry)
      .takeWhile(_ != null)
      .filter(ais.canReadEntryData)
      .find(_.getName == fileName)
      .map { _ =>
        val out = new ByteArrayOutputStream()
        IOUtils.copy(ais, out)
        out.toByteArray
      } finally fs.close()
  }
} 
Example 166
Source File: Block.scala    From matcher   with MIT License 5 votes vote down vote up
package com.wavesplatform.dex.it.config.genesis

import java.io.ByteArrayOutputStream

import com.google.common.primitives.{Bytes, Ints, Longs}
import com.wavesplatform.dex.domain.account.{KeyPair, PublicKey}
import com.wavesplatform.dex.domain.bytes.ByteStr
import com.wavesplatform.dex.domain.crypto
import com.wavesplatform.dex.domain.crypto.Authorized
import com.wavesplatform.dex.domain.error.ValidationError.GenericError
import monix.eval.Coeval


case class Block(timestamp: Long,
                 version: Byte,
                 reference: ByteStr,
                 signerData: SignerData,
                 consensusData: NxtLikeConsensusBlockData,
                 transactionData: Seq[GenesisTransaction])
    extends Authorized {

  override val sender: PublicKey = signerData.generator
  private val maxLength: Int     = 150 * 1024

  private val transactionField: Array[Byte] = {

    val serTxCount = Array(transactionData.size.toByte)
    val byteBuffer = new ByteArrayOutputStream(transactionData.size * maxLength / 2)

    byteBuffer.write(serTxCount, 0, serTxCount.length)

    transactionData.foreach { tx =>
      val txBytes = tx.bytes()
      val txSize  = Bytes.ensureCapacity(Ints.toByteArray(txBytes.length), 4, 0)

      byteBuffer.write(txSize, 0, txSize.length)
      byteBuffer.write(txBytes, 0, txBytes.length)
    }

    byteBuffer.toByteArray
  }

  val bytes: Coeval[Array[Byte]] = Coeval.evalOnce {

    val txBytesSize = transactionField.length
    val txBytes     = Bytes.ensureCapacity(Ints.toByteArray(txBytesSize), 4, 0) ++ transactionField

    val consensusField = Bytes.ensureCapacity(Longs.toByteArray(consensusData.baseTarget), 8, 0) ++ consensusData.generationSignature.arr

    val cBytesSize = consensusField.length
    val cBytes     = Bytes.ensureCapacity(Ints.toByteArray(cBytesSize), 4, 0) ++ consensusField

    Array(version) ++
      Bytes.ensureCapacity(Longs.toByteArray(timestamp), 8, 0) ++
      reference.arr ++
      cBytes ++
      txBytes ++
      signerData.generator.arr ++
      signerData.signature.arr
  }
}

object Block {

  val MaxFeaturesInBlock: Int       = 64
  val GeneratorSignatureLength: Int = 32

  def build(version: Byte,
            timestamp: Long,
            reference: ByteStr,
            consensusData: NxtLikeConsensusBlockData,
            transactionData: Seq[GenesisTransaction],
            signerData: SignerData): Either[GenericError, Block] = {
    (for {
      _ <- Either.cond(reference.arr.length == crypto.SignatureLength, (), "Incorrect reference")
      _ <- Either.cond(consensusData.generationSignature.arr.length == GeneratorSignatureLength, (), "Incorrect consensusData.generationSignature")
      _ <- Either.cond(signerData.generator.length == crypto.KeyLength, (), "Incorrect signer")
    } yield Block(timestamp, version, reference, signerData, consensusData, transactionData)).left.map(GenericError(_))
  }

  def buildAndSign(version: Byte,
                   timestamp: Long,
                   reference: ByteStr,
                   consensusData: NxtLikeConsensusBlockData,
                   transactionData: Seq[GenesisTransaction],
                   signer: KeyPair): Either[GenericError, Block] =
    build(version, timestamp, reference, consensusData, transactionData, SignerData(signer, ByteStr.empty)).right
      .map(unsigned => unsigned.copy(signerData = SignerData(signer, ByteStr(crypto.sign(signer, unsigned.bytes.value)))))
} 
Example 167
Source File: Main.scala    From jardiff   with Apache License 2.0 5 votes vote down vote up
package scala.tools.jardiff

import java.io.{ByteArrayOutputStream, File, PrintWriter}
import java.nio.file._

import org.apache.commons.cli
import org.apache.commons.cli.{CommandLine, DefaultParser, HelpFormatter, Options}
import org.eclipse.jgit.util.io.NullOutputStream

import scala.collection.JavaConverters.collectionAsScalaIterableConverter
import scala.util.Try
import scala.util.control.NonFatal

object Main {
  def main(args: Array[String]): Unit = {
    run(args) match {
      case ShowUsage(msg) => System.err.println(msg); sys.exit(-1)
      case Error(err) => err.printStackTrace(System.err); sys.exit(-1)
      case Success(diffFound) => sys.exit(if (diffFound) 1 else 0)
    }
  }

  private object Opts {
    val Help = new cli.Option("h", "help", false, "Display this message")
    val Git = new cli.Option("g", "git", true, "Directory to output a git repository containing the diff")
    Git.setArgName("dir")
    val NoCode = new cli.Option("c", "suppress-code", false, "Suppress method bodies")
    val Raw = new cli.Option("r", "raw", false, "Disable sorting and filtering of classfile contents")
    val NoPrivates = new cli.Option("p", "suppress-privates", false, "Display only non-private members")
    val ContextLines = new cli.Option("U", "unified", true, "Number of context lines in diff")
    val Quiet = new cli.Option("q", "quiet", false, "Don't output diffs to standard out")
    val Ignore = new cli.Option("i", "ignore", true, "File pattern to ignore rendered files in gitignore format")
    Ignore.setArgs(cli.Option.UNLIMITED_VALUES)
    ContextLines.setArgName("n")
    def apply(): Options = {
      new cli.Options().addOption(Help).addOption(Git).addOption(ContextLines).addOption(NoCode).addOption(Raw).addOption(NoPrivates).addOption(Quiet).addOption(Ignore)
    }
  }
  private implicit class RichCommandLine(val self: CommandLine) {
    def has(o: cli.Option): Boolean = self.hasOption(o.getOpt)
    def get(o: cli.Option): String = self.getOptionValue(o.getOpt)
    def getOptInt(o: cli.Option): Option[Int] = Option(self.getOptionValue(o.getOpt)).map(x => Try(x.toInt).getOrElse(throw new cli.ParseException("--" + o.getLongOpt + " requires an integer")))
  }

  private def helpText: String = {
    val formatter = new HelpFormatter
    val baos = new ByteArrayOutputStream()
    val writer = new PrintWriter(baos)
    try {
      val footer = s" VERSION1 [VERSION2 ...]\n\nEach VERSION may designate a single file, a directory, JAR file or a `${File.pathSeparator}`-delimited classpath\n\n"
      formatter.printHelp(writer, 80, "jardiff", footer, Opts(), HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, "", true)
      writer.flush()
      baos.toString().replaceFirst("\\n", "")

    } finally {
      writer.close()
    }
  }

  def run(args: Array[String]): RunResult = {
    val parser = new DefaultParser

    try {
      val line = parser.parse(Opts(), args)
      val trailingArgs = line.getArgList
      if (line.has(Opts.Help)) {
        ShowUsage(helpText)
      } else {
        val gitRepo = if (line.has(Opts.Git)) Some(Paths.get(line.get(Opts.Git))) else None
        val diffOutputStream = if (line.has(Opts.Quiet)) NullOutputStream.INSTANCE else System.out
        val config = JarDiff.Config(gitRepo, !line.has(Opts.NoCode), line.has(Opts.Raw),
          !line.has(Opts.NoPrivates), line.getOptInt(Opts.ContextLines), diffOutputStream,
          Option(line.getOptionValues(Opts.Ignore.getOpt)).toList.flatten
        )
        val paths = trailingArgs.asScala.toList.map(JarDiff.expandClassPath)
        paths match {
          case Nil => ShowUsage(helpText)
          case _ =>
            val jarDiff = JarDiff(paths, config)
            val diffFound = jarDiff.diff()
            Success(diffFound)
        }
      }
    } catch {
      case exp: cli.ParseException => ShowUsage(helpText)
      case NonFatal(t) => Error(t)
    }
  }
}

sealed abstract class RunResult
case class ShowUsage(msg: String) extends RunResult
case class Error(err: Throwable) extends RunResult
case class Success(diffFound: Boolean) extends RunResult 
Example 168
Source File: JavaSerializationConverter.scala    From scala-serialization   with MIT License 5 votes vote down vote up
package com.komanov.serialization.converters

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.komanov.serialization.converters.IoUtils.using
import com.komanov.serialization.domain.{Site, SiteEvent, SiteEventData}

object JavaSerializationConverter extends MyConverter {

  override def toByteArray(site: Site): Array[Byte] = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new ObjectOutputStream(baos)) { os =>
        os.writeObject(site)
        os.flush()
        baos.toByteArray
      }
    }
  }

  override def fromByteArray(bytes: Array[Byte]): Site = {
    using(new ByteArrayInputStream(bytes)) { bais =>
      using(new ObjectInputStream(bais)) { os =>
        os.readObject().asInstanceOf[Site]
      }
    }
  }

  override def toByteArray(event: SiteEvent): Array[Byte] = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new ObjectOutputStream(baos)) { os =>
        os.writeObject(event)
        os.flush()
        baos.toByteArray
      }
    }
  }

  override def siteEventFromByteArray(clazz: Class[_], bytes: Array[Byte]): SiteEvent = {
    using(new ByteArrayInputStream(bytes)) { bais =>
      using(new ObjectInputStream(bais)) { os =>
        os.readObject().asInstanceOf[SiteEvent]
      }
    }
  }

} 
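The same ByteArrayOutputStream/ObjectOutputStream round trip, written as a standalone generic helper (a sketch for illustration, not part of the converter above), usable for any Serializable value:

def serialize[T <: java.io.Serializable](value: T): Array[Byte] = {
  val baos = new java.io.ByteArrayOutputStream()
  val oos = new java.io.ObjectOutputStream(baos)
  try {
    oos.writeObject(value)
    oos.flush()
    baos.toByteArray
  } finally {
    oos.close()
  }
}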
Example 169
Source File: EventsReportGenerator.scala    From scala-serialization   with MIT License 5 votes vote down vote up
package com.komanov.serialization.converters

import java.io.{ByteArrayOutputStream, File}
import java.nio.file.{Files, StandardOpenOption}
import java.util.zip.GZIPOutputStream

import com.komanov.serialization.converters.IoUtils._


object EventsReportGenerator extends App {

  val flush = false

  val dir = new File(new File(System.getProperty("user.home"), "123"), "events")
  require(!flush || dir.exists() || dir.mkdirs())

  val (raws, gzips, both) = (Seq.newBuilder[(String, Seq[Int])], Seq.newBuilder[(String, Seq[Int])], Seq.newBuilder[(String, Seq[Int])])

  for ((converterName, converter) <- Converters.all if converter ne ScroogeConverter if converter ne ScalaPbConverter) {
    val results = Seq.newBuilder[(Int, Int)]
    for ((name, site, events) <- TestData.all) {
      val bytes = converter.toByteArray(site)
      val gzipLen = getGzipByteLength(bytes)

      val eventsAndBytes = events.map(e => e -> converter.toByteArray(e.event))
      val eventsLen = eventsAndBytes.map(_._2.length).sum
      val eventsGzipLen = eventsAndBytes.map(_._2).map(getGzipByteLength).sum

      results += bytes.length -> gzipLen
      results += eventsLen -> eventsGzipLen

      if (flush) {
        val normalizedConverterName = converterName.toLowerCase().replace(" ", "-")
        Files.write(dir.getParentFile.toPath.resolve(s"site_${name}_$normalizedConverterName.bin"), bytes, StandardOpenOption.CREATE)
        for ((event, eventBytes) <- eventsAndBytes) {
          Files.write(dir.toPath.resolve(s"${name}_${normalizedConverterName}_${event.event.getClass.getSimpleName}.bin"), eventBytes, StandardOpenOption.CREATE)
        }
      }
    }

    raws += converterName -> results.result().map(_._1)
    gzips += converterName -> results.result().map(_._2)
    both += (converterName + " (rw)") -> results.result().map(_._1)
    both += (converterName + " (gz)") -> results.result().map(_._2)
  }

  println("Data Sizes (raw)")
  printHeaders
  printSizes(raws.result())

  println("Data Sizes (gzip)")
  printHeaders
  printSizes(gzips.result())

  println("Data Sizes")
  printHeaders
  printSizes(both.result())

  private def printHeaders: Any = {
    println("Converter," + TestData.sites.flatMap(t => Seq(t._1, "ev " + t._1)).mkString(","))
  }

  private def printSizes(all: Seq[(String, Seq[Int])]): Unit = {
    for ((name, list) <- all) {
      println(name + "," + list.mkString(","))
    }
  }

  private def getGzipByteLength(bytes: Array[Byte]): Int = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new GZIPOutputStream(baos)) { os =>
        os.write(bytes)
      }
      baos.toByteArray.length
    }
  }

} 
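The getGzipByteLength helper above relies on the project's IoUtils.using; an equivalent standalone sketch using only the JDK is shown here. The important detail is that the GZIPOutputStream must be closed (which finishes the gzip stream) before the compressed length is read from the underlying buffer.

def gzipSize(bytes: Array[Byte]): Int = {
  val baos = new java.io.ByteArrayOutputStream()
  val gzip = new java.util.zip.GZIPOutputStream(baos)
  try gzip.write(bytes) finally gzip.close()
  baos.toByteArray.length
}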
Example 170
Source File: ReportGenerator.scala    From scala-serialization   with MIT License 5 votes vote down vote up
package com.komanov.serialization.converters

import java.io.{ByteArrayOutputStream, File}
import java.nio.file.{Files, StandardOpenOption}
import java.util.zip.GZIPOutputStream

import com.komanov.serialization.converters.IoUtils._


object ReportGenerator extends App {

  val flush = true

  val dir = new File(System.getProperty("user.home"), "123")
  require(!flush || dir.exists() || dir.mkdirs())

  val (raws, gzips) = (Seq.newBuilder[(String, Seq[Int])], Seq.newBuilder[(String, Seq[Int])])

  for ((converterName, converter) <- Converters.all if converter ne ScalaPbConverter if converter ne ScroogeConverter) {
    val results = Seq.newBuilder[(Int, Int)]
    for ((name, site) <- TestData.sites) {
      val bytes = converter.toByteArray(site)
      val gzipLen = getGzipByteLength(bytes)

      results += bytes.length -> gzipLen

      if (flush) {
        val normalizedConverterName = converterName.toLowerCase().replace(" ", "-")
        Files.write(dir.toPath.resolve(s"site_${name}_$normalizedConverterName.bin"), bytes, StandardOpenOption.CREATE)
      }
    }

    raws += converterName -> results.result().map(_._1)
    gzips += converterName -> results.result().map(_._2)
  }

  println("Data Sizes (raw)")
  printHeaders
  printSizes(raws.result())

  println("Data Sizes (gzip)")
  printHeaders
  printSizes(gzips.result())

  private def printHeaders: Any = {
    println("Converter," + TestData.sites.map(_._1).mkString(","))
  }

  private def printSizes(all: Seq[(String, Seq[Int])]): Unit = {
    for ((name, list) <- all) {
      println(name + "," + list.mkString(","))
    }
  }

  private def getGzipByteLength(bytes: Array[Byte]): Int = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new GZIPOutputStream(baos)) { os =>
        os.write(bytes)
      }
      baos.toByteArray.length
    }
  }

} 
Example 171
Source File: SerializationTest.scala    From scala-serialization   with MIT License 5 votes vote down vote up
package com.komanov.serialization.converters

import java.io.ByteArrayOutputStream

import com.komanov.serialization.domain.SiteEventData
import org.apache.commons.io.HexDump
import org.specs2.mutable.SpecificationWithJUnit
import org.specs2.specification.Scope
import org.specs2.specification.core.Fragments

class SerializationTest extends SpecificationWithJUnit {

  sequential

  doTest("JSON", JsonConverter)
  doTest("ScalaPB", ScalaPbConverter)
  doTest("Java Protobuf", JavaPbConverter)
  doTest("Java Thrift", JavaThriftConverter)
  doTest("Scrooge", ScroogeConverter)
  doTest("Serializable", JavaSerializationConverter)
  doTest("Pickling", PicklingConverter)
  doTest("BooPickle", BoopickleConverter)
  doTest("Chill", ChillConverter)

  "ScalaPB and Java Protobuf" should {
    Fragments.foreach(TestData.sites) { case (name, site) =>
      s"be interoperable for site of $name" in new ctx {
        val javaMessage = JavaPbConverter.toByteArray(site)
        val scalaMessage = ScalaPbConverter.toByteArray(site)
        toHexDump(javaMessage) must be_===(toHexDump(scalaMessage))
      }
    }

    Fragments.foreach(TestData.events) { case (name, events) =>
      s"be interoperable events of $name" in new ctx {
        for (SiteEventData(_, event, _) <- events) {
          val javaMessage = JavaPbConverter.toByteArray(event)
          val scalaMessage = ScalaPbConverter.toByteArray(event)
          toHexDump(javaMessage) must be_===(toHexDump(scalaMessage))
        }
      }
    }
  }

  "Scrooge and Java Thrift" should {
    Fragments.foreach(TestData.sites) { case (name, site) =>
      s"be interoperable for site of $name" in new ctx {
        val javaMessage = JavaThriftConverter.toByteArray(site)
        val scalaMessage = ScroogeConverter.toByteArray(site)
        toHexDump(javaMessage) must be_===(toHexDump(scalaMessage))
      }
    }

    Fragments.foreach(TestData.events) { case (name, events) =>
      s"be interoperable events of $name" in new ctx {
        for (SiteEventData(_, event, _) <- events) {
          val javaMessage = JavaThriftConverter.toByteArray(event)
          val scalaMessage = ScroogeConverter.toByteArray(event)
          toHexDump(javaMessage) must be_===(toHexDump(scalaMessage))
        }
      }
    }
  }

  class ctx extends Scope

  def toHexDump(arr: Array[Byte]): String = {
    if (arr.isEmpty) {
      ""
    } else {
      val baos = new ByteArrayOutputStream
      HexDump.dump(arr, 0, baos, 0)
      new String(baos.toByteArray)
    }
  }

  def doTest(converterName: String, converter: MyConverter) = {
    converterName should {
      Fragments.foreach(TestData.sites) { case (name, site) =>
        s"serialize-parse site of $name" in new ctx {
          val bytes = converter.toByteArray(site)
          val parsed = converter.fromByteArray(bytes)
          parsed must be_===(site)
        }
      }

      Fragments.foreach(TestData.events) { case (name, events) =>
        s"serialize-parse site events of $name" in new ctx {
          for (SiteEventData(_, event, _) <- events) {
            val bytes = converter.toByteArray(event)
            val parsed = converter.siteEventFromByteArray(event.getClass, bytes)
            parsed must be_===(event)
          }
        }
      }
    }
  }

} 
Example 172
Source File: avroMarshallers.scala    From scalatest-embedded-kafka   with MIT License 5 votes vote down vote up
package net.manub.embeddedkafka.avro

import java.io.ByteArrayOutputStream

import kafka.utils.VerifiableProperties
import org.apache.avro.Schema
import org.apache.avro.io._
import org.apache.avro.specific.{
  SpecificDatumReader,
  SpecificDatumWriter,
  SpecificRecord
}
import org.apache.kafka.common.serialization.{Deserializer, Serializer}

class KafkaAvroDeserializer[T <: SpecificRecord](schema: Schema)
    extends Deserializer[T]
    with NoOpConfiguration
    with NoOpClose {

  private val reader = new SpecificDatumReader[T](schema)

  override def deserialize(topic: String, data: Array[Byte]): T = {
    val decoder = DecoderFactory.get().binaryDecoder(data, null)
    reader.read(null.asInstanceOf[T], decoder)
  }
}

class KafkaAvroSerializer[T <: SpecificRecord]()
    extends Serializer[T]
    with NoOpConfiguration
    with NoOpClose {

  private def toBytes(nullableData: T): Array[Byte] =
    Option(nullableData).fold[Array[Byte]](null) { data =>
      val writer: DatumWriter[T] = new SpecificDatumWriter[T](data.getSchema)
      val out = new ByteArrayOutputStream()
      val encoder = EncoderFactory.get.binaryEncoder(out, null)

      writer.write(data, encoder)
      encoder.flush()
      out.close()

      out.toByteArray
    }

  override def serialize(topic: String, data: T): Array[Byte] =
    toBytes(data)
}

sealed trait NoOpConfiguration {
  def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = ()
}

sealed trait NoOpClose {
  def close(): Unit = ()
} 
Example 173
Source File: StreamingSpec.scala    From seals   with Apache License 2.0 5 votes vote down vote up
package com.example.streaming

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }

import shapeless.record._

import cats.effect.IO

import org.scalatest.matchers.should.Matchers
import org.scalatest.flatspec.AnyFlatSpec

import fs2.Stream

import scodec.Codec
import scodec.bits.BitVector
import scodec.stream.CodecError

import dev.tauri.seals._
import dev.tauri.seals.scodec.Codecs._
import dev.tauri.seals.scodec.StreamCodecs._

class StreamingSpec extends AnyFlatSpec with Matchers {

  import Main.{ Animal, Elephant, Quokka, Quagga, Grey }

  val animals = Vector[Animal](
    Elephant("Dumbo", tuskLength = 35.0f),
    Quokka("Nellie"),
    Quagga("Ford", speed = 120.0)
  )

  val transformedAnimals = Vector[Animal](
    Elephant("Dumbo", tuskLength = 35.0f + 17.0f),
    Quokka("Nellie", Grey)
  )

  val animalStream = Stream.emits[IO, Animal](animals)

  val encoder = streamEncoderFromReified[Animal]
  val decoder = streamDecoderFromReified[Animal]

  "Encoding/decoding" should "work correctly" in {
    val tsk: IO[Unit] = for {
      bv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _)
      as <- decoder.decode[IO](Stream(bv)).compile.toVector
    } yield {
      as should === (animals)
    }
    tsk.unsafeRunSync()
  }

  it should "fail with incompatible models" in {
    val mod = Reified[Record.`'Elephant -> Elephant, 'Quokka -> Quokka`.T].model
    val bv: BitVector = Codec[Model].encode(mod).getOrElse(fail)
    val tsk: IO[Unit] = for {
      as <- decoder.decode[IO](Stream(bv)).compile.toVector
    } yield {
      as should === (Vector.empty)
    }

    val ex = intercept[CodecError] {
      tsk.unsafeRunSync()
    }
    ex.err.message should include ("incompatible models")
  }

  "Transformation" should "work correctly" in {
    val tsk: IO[Unit] = for {
      ibv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _)
      is = new ByteArrayInputStream(ibv.toByteArray)
      os = new ByteArrayOutputStream
      _ <- Main.transform(is, os)(Main.transformer)
      obv = BitVector(os.toByteArray())
      transformed <- decoder.decode[IO](Stream(obv)).compile.fold(Vector.empty[Animal])(_ :+ _)
    } yield {
      transformed should === (transformedAnimals)
    }
    tsk.unsafeRunSync()
  }
} 
Example 174
Source File: StreamHandlerTest.scala    From scala-js-java-logging   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package org.scalajs.testsuite.javalib.util.logging

import java.io.ByteArrayOutputStream
import java.util.logging._

import org.junit.Test
import org.junit.Assert._

class StreamHandlerTest {
  object TestFormatter extends SimpleFormatter {
    override def getHead(h: Handler): String = "header"

    override def getTail(h: Handler): String = "footer"
  }

  @Test def test_logging():Unit = {
    val o = new ByteArrayOutputStream()
    val sh = new StreamHandler(o, new SimpleFormatter())
    sh.publish(new LogRecord(Level.INFO, "message"))
    sh.flush()
    assertTrue(o.toString.contains("message"))
  }

  @Test def test_default_level():Unit = {
    val o = new ByteArrayOutputStream()
    val sh = new StreamHandler(o, new SimpleFormatter())
    // Defaults to level INFO
    sh.publish(new LogRecord(Level.FINER, "message"))
    sh.flush()
    assertFalse(o.toString.contains("message"))
  }

  @Test def test_default_config():Unit = {
    val o = new ByteArrayOutputStream()
    val sh = new StreamHandler(o, new SimpleFormatter())
    assertNull(sh.getEncoding)
    assertNull(sh.getFilter)
    assertNotNull(sh.getFormatter)
    assertNotNull(sh.getErrorManager)
  }

  @Test def test_default_constructor_config():Unit = {
    val sh = new StreamHandler()
    assertNull(sh.getEncoding)
    assertNull(sh.getFilter)
    assertNotNull(sh.getFormatter)
    assertNotNull(sh.getErrorManager)
  }

  @Test def test_no_logging_for_level():Unit = {
    val o = new ByteArrayOutputStream()
    val sh = new StreamHandler(o, new SimpleFormatter())
    sh.setLevel(Level.WARNING)
    sh.publish(new LogRecord(Level.INFO, "message"))
    sh.flush()
    // No output under the given level
    assertTrue(o.toString.isEmpty)
  }

  @Test def test_no_errors_if_no_stream():Unit = {
    val sh = new StreamHandler()
    sh.publish(new LogRecord(Level.INFO, "message"))
    sh.flush()
  }

  @Test def test_print_head():Unit = {
    val o = new ByteArrayOutputStream()
    val sh = new StreamHandler(o, TestFormatter)
    assertTrue(o.toString.isEmpty)
    sh.publish(new LogRecord(Level.INFO, "message"))
    sh.flush()
    assertTrue(o.toString.contains("header"))
    assertTrue(!o.toString.contains("footer"))
  }

  @Test def test_print_tail():Unit = {
    val o = new ByteArrayOutputStream()
    val sh = new StreamHandler(o, TestFormatter)
    assertTrue(o.toString.isEmpty)
    sh.close()
    assertTrue(o.toString.contains("header"))
    assertTrue(o.toString.contains("footer"))
  }
} 
Example 175
Source File: CaptureOutputStream.scala    From spatial   with MIT License 5 votes vote down vote up
package utils.io

import java.io.{ByteArrayOutputStream, OutputStream, PrintStream}

class CaptureOutputStream extends OutputStream {
  val data = new ByteArrayOutputStream()

  override def write(b: Int): Unit = data.write(b)
  override def write(b: Array[Byte]): Unit = data.write(b)
  override def write(b: Array[Byte], off: Int, len: Int): Unit = data.write(b,off,len)

  def dump: String = new java.lang.String(data.toByteArray, java.nio.charset.StandardCharsets.UTF_8)
}

class CaptureStream(__out: CaptureOutputStream, paired: PrintStream) extends PrintStream(__out) {
  def this(paired: PrintStream) = this(new CaptureOutputStream(), paired)
  def dump: String = __out.dump
  //TODO[5]: For some reason this duplicates the printing
  //override def print(s: String): Unit = { paired.print(s); super.print(s) }
  //override def println(s: String): Unit = { paired.println(s); super.println(s) }
} 
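A hypothetical usage sketch for the CaptureOutputStream above: redirect Console output into the capture buffer for the duration of a block, then read it back with dump.

val capture = new CaptureOutputStream()
val printer = new PrintStream(capture)
Console.withOut(printer) {
  println("hello from the captured block")
}
printer.flush()
assert(capture.dump.contains("hello"))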
Example 176
Source File: ShowSchemaMainTest.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.cli

import java.io.{ByteArrayOutputStream, PrintStream}

import org.scalatest.{Matchers, WordSpec}

class ShowSchemaMainTest extends WordSpec with Matchers {

  "SchemaMain" should {
    "display schema for specified avro source" in {
      val baos = new ByteArrayOutputStream
      val out = new PrintStream(baos)
      ShowSchemaMain(Seq("--source", "avro:" + getClass.getResource("/test.avro").getFile), out)
      new String(baos.toByteArray).trim shouldBe """{"type":"record","name":"row","namespace":"namespace","fields":[{"name":"name","type":"string"},{"name":"job","type":"string"},{"name":"location","type":"string"}]}"""
    }
  }
} 
Example 177
Source File: ArrowConverters.scala    From flint   with Apache License 2.0 5 votes vote down vote up
package com.twosigma.flint.arrow

import java.io.ByteArrayOutputStream
import java.nio.channels.Channels

import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector._
import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel
import org.apache.spark.TaskContext
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.types._
import com.twosigma.flint.util.Utils
import org.apache.arrow.vector.ipc.{ ArrowFileReader, ArrowFileWriter }
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch

trait ClosableIterator[T] extends Iterator[T] with AutoCloseable

class ConcatClosableIterator[T](iters: Iterator[ClosableIterator[T]])
  extends ClosableIterator[T] {
  var curIter: ClosableIterator[T] = _

  private def advance(): Unit = {
    require(curIter == null || !curIter.hasNext, "Should not advance if curIter is not empty")
    require(iters.hasNext, "Should not advance if iters doesn't have next")
    closeCurrent()
    curIter = iters.next()
  }

  private def closeCurrent(): Unit = if (curIter != null) curIter.close()

  override def close(): Unit = closeCurrent()

  override def hasNext: Boolean = {
    if (curIter == null || !curIter.hasNext) {
      if (iters.hasNext) {
        advance()
        hasNext
      } else {
        false
      }
    } else {
      true
    }
  }

  override def next(): T = curIter.next()
}


// Note: the enclosing object declaration is truncated in this excerpt; judging by the file name,
// it is presumably the ArrowConverters object, restored here so that byteArrayToBatch compiles.
object ArrowConverters {

  def byteArrayToBatch(
    batchBytes: Array[Byte],
    allocator: BufferAllocator
  ): ArrowRecordBatch = {
    val in = new ByteArrayReadableSeekableByteChannel(batchBytes)
    val reader = new ArrowFileReader(in, allocator)

    // Read a batch from a byte stream, ensure the reader is closed
    Utils.tryWithSafeFinally {
      val root = reader.getVectorSchemaRoot
      // throws IOException
      val unloader = new VectorUnloader(root)
      reader.loadNextBatch() // throws IOException
      unloader.getRecordBatch
    } {
      reader.close()
    }
  }
} 
Example 178
Source File: ArrowSummarizer.scala    From flint   with Apache License 2.0 5 votes vote down vote up
package com.twosigma.flint.rdd.function.summarize.summarizer

import java.io.ByteArrayOutputStream
import java.nio.channels.Channels
import java.util

import com.twosigma.flint.arrow.{ ArrowFieldWriter, ArrowPayload, ArrowUtils, ArrowWriter }
import org.apache.arrow.memory.{ BufferAllocator, RootAllocator }
import org.apache.arrow.vector.VectorSchemaRoot
import org.apache.arrow.vector.ipc.ArrowFileWriter
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types.StructType

import scala.collection.JavaConverters._


case class ArrowSummarizer(inputSchema: StructType, outputSchema: StructType, includeBaseRows: Boolean)
  extends Summarizer[InternalRow, ArrowSummarizerState, ArrowSummarizerResult] {
  private[this] val size = outputSchema.size
  require(size > 0, "Cannot create summarizer with no input columns")

  // Note: zero() itself allocates nothing; memory is taken from the BufferAllocator lazily in init(), on the first add().
  override def zero(): ArrowSummarizerState = {
    new ArrowSummarizerState(false, null, null, null, null)
  }

  private def init(u: ArrowSummarizerState): Unit = {
    if (!u.initialized) {
      val arrowSchema = ArrowUtils.toArrowSchema(outputSchema)
      val allocator = new RootAllocator(Int.MaxValue)
      val root = VectorSchemaRoot.create(arrowSchema, allocator)
      val arrowWriter = ArrowWriter.create(inputSchema, outputSchema, root)

      u.initialized = true
      u.baseRows = new util.ArrayList[InternalRow]()
      u.allocator = allocator
      u.root = root
      u.arrowWriter = arrowWriter
    }
  }

  override def add(u: ArrowSummarizerState, row: InternalRow): ArrowSummarizerState = {
    if (!u.initialized) {
      init(u)
    }

    if (includeBaseRows) {
      u.baseRows.add(row)
    }
    u.arrowWriter.write(row)
    u
  }

  override def merge(
    u1: ArrowSummarizerState,
    u2: ArrowSummarizerState
  ): ArrowSummarizerState = throw new UnsupportedOperationException()

  // This can only be called once
  override def render(u: ArrowSummarizerState): ArrowSummarizerResult = {
    if (u.initialized) {
      val out = new ByteArrayOutputStream()
      val writer = new ArrowFileWriter(u.root, null, Channels.newChannel(out))

      u.arrowWriter.finish()
      writer.writeBatch()

      writer.close()
      u.root.close()
      u.allocator.close()

      val rows = u.baseRows.toArray.asInstanceOf[Array[Any]]
      ArrowSummarizerResult(rows, out.toByteArray)
    } else {
      ArrowSummarizerResult(Array.empty, Array.empty)
    }
  }

  override def close(u: ArrowSummarizerState): Unit = {
    if (u.initialized) {
      u.arrowWriter.reset()
      u.root.close()
      u.allocator.close()
    }
  }
} 
Example 179
Source File: ConcatArrowAndExplodeSpec.scala    From flint   with Apache License 2.0 5 votes vote down vote up
package com.twosigma.flint.timeseries

import java.io.ByteArrayOutputStream
import java.nio.channels.Channels
import java.util.concurrent.TimeUnit

import com.twosigma.flint.arrow.ArrowUtils
import org.apache.arrow.memory.RootAllocator
import org.apache.arrow.vector.ipc.ArrowFileWriter
import org.apache.arrow.vector.{ BigIntVector, Float8Vector, VectorSchemaRoot }
import org.apache.spark.sql.functions.{ array, col, lit, struct }
import org.apache.spark.sql.types._

class ConcatArrowAndExplodeSpec extends TimeSeriesSuite {

  "ConcatArrowAndExplode" should "work" in {

    val batchSize = 10

    var df = spark.range(1000, 2000, 1000).toDF("time")
    val columns = (0 until batchSize).map(v => struct((df("time") + v).as("time"), lit(v.toDouble).as("v")))
    df = df.withColumn("base_rows", array(columns: _*))

    val allocator = new RootAllocator(Long.MaxValue)

    val schema1 = StructType(Seq(StructField("v1", DoubleType)))
    val root1 = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(schema1), allocator)
    val vector1 = root1.getVector("v1").asInstanceOf[Float8Vector]
    vector1.allocateNew()

    for (i <- 0 until batchSize) {
      vector1.set(i, i + 10.0)
    }
    vector1.setValueCount(batchSize)
    val out1 = new ByteArrayOutputStream()
    val arrowWriter1 = new ArrowFileWriter(root1, null, Channels.newChannel(out1))
    arrowWriter1.writeBatch()
    arrowWriter1.close()
    root1.close()
    df = df.withColumn("f1_schema", struct(lit(0.0).as("v1")))
    df = df.withColumn("f1_data", lit(out1.toByteArray))

    val schema2 = StructType(Seq(StructField("v2", DoubleType), StructField("v3", LongType)))
    val root2 = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(schema2), allocator)
    val vector2 = root2.getVector("v2").asInstanceOf[Float8Vector]
    val vector3 = root2.getVector("v3").asInstanceOf[BigIntVector]
    vector2.allocateNew()
    vector3.allocateNew()

    for (i <- 0 until batchSize) {
      vector2.set(i, i + 20.0)
    }
    vector2.setValueCount(batchSize)

    for (i <- 0 until batchSize) {
      vector3.set(i, i + 30L)
    }
    vector3.setValueCount(batchSize)
    val out2 = new ByteArrayOutputStream()
    val arrowWriter2 = new ArrowFileWriter(root2, null, Channels.newChannel(out2))
    arrowWriter2.writeBatch()
    arrowWriter2.close()
    root2.close()
    df = df.withColumn("f2_schema", struct(lit(0.0).as("v2"), lit(0L).as("v3")))
    df = df.withColumn("f2_data", lit(out2.toByteArray))

    var tsrdd = TimeSeriesRDD.fromDF(df)(isSorted = false, timeUnit = TimeUnit.NANOSECONDS)
    tsrdd = tsrdd.concatArrowAndExplode("base_rows", Seq("f1_schema", "f2_schema"), Seq("f1_data", "f2_data"))
    tsrdd.toDF.show()

    var expected = spark.range(1000, 1000 + batchSize).toDF("time")
    expected = expected.withColumn("v", col("time") - 1000.0)
    expected = expected.withColumn("v1", col("time") - 1000 + 10.0)
    expected = expected.withColumn("v2", col("time") - 1000 + 20.0)
    expected = expected.withColumn("v3", col("time") - 1000 + 30)

    val expectedTsrdd = TimeSeriesRDD.fromDF(expected)(isSorted = false, timeUnit = TimeUnit.NANOSECONDS)
    assertEquals(tsrdd, expectedTsrdd)
  }

} 
Example 180
Source File: get_features_from_peinfo.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
import com.datastax.spark.connector._
import play.api.libs.json.Json
import play.api.libs.json._
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import Array.concat
import org.apache.spark.sql.types._
import org.apache.spark.ml.linalg.SQLDataTypes.VectorType 
import org.apache.spark.ml.linalg._
import org.apache.spark.sql.Row
import org.apache.spark.ml.feature.MinMaxScaler
import org.apache.spark.ml.linalg.DenseVector
import PreProcessingConfig._

case class peinfo_results_by_service_name_class(service_name: String, sha256: String)
case class peinfo_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte])
case class peinfo_join_results_class(sha256: String, service_name: String, results: String)
case class peinfo_int_final_array_rdd_class(sha256: String, array_results: Array[Double])
case class peinfo_binaray_final_array_rdd_class(sha256:String, array_results :Array[Double])
case class peinfo_final_array_rdd_class(sha256:String, array_results: Array[Double])

def unzip(x: Array[Byte]) : String = {      
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(x))
    val output = scala.io.Source.fromInputStream(inputStream).mkString
    return output
}
def findAllIntinpeinfo( peinfo_json_results : JsLookupResult, time: Double): Array[Double]= {
    val entropy = peinfo_json_results \\ "entropy" ; val virt_address = peinfo_json_results \\ "virt_address"; val virt_size = peinfo_json_results \\ "virt_size"; val size = peinfo_json_results \\ "size";
    var i= 0; var List  = Array.iterate(0.0,17)(a=>a*0)
    for (k <- ( peinfo_json_results \\ "section_name")){
        k.as[String] match {
            case ".text\u0000\u0000\u0000" => { List(0)=entropy(i).as[Double]; List(1)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(2)=virt_size(i).as[Double]; List(3)=size(i).as[Double] }
            case ".data\u0000\u0000\u0000" => { List(4)=entropy(i).as[Double]; List(5)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(6)=virt_size(i).as[Double]; List(7)=size(i).as[Double] }
            case ".rsrc\u0000\u0000\u0000" => { List(8)=entropy(i).as[Double]; List(9)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(10)=virt_size(i).as[Double]; List(11)=size(i).as[Double] }
            case ".rdata\u0000\u0000" => { List(12)=entropy(i).as[Double]; List(13)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(14)=virt_size(i).as[Double]; List(15)=size(i).as[Double] }
            case other => {}
        }
        i = i + 1
    }
    List(16)= time
    return List.toArray
}

val peinfo_results_by_service_name_meta = sc.cassandraTable[peinfo_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","peinfo")
val peinfo_results_by_service_name_rdd = peinfo_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name))
val peinfo_results_by_sha256_meta = sc.cassandraTable[peinfo_results_by_sha256_class](keyspace,sha256_table)
val peinfo_results_by_sha256_rdd = peinfo_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name))
val peinfo_join_results = peinfo_results_by_service_name_rdd.join(peinfo_results_by_sha256_rdd).map(x=> (new peinfo_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache()

val peinfo_int_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "pe_sections"),{if ((Json.parse(x.results) \ "timestamp").isInstanceOf[JsUndefined]) 0.0 else (Json.parse(x.results) \ "timestamp" \\ "timestamp")(0).as[Double]})).filter(x=> !x._2.isInstanceOf[JsUndefined]).map(x=>new  peinfo_int_final_array_rdd_class(x._1,findAllIntinpeinfo(x._2,x._3)))

val peinfo_dllfunction_list= peinfo_join_results.map(x=>Json.parse(x.results) \ "imports").filter(x=> !x.isInstanceOf[JsUndefined]).flatMap(x=>x.as[List[Map[String, String]]].map(x=>(x("dll")+"."+x("function")))).toDF("func_name").groupBy("func_name").count.sort(desc("count")).filter("count > 10000").rdd.map(r => r.getString(0)).collect().toList
implicit def bool2int(b:Boolean) = if (b) 1 else 0
def findAllBininpeinfo_dllfunction(peinfo_dllfunction : Seq[String]) : Array[Double] ={
    val forlist = for (family <- peinfo_dllfunction_list) yield {
        (peinfo_dllfunction.contains(family):Int).toDouble
    }
    return (forlist).toArray
}
val List502 = Array.iterate(0.0,502)(a=>0.0)
val peinfo_binaray_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "imports"))).map(x=>new  peinfo_binaray_final_array_rdd_class(x._1,{if (x._2.isInstanceOf[JsUndefined]) List502 else findAllBininpeinfo_dllfunction(x._2.as[Seq[Map[String, String]]].map(x=>(x("dll")+"."+x("function"))))}))

val peinfo_int_final_array_rdd_before_join = peinfo_int_final_array_rdd.map(x=>(x.sha256,x.array_results))
val peinfo_binaray_final_array_rdd_before_join = peinfo_binaray_final_array_rdd.map(x=>(x.sha256,x.array_results))
val peinfo_array_rdd_by_join = peinfo_int_final_array_rdd_before_join.join(peinfo_binaray_final_array_rdd_before_join).map(x=> (x._1,concat(x._2._1,x._2._2)))
val peinfo_final_array_rdd = peinfo_array_rdd_by_join.map(x=>new peinfo_final_array_rdd_class(x._1,x._2))

val peinfo_schema = new StructType().add("sha256", StringType).add("peinfo",VectorType)
val peinfo_vector_rdd = peinfo_final_array_rdd.map(x=>(x.sha256,Vectors.dense(x.array_results)))
val peinfo_vector_rowrdd = peinfo_vector_rdd.map(p => Row(p._1,p._2))
val peinfo_vector_dataframe = spark.createDataFrame(peinfo_vector_rowrdd, peinfo_schema)
val peinfo_scaler = new MinMaxScaler()
  .setInputCol("peinfo")
  .setOutputCol("scaled_peinfo")
val peinfo_scalerModel = peinfo_scaler.fit(peinfo_vector_dataframe)
val peinfo_scaledData_df = peinfo_scalerModel.transform(peinfo_vector_dataframe)
val peinfo_scaledData_rdd = peinfo_scaledData_df.select("sha256","scaled_peinfo").rdd.map(row=>(row.getAs[String]("sha256"),row.getAs[DenseVector]("scaled_peinfo"))).map(x=>new peinfo_final_array_rdd_class(x._1,x._2.toArray))
peinfo_scaledData_rdd.toDF().write.format("parquet").save(peinfo_final_array_file) 
Example 181
Source File: get_features_from_objdump.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
import com.datastax.spark.connector._
import play.api.libs.json.Json
import play.api.libs.json._
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import PreProcessingConfig._

case class objdump_results_by_service_name_class(service_name: String, sha256: String)
case class objdump_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte])
case class objdump_join_results_class(sha256: String, service_name: String, results: String)
case class objdump_binaray_final_array_rdd_class(sha256: String, array_results: Array[Double])
 
val objdump_main_list = sc.textFile(objdump_x86Opcodes_file).collect.toList
def unzip(x: Array[Byte]) : String = {		
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(x))
    val output = scala.io.Source.fromInputStream(inputStream).mkString
    return output
}
def combineAllObjdumpInOne( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] ={
    if (malwarelist(0).toString() == "null") return List("null")
    var begin = malwarelist(0).as[List[String]]
    for (i <- 1 to (malwarelist.size-1)){
        if (malwarelist(i).toString() == "null") begin = begin
        else begin = begin ::: malwarelist(i).as[List[String]]
    }
    return  begin
}
def convertToList( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] = {
    if (malwarelist(0).toString() == "null") return List("null")
    else {
        return malwarelist(0).as[List[String]]
    } 
    
}
def findAllBininobjdump_main_list(malware :List[String]) : Array[Double] ={
    if (malware == List("null")) return (List.fill(10000)(0.0)).toArray
    else {
        val forlist = for ( one  <- malware ) yield {
            objdump_main_list.indexOf(one) + 1.0
        }
        if (forlist.size < 10000){
            return  (List.concat(forlist,List.fill(10000-forlist.size)(0.0))).toArray
        }
        else return forlist.toArray
    }
}

val objdump_results_by_service_name_meta = sc.cassandraTable[objdump_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","objdump")
val objdump_results_by_service_name_rdd = objdump_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name))
val objdump_results_by_sha256_meta = sc.cassandraTable[objdump_results_by_sha256_class](keyspace,sha256_table)
val objdump_results_by_sha256_rdd = objdump_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name))
val objdump_join_results = objdump_results_by_service_name_rdd.join(objdump_results_by_sha256_rdd).map(x=> (new objdump_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct()
val objdump_binaray_final_array_rdd = objdump_join_results.map(x=>(x.sha256,(Json.parse(x.results) \\ "opcodes"))).filter(x=> (x._2.size > 0)).map(x=>(x._1,if ( x._2.size == 1 ) convertToList(x._2) else combineAllObjdumpInOne(x._2))).map(x=>(x._1,findAllBininobjdump_main_list(x._2)))
objdump_binaray_final_array_rdd.toDF().write.format("parquet").save(objdump_binaray_final_array_file) 
Example 182
Source File: get_VT_signatures.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
import com.datastax.spark.connector._
import play.api.libs.json.Json
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import PreProcessingConfig._

case class VT_results_by_service_name_class(service_name: String, sha256: String)
case class VT_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte] )
case class VT_join_results_class(sha256: String, service_name: String, results: String)
case class VT_sample_signatures_initial_seq_rdd_class(sha256: String, seq_results: Seq[String])
case class VT_sample_signatures_final_array_rdd_class(sha256:String, array_results:Array[Double])

def unzip(x: Array[Byte]) : String = {		
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(x))
    val output = scala.io.Source.fromInputStream(inputStream).mkString
    return output
}
def deleteNumberInSampleSignatures(x: String): Boolean = {
    val regex = "[0-9]".r
    return regex.findFirstIn(x).isEmpty
}

val VT_results_by_service_name_meta = sc.cassandraTable[VT_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","virustotal")
val VT_results_by_service_name_rdd = VT_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name))
val VT_results_by_sha256_meta = sc.cassandraTable[VT_results_by_sha256_class](keyspace,sha256_table)
val VT_results_by_sha256_rdd = VT_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name))
val VT_join_results = VT_results_by_service_name_rdd.join(VT_results_by_sha256_rdd).map(x => (new VT_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache()
val sample_signatures_rdd = VT_join_results.flatMap(x=>Json.parse(x.results) \ "scans" \\ "result").map(x=>Json.stringify(x)).filter( x=> !(x == "null"))
val sample_signatures_split_rdd = sample_signatures_rdd.flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase())
val signatures_prefix_rdd = sc.textFile(VT_signatures_prefix_suffix_file).map(x=>x.toLowerCase())
val family_signatures_subtract_rdd = sample_signatures_split_rdd.subtract(signatures_prefix_rdd)
val family_signatures_sorted_rdd = sc.parallelize(family_signatures_subtract_rdd.countByValue().toSeq).filter(x=>(x._2>50)).sortBy(x=>x._2,false)
val family_signatures_list = family_signatures_sorted_rdd.keys.collect().toList
val VT_sample_signatures_rdd = VT_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "scans" \\ "result").map(_.toString).filter( s => !(s== "null")).flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase())))
val  VT_sample_signatures_initial_seq_rdd = VT_sample_signatures_rdd.map(x=>new VT_sample_signatures_initial_seq_rdd_class(x._1, x._2))

implicit def bool2int(b:Boolean) = if (b) 1 else 0
def findAllInFamilySignatures(sample_signatures_seq : Seq[String]) : Array[Double] ={
    val forlist = for (family <- family_signatures_list) yield {
        (sample_signatures_seq.contains(family):Int).toDouble
    }
    return forlist.toArray
}

val VT_sample_signatures_final_array_rdd = VT_sample_signatures_initial_seq_rdd.map(x=>new VT_sample_signatures_final_array_rdd_class(x.sha256,findAllInFamilySignatures(x.seq_results)))
VT_sample_signatures_final_array_rdd.toDF().write.format("parquet").save(VT_sample_signatures_final_array_file) 
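The three scripts above each define the same unzip helper; the matching compression step is not shown, although the otherwise unused GZIPOutputStream import hints at it. A minimal sketch of that inverse, written in the same spark-shell script style, could look like this:

import java.io.ByteArrayOutputStream
import java.util.zip.GZIPOutputStream

// Sketch only: gzip-compress a String into a byte array, the inverse of unzip above.
def zip(text: String): Array[Byte] = {
    val byteStream = new ByteArrayOutputStream()
    val gzipStream = new GZIPOutputStream(byteStream)
    gzipStream.write(text.getBytes("UTF-8"))
    gzipStream.close()   // close() writes the gzip trailer and flushes everything into byteStream
    byteStream.toByteArray
}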
Example 183
Source File: HelperMethods.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
package com.holmesprocessing.analytics.relationship.knowledgeBase

import play.api.libs.json.Json
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream


// Note: the enclosing object declaration is truncated in this excerpt; judging by the file name,
// it is presumably a HelperMethods object, restored here so that score compiles.
object HelperMethods {

  def score(ruleset_1: String, ruleset_2: String): Double = {

    val split_1 = ruleset_1.split(",").toSeq
    val split_2 = ruleset_2.split(",").toSeq
    if (split_1.length > 0 && split_2.length > 0) {
      return split_1.intersect(split_2).length.toDouble/split_1.union(split_2).distinct.length.toDouble
    } else {
      return 0
    }
  }

} 
Example 184
Source File: KryoInitSpec.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.core.akka

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.romix.scala.serialization.kryo.{
  EnumerationSerializer,
  ScalaImmutableAbstractMapSerializer,
  ScalaMutableMapSerializer
}
import org.scalatest.matchers.should.Matchers
import org.scalatest.flatspec.AnyFlatSpecLike

class KryoInitSpec extends Matchers with AnyFlatSpecLike {

  "The custom KryoInit" should "register serializers" in {
    val kryo = new Kryo()
    new KryoInit().customize(kryo)
    kryo.getDefaultSerializer(classOf[scala.Enumeration#Value]) shouldBe an[
      EnumerationSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[
      ScalaImmutableAbstractMapSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[
      ScalaImmutableAbstractMapSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.mutable.HashMap[_, _]]) shouldBe a[
      ScalaMutableMapSerializer
    ]
  }

  it should "serialize immutable maps" in {
    val kryo = new Kryo()
    new KryoInit().customize(kryo)
    val map1 = Map(
      "Rome" -> "Italy",
      "London" -> "England",
      "Paris" -> "France",
      "New York" -> "USA",
      "Tokyo" -> "Japan",
      "Peking" -> "China",
      "Brussels" -> "Belgium"
    )
    val map2 = map1 + ("Moscow" -> "Russia")
    val map3 = map2 + ("Berlin" -> "Germany")
    val map4 = map3 + ("Germany" -> "Berlin", "Russia" -> "Moscow")
    roundTrip(map1, kryo)
    roundTrip(map2, kryo)
    roundTrip(map3, kryo)
    roundTrip(map4, kryo)
  }

  def roundTrip[T](obj: T, kryo: Kryo): T = {
    val outStream = new ByteArrayOutputStream()
    val output = new Output(outStream, 4096)
    kryo.writeClassAndObject(output, obj)
    output.flush()

    val input = new Input(new ByteArrayInputStream(outStream.toByteArray), 4096)
    val obj1 = kryo.readClassAndObject(input)

    assert(obj == obj1)

    obj1.asInstanceOf[T]
  }

} 
Example 185
Source File: BytecodeUtils.scala    From graphx-algorithm   with GNU General Public License v2.0 5 votes vote down vote up
package org.apache.spark.graphx.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable.HashSet
import scala.language.existentials

import org.apache.spark.util.Utils

import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor}
import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._



// Note: the enclosing object declaration and its other members (including the skipClass helper
// used below) are truncated in this excerpt; the object is presumably BytecodeUtils.
private[graphx] object BytecodeUtils {

  private class MethodInvocationFinder(className: String, methodName: String)
    extends ClassVisitor(ASM4) {

    val methodsInvoked = new HashSet[(Class[_], String)]

    override def visitMethod(access: Int, name: String, desc: String,
                             sig: String, exceptions: Array[String]): MethodVisitor = {
      if (name == methodName) {
        new MethodVisitor(ASM4) {
          override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) {
            if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
              if (!skipClass(owner)) {
                methodsInvoked.add((Class.forName(owner.replace("/", ".")), name))
              }
            }
          }
        }
      } else {
        null
      }
    }
  }
} 
Example 186
Source File: RulesTxtDeploymentServiceSpec.scala    From smui   with Apache License 2.0 5 votes vote down vote up
package models

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.ZipInputStream

import org.apache.commons.io.IOUtils
import org.scalatest.{FlatSpec, Matchers}

class RulesTxtDeploymentServiceSpec extends FlatSpec with Matchers with ApplicationTestBase {

  private lazy val service = injector.instanceOf[RulesTxtDeploymentService]
  private var inputIds: Seq[SearchInputId] = Seq.empty

  override protected def beforeAll(): Unit = {
    super.beforeAll()

    createTestCores()
    inputIds = createTestRule()
  }

  private def rulesFileContent(ruleIds: Seq[SearchInputId]): String = s"""aerosmith =>
                           |	SYNONYM: mercury
                           |	DOWN(10): battery
                           |	UP(10): notebook
                           |	FILTER: zz top
                           |	@{
                           |	  "_log" : "${ruleIds.head}"
                           |	}@
                           |
                           |mercury =>
                           |	SYNONYM: aerosmith
                           |	DOWN(10): battery
                           |	UP(10): notebook
                           |	FILTER: zz top
                           |	@{
                           |	  "_log" : "${ruleIds.head}"
                           |	}@
                           |
                           |shipping =>
                           |	DECORATE: REDIRECT http://xyz.com/shipping
                           |	@{
                           |	  "_log" : "${ruleIds.last}"
                           |	}@""".stripMargin

  "RulesTxtDeploymentService" should "generate rules files with correct file names" in {
    val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false)
    rulesTxt.solrIndexId shouldBe core1Id
    rulesTxt.decompoundRules shouldBe empty
    rulesTxt.regularRules.content.trim shouldBe rulesFileContent(inputIds)

    rulesTxt.regularRules.sourceFileName shouldBe "/tmp/search-management-ui_rules-txt.tmp"
    rulesTxt.regularRules.destinationFileName shouldBe "/usr/bin/solr/liveCore/conf/rules.txt"
  }

  it should "validate the rules files correctly" in {
    val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false)
    service.validateCompleteRulesTxts(rulesTxt, logDebug = false) shouldBe empty

    val badRulesTxt = rulesTxt.copy(regularRules = rulesTxt.regularRules.copy(content = "a very bad rules file"))
    service.validateCompleteRulesTxts(badRulesTxt, logDebug = false) shouldBe List("Line 1: Missing input for instruction")
  }

  it should "provide a zip file with all rules files" in {
    val out = new ByteArrayOutputStream()
    service.writeAllRulesTxtFilesAsZipFileToStream(out)

    val bytes = out.toByteArray
    val zipStream = new ZipInputStream(new ByteArrayInputStream(bytes))
    val firstEntry = zipStream.getNextEntry
    firstEntry.getName shouldBe "rules_core1.txt"
    IOUtils.toString(zipStream, "UTF-8").trim shouldBe rulesFileContent(inputIds)
    val secondEntry = zipStream.getNextEntry
    secondEntry.getName shouldBe "rules_core2.txt"
    IOUtils.toString(zipStream, "UTF-8").trim shouldBe ""
  }

} 
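The test above unpacks the service's archive by wrapping the ByteArrayOutputStream bytes in a ZipInputStream. For reference, producing such an in-memory zip with plain JDK classes looks roughly like the sketch below; it is not code from smui:

import java.io.ByteArrayOutputStream
import java.util.zip.{ZipEntry, ZipOutputStream}

object InMemoryZipSketch {
  // Build a single-entry zip entirely in memory and return its bytes.
  def zipSingleEntry(entryName: String, content: String): Array[Byte] = {
    val bytes = new ByteArrayOutputStream()
    val zip = new ZipOutputStream(bytes)
    zip.putNextEntry(new ZipEntry(entryName))
    zip.write(content.getBytes("UTF-8"))
    zip.closeEntry()
    zip.close()   // finishes the central directory and flushes into bytes
    bytes.toByteArray
  }
}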
Example 187
Source File: AggregatorTest.scala    From noether   with Apache License 2.0 5 votes vote down vote up
package com.spotify.noether

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.twitter.algebird.Aggregator
import org.scalatest._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

trait AggregatorTest extends AnyFlatSpec with Matchers {
  def run[A, B, C](aggregator: Aggregator[A, B, C])(as: Seq[A]): C = {
    val bs = as.map(aggregator.prepare _ compose ensureSerializable)
    val b = ensureSerializable(aggregator.reduce(bs))
    ensureSerializable(aggregator.present(b))
  }

  private def serializeToByteArray(value: Any): Array[Byte] = {
    val buffer = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(buffer)
    oos.writeObject(value)
    buffer.toByteArray
  }

  private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = {
    val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue))
    ois.readObject()
  }

  private def ensureSerializable[T](value: T): T =
    deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T]
} 
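The ensureSerializable helper above is simply a Java-serialization round trip through a ByteArrayOutputStream. The same pattern works standalone on any Serializable value; the names in this sketch are illustrative:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object JavaSerializationRoundTrip {
  case class Point(x: Int, y: Int)   // case classes are Serializable by default

  def main(args: Array[String]): Unit = {
    val buffer = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(buffer)
    oos.writeObject(Point(1, 2))
    oos.close()

    val ois = new ObjectInputStream(new ByteArrayInputStream(buffer.toByteArray))
    val copy = ois.readObject().asInstanceOf[Point]
    assert(copy == Point(1, 2))
  }
}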
Example 188
Source File: TestingTypedCount.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
import org.apache.spark.sql.hive.execution.TestingTypedCount.State
import org.apache.spark.sql.types._

@ExpressionDescription(
  usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " +
          "but implements ObjectAggregateFunction.")
case class TestingTypedCount(
    child: Expression,
    mutableAggBufferOffset: Int = 0,
    inputAggBufferOffset: Int = 0)
  extends TypedImperativeAggregate[TestingTypedCount.State] {

  def this(child: Expression) = this(child, 0, 0)

  override def children: Seq[Expression] = child :: Nil

  override def dataType: DataType = LongType

  override def nullable: Boolean = false

  override def createAggregationBuffer(): State = TestingTypedCount.State(0L)

  override def update(buffer: State, input: InternalRow): State = {
    if (child.eval(input) != null) {
      buffer.count += 1
    }
    buffer
  }

  override def merge(buffer: State, input: State): State = {
    buffer.count += input.count
    buffer
  }

  override def eval(buffer: State): Any = buffer.count

  override def serialize(buffer: State): Array[Byte] = {
    val byteStream = new ByteArrayOutputStream()
    val dataStream = new DataOutputStream(byteStream)
    dataStream.writeLong(buffer.count)
    byteStream.toByteArray
  }

  override def deserialize(storageFormat: Array[Byte]): State = {
    val byteStream = new ByteArrayInputStream(storageFormat)
    val dataStream = new DataInputStream(byteStream)
    TestingTypedCount.State(dataStream.readLong())
  }

  override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
    copy(mutableAggBufferOffset = newMutableAggBufferOffset)

  override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate =
    copy(inputAggBufferOffset = newInputAggBufferOffset)

  override val prettyName: String = "typed_count"
}

object TestingTypedCount {
  case class State(var count: Long)
} 
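serialize and deserialize above shuttle a single Long through a ByteArrayOutputStream using the DataOutputStream/DataInputStream wrappers. Taken in isolation, the round trip is just this (a minimal sketch):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

object DataStreamRoundTrip {
  def main(args: Array[String]): Unit = {
    val byteStream = new ByteArrayOutputStream()
    val dataStream = new DataOutputStream(byteStream)
    dataStream.writeLong(42L)   // written big-endian as 8 bytes
    dataStream.flush()

    val readBack = new DataInputStream(new ByteArrayInputStream(byteStream.toByteArray)).readLong()
    assert(readBack == 42L)
  }
}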
Example 189
Source File: SapSQLContextSuite.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import java.io.{ByteArrayOutputStream, ObjectOutputStream}

import org.apache.spark.sql.parser.SapParserException
import org.apache.spark.util.DummyRelationUtils._
import org.mockito.Mockito
import org.scalatest.FunSuite

class SapSQLContextSuite extends FunSuite with GlobalSapSQLContext {

  test("SQL contexts do not support hive functions") {
    val rdd = sc.parallelize(Seq(Row("1"), Row("2")))
    sqlc.createDataFrame(rdd, 'a.string, needsConversion = false)
      .registerTempTable("foo")

    intercept[AnalysisException] {
      sqlc.sql("SELECT int(a) FROM foo")
    }
  }

  test ("Check Spark Version"){
     val sap_sqlc = sqlContext.asInstanceOf[CommonSapSQLContext]
     // current spark runtime version shall be supported
     sap_sqlc.checkSparkVersion(List(org.apache.spark.SPARK_VERSION))

     // runtime exception for an unsupported version
     intercept[RuntimeException]{
      sap_sqlc.checkSparkVersion(List("some.unsupported.version"))
     }
  }

  test("Slightly different versions") {
    val sap_sqlc = sqlContext.asInstanceOf[CommonSapSQLContext]
    val spy_sap_sqlc = Mockito.spy(sap_sqlc)
    Mockito.when(spy_sap_sqlc.getCurrentSparkVersion())
      .thenReturn(org.apache.spark.SPARK_VERSION + "-CDH")

    // should not throw!
    spy_sap_sqlc.checkSparkVersion(spy_sap_sqlc.supportedVersions)

    Mockito.when(spy_sap_sqlc.getCurrentSparkVersion())
      .thenReturn("something- " + org.apache.spark.SPARK_VERSION)

    // should not throw!
    spy_sap_sqlc.checkSparkVersion(spy_sap_sqlc.supportedVersions)
  }

  test("Ensure SapSQLContext stays serializable"){
    // relevant for Bug 92818
    // Remember that all class references in SapSQLContext must be serializable!
    val oos = new ObjectOutputStream(new ByteArrayOutputStream())
    oos.writeObject(sqlContext)
    oos.close()
  }

  test("Rand function") {
    sqlContext.sql(
      s"""
         |CREATE TABLE test (name varchar(20), age integer)
         |USING com.sap.spark.dstest
         |OPTIONS (
         |tableName "test"
         |)
       """.stripMargin)

    sqlContext.sql("SELECT * FROM test WHERE rand() < 0.1")
  }

  test("test version fields") {
    val sapSqlContext = sqlContext.asInstanceOf[CommonSapSQLContext]
    assert(sapSqlContext.EXTENSIONS_VERSION.isEmpty)
    assert(sapSqlContext.DATASOURCES_VERSION.isEmpty)
  }
} 
Example 190
Source File: SQLRunnerSuite.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package com.sap.spark.cli

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

import org.apache.spark.SparkContext
import org.apache.spark.sql.{GlobalSapSQLContext, SQLContext}
import org.scalatest.{BeforeAndAfterEach, FunSuite, ShouldMatchers}



    // good call
    val goodOpts =
      SQLRunner.parseOpts(List("a.sql", "b.sql", "-o", "output.csv"))

    goodOpts.sqlFiles should be(List("a.sql", "b.sql"))
    goodOpts.output should be(Some("output.csv"))

    // bad call
    val badOpts = SQLRunner.parseOpts(List())

    badOpts.sqlFiles should be(List())
    badOpts.output should be(None)

    // ugly call
    val uglyOpts =
      SQLRunner.parseOpts(List("a.sql", "-o", "output.csv", "b.sql"))

    uglyOpts.sqlFiles should be(List("a.sql", "b.sql"))
    uglyOpts.output should be(Some("output.csv"))
  }

  def runSQLTest(input: String, expectedOutput: String): Unit = {
    val inputStream: InputStream = new ByteArrayInputStream(input.getBytes())
    val outputStream = new ByteArrayOutputStream()

    SQLRunner.sql(inputStream, outputStream)

    val output = outputStream.toString
    output should be(expectedOutput)
  }

  test("can run dummy query") {
    val input = "SELECT 1;"
    val output = "1\n"

    runSQLTest(input, output)
  }

  test("can run multiple dummy queries") {
    val input = """
        |SELECT 1;SELECT 2;
        |SELECT 3;
      """.stripMargin

    val output = "1\n2\n3\n"

    runSQLTest(input, output)
  }

  test("can run a basic example with tables") {
    val input = """
                  |SELECT * FROM DEMO_TABLE;
                  |SELECT * FROM DEMO_TABLE LIMIT 1;
                  |DROP TABLE DEMO_TABLE;
                """.stripMargin

    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }

  test("can run an example with comments") {
    val input = """
                  |SELECT * FROM DEMO_TABLE; -- this is the first query
                  |SELECT * FROM DEMO_TABLE LIMIT 1;
                  |-- now let's drop a table
                  |DROP TABLE DEMO_TABLE;
                """.stripMargin

    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }
} 
Example 191
Source File: StyleChecker.scala    From sbt-coursera   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package ch.epfl.lamp

import sbt.File
import java.io.ByteArrayOutputStream
import java.io.PrintStream
import org.scalastyle._
import Settings._

object StyleChecker {
  val maxResult = 100

  class CustomTextOutput[T <: FileSpec]() extends Output[T] {
    private val messageHelper = new MessageHelper(this.getClass().getClassLoader())

    var fileCount: Int = _
    override def message(m: Message[T]): Unit = m match {
      case StartWork() =>
      case EndWork() =>
      case StartFile(file) =>
        print("Checking file " + file + "...")
        fileCount = 0
      case EndFile(file) =>
        if (fileCount == 0) println(" OK!")
      case StyleError(file, clazz, key, level, args, line, column, customMessage) =>
        report(line, column, messageHelper.text(level.name),
          Output.findMessage(messageHelper, clazz, key, args, customMessage))
      case StyleException(file, clazz, message, stacktrace, line, column) =>
        report(line, column, "error", message)
    }

    private def report(line: Option[Int], column: Option[Int], level: String, message: String) {
      if (fileCount == 0) println("")
      fileCount += 1
      println("  " + fileCount + ". " + level + pos(line, column) + ":")
      println("     " + message)
    }

    private def pos(line: Option[Int], column: Option[Int]): String = line match {
      case Some(line) => " at line " + line + (column match {
        case Some(column) => " character " + column
        case None => ""
      })
      case None => ""
    }
  }

  def score(outputResult: OutputResult) = {
    val penalties = outputResult.errors + outputResult.warnings
    scala.math.max(maxResult - penalties, 0)
  }

  def assess(sources: Seq[File], styleSheetPath: String, courseId: String): (String, Int) = {
    val configFile = new File(styleSheetPath).getAbsolutePath

    val messages = new ScalastyleChecker().checkFiles(
      ScalastyleConfiguration.readFromXml(configFile),
      Directory.getFiles(None, sources))

    val output = new ByteArrayOutputStream()
    val outputResult = Console.withOut(new PrintStream(output)) {
      new CustomTextOutput().output(messages)
    }

    val msg =
      output.toString +
        "Processed " + outputResult.files + " file(s)\n" +
        "Found " + outputResult.errors + " errors\n" +
        "Found " + outputResult.warnings + " warnings\n" +
        (if (outputResult.errors + outputResult.warnings > 0) "Consult the style guide at %s/wiki/ScalaStyleGuide".format(baseURL(courseId)) else "")

    (msg, score(outputResult))
  }
} 
Example 192
Source File: RegisterNodeSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.RegisterNode
import justin.db.consistenthashing.NodeId
import org.scalatest.{FlatSpec, Matchers}

class RegisterNodeSerializerTest extends FlatSpec with Matchers {

  behavior of "RegisterNode Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[RegisterNode], RegisterNodeSerializer)

    // object
    val serializedData = RegisterNode(NodeId(1))

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[RegisterNode])

    serializedData shouldBe deserializedData
  }
} 
Example 193
Source File: DataSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.Data
import justin.db.consistenthashing.NodeId
import justin.db.vectorclocks.{Counter, VectorClock}
import org.scalatest.{FlatSpec, Matchers}

class DataSerializerTest extends FlatSpec with Matchers {

  behavior of "Data Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[justin.db.Data], DataSerializer)

    // object
    val vClock         = VectorClock[NodeId](Map(NodeId(1) -> Counter(3)))
    val timestamp      = System.currentTimeMillis()
    val serializedData = Data(id = UUID.randomUUID(), value = "some value", vClock, timestamp)

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[Data])

    serializedData shouldBe deserializedData
  }
} 
Example 194
Source File: StorageNodeWriteDataLocalSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.Data
import justin.db.actors.protocol.StorageNodeWriteDataLocal
import justin.db.consistenthashing.NodeId
import justin.db.vectorclocks.{Counter, VectorClock}
import org.scalatest.{FlatSpec, Matchers}

class StorageNodeWriteDataLocalSerializerTest extends FlatSpec with Matchers {

  behavior of "StorageNodeWriteDataLocal Serializer"

  it should "serialize/deserialize StorageNodeWriteDataLocal" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[StorageNodeWriteDataLocal], StorageNodeWriteDataLocalSerializer)

    // object
    val data = Data(
      id        = UUID.randomUUID(),
      value     = "some value",
      vclock    = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))),
      timestamp = System.currentTimeMillis()
    )
    val serializedData = StorageNodeWriteDataLocal(data)

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[StorageNodeWriteDataLocal])

    serializedData shouldBe deserializedData
  }
} 
Example 195
Source File: StorageNodeLocalReadSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.StorageNodeLocalRead
import org.scalatest.{FlatSpec, Matchers}

class StorageNodeLocalReadSerializerTest extends FlatSpec with Matchers {

  behavior of "StorageNodeLocalReader Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[StorageNodeLocalRead], StorageNodeLocalReadSerializer)

    // object
    val serializedData = StorageNodeLocalRead(UUID.randomUUID())

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[StorageNodeLocalRead])

    serializedData shouldBe deserializedData
  }
} 
Example 196
Source File: RocksDBStorage.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.storage.PluggableStorageProtocol.{Ack, StorageGetData}
import org.rocksdb.{FlushOptions, Options, RocksDB}

import scala.concurrent.Future

// TODO:
// The current version stores all data in a single file (it completely ignores data originality).
// Data should eventually be split by ring partitionId.
// This might be an issue during possible data movements between nodes.
final class RocksDBStorage(dir: File) extends PluggableStorageProtocol {
  import RocksDBStorage._

  {
    RocksDB.loadLibrary()
  }

  private[this] val kryo = new Kryo()

  private[this] val db: RocksDB = {
    val options: Options = new Options().setCreateIfMissing(true)
    RocksDB.open(options, dir.getPath)
  }

  override def get(id: UUID)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.StorageGetData] = {
    val key: Array[Byte] = uuid2bytes(kryo, id)
    val dataBytes: Array[Byte] = db.get(key)

    val justinDataOpt = Option(dataBytes).map { dataBytes =>
      val input = new Input(new ByteArrayInputStream(dataBytes))
      JustinDataSerializer.read(kryo, input, classOf[JustinData])
    }

    Future.successful(justinDataOpt.map(StorageGetData.Single).getOrElse(StorageGetData.None))
  }

  override def put(data: JustinData)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.Ack] = {
    val key: Array[Byte] = uuid2bytes(kryo, data.id)
    val dataBytes: Array[Byte] = {
      val output = new Output(new ByteArrayOutputStream())
      JustinDataSerializer.write(kryo, output, data)
      output.getBuffer
    }

    db.put(key, dataBytes)
    db.flush(new FlushOptions().setWaitForFlush(true))

    Ack.future
  }
}

object RocksDBStorage {

  def uuid2bytes(kryo: Kryo, id: UUID): Array[Byte] = {
    val output = new Output(new ByteArrayOutputStream(), 16)
    UUIDSerializer.write(kryo, output, id)
    output.getBuffer
  }

  object UUIDSerializer extends Serializer[UUID] {
    override def read(kryo: Kryo, input: Input, `type`: Class[UUID]): UUID = {
      new UUID(input.readLong, input.readLong)
    }

    override def write(kryo: Kryo, output: Output, uuid: UUID): Unit = {
      output.writeLong(uuid.getMostSignificantBits)
      output.writeLong(uuid.getLeastSignificantBits)
    }
  }

  object JustinDataSerializer extends Serializer[JustinData] {
    override def read(kryo: Kryo, input: Input, `type`: Class[JustinData]): JustinData = {
      JustinData(
        id        = UUIDSerializer.read(kryo, input, classOf[UUID]),
        value     = input.readString(),
        vclock    = input.readString(),
        timestamp = input.readLong()
      )
    }

    override def write(kryo: Kryo, output: Output, data: JustinData): Unit = {
      UUIDSerializer.write(kryo, output, data.id)
      output.writeString(data.value)
      output.writeString(data.vclock)
      output.writeLong(data.timestamp)
    }
  }
} 
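One caveat worth noting about the pattern above (an observation, not something the JustinDB code states): Kryo's Output.getBuffer returns the entire backing array, so byte arrays built this way can carry unused trailing bytes, whereas Output.toBytes copies exactly what was written. A tiny sketch of the difference:

import java.io.ByteArrayOutputStream
import com.esotericsoftware.kryo.io.Output

object KryoOutputBuffers {
  def main(args: Array[String]): Unit = {
    val output = new Output(new ByteArrayOutputStream(), 64)
    output.writeString("abc")
    val written = output.toBytes     // exactly the bytes written so far
    val backing = output.getBuffer   // the full 64-byte internal buffer
    println(s"toBytes: ${written.length} bytes, getBuffer: ${backing.length} bytes")
  }
}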
Example 197
Source File: JustinDataSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.storage.RocksDBStorage.JustinDataSerializer
import org.scalatest.{FlatSpec, Matchers}

class JustinDataSerializerTest extends FlatSpec with Matchers {

  behavior of "JustinDataSerializer"

  it should "serialize/deserialize JustinData with Kryo" in {
    val kryo = new Kryo()
    val data = JustinData(
      id        = UUID.randomUUID,
      value     = "this is an example value",
      vclock    = "vclock-value",
      timestamp = 1234124L
    )

    // serialize
    val output = new Output(new ByteArrayOutputStream())
    JustinDataSerializer.write(kryo, output, data)
    val dataBytes = output.getBuffer

    // deserialize
    val input = new Input(new ByteArrayInputStream(dataBytes))
    JustinDataSerializer.read(kryo, input, classOf[JustinData]) shouldBe data
  }
} 
Example 198
Source File: AvroRandomExtractor.scala    From streamliner-examples   with Apache License 2.0 5 votes vote down vote up
package com.memsql.spark.examples.avro

import com.memsql.spark.etl.api._
import com.memsql.spark.etl.utils.PhaseLogger
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.sql.{SQLContext, DataFrame, Row}
import org.apache.spark.sql.types._
import org.apache.avro.Schema
import org.apache.avro.generic.GenericData
import org.apache.avro.io.{DatumWriter, EncoderFactory}
import org.apache.avro.specific.SpecificDatumWriter

import java.io.ByteArrayOutputStream

// Generates an RDD of byte arrays, where each is a serialized Avro record.
class AvroRandomExtractor extends Extractor {
  var count: Int = 1
  var generator: AvroRandomGenerator = null
  var writer: DatumWriter[GenericData.Record] = null
  var avroSchema: Schema = null
  
  def schema: StructType = StructType(StructField("bytes", BinaryType, false) :: Nil)

  val parser: Schema.Parser = new Schema.Parser()

  override def initialize(ssc: StreamingContext, sqlContext: SQLContext, config: PhaseConfig, batchInterval: Long, logger: PhaseLogger): Unit = {
    val userConfig = config.asInstanceOf[UserExtractConfig]
    val avroSchemaJson = userConfig.getConfigJsValue("avroSchema") match {
      case Some(s) => s
      case None => throw new IllegalArgumentException("avroSchema must be set in the config")
    }
    count = userConfig.getConfigInt("count").getOrElse(1)
    avroSchema = parser.parse(avroSchemaJson.toString)

    writer = new SpecificDatumWriter(avroSchema)
    generator = new AvroRandomGenerator(avroSchema)
  }

  override def next(ssc: StreamingContext, time: Long, sqlContext: SQLContext, config: PhaseConfig, batchInterval: Long, logger: PhaseLogger): Option[DataFrame] = {
    val rdd = sqlContext.sparkContext.parallelize((1 to count).map(_ => Row({
      val out = new ByteArrayOutputStream
      val encoder = EncoderFactory.get().binaryEncoder(out, null)
      val avroRecord: GenericData.Record = generator.next().asInstanceOf[GenericData.Record]

      writer.write(avroRecord, encoder)
      encoder.flush
      out.close
      out.toByteArray
    })))

    Some(sqlContext.createDataFrame(rdd, schema))
  }
} 
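The extractor above only encodes records; reading one of the emitted byte arrays back can be done with Avro's GenericDatumReader, roughly as in the sketch below (not part of the streamliner example):

import java.io.ByteArrayInputStream
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory

object AvroDecodeSketch {
  // Decode a single binary-encoded record produced with the writer/encoder pattern above.
  def decodeRecord(bytes: Array[Byte], schema: Schema): GenericRecord = {
    val reader = new GenericDatumReader[GenericRecord](schema)
    val decoder = DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(bytes), null)
    reader.read(null, decoder)
  }
}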
Example 199
Source File: ResponseHelper.scala    From ledger-manager-chrome   with MIT License 5 votes vote down vote up
package co.ledger.wallet.core.net



import java.io.{ByteArrayOutputStream, StringWriter}
import java.nio.charset.Charset

import org.json.{JSONArray, JSONObject}

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.scalajs.js
import scala.util.{Failure, Success}

object ResponseHelper {

  implicit class ResponseFuture(f: Future[HttpClient#Response]) {

    def json: Future[(JSONObject, HttpClient#Response)] = {
      f.string.map { case (body, response) =>
        (new JSONObject(body), response)
      }
    }

    def jsonArray: Future[(JSONArray, HttpClient#Response)] = {
      f.string.map { case (body, response) =>
        (new JSONArray(body), response)
      }
    }

    def string: Future[(String, HttpClient#Response)] = {
      f.bytes.map { case (body, response) =>
        val writer = new StringWriter(body.length)
        body foreach {(char) =>
          writer.append(char.toChar)
        }
        (writer.toString, response)
      }
    }

    def bytes: Future[(Array[Byte], HttpClient#Response)] = {
      f.map { response =>
        val input = response.body
        val output = new ByteArrayOutputStream()
        val buffer = new Array[Byte](4096)
        var read = 0
        while ({read = input.read(buffer); read} > 0) {
          output.write(buffer, 0, read)
        }
        val result = output.toByteArray
        input.close()
        output.close()
        (result, response)
      }
    }

    def noResponseBody: Future[HttpClient#Response] = {
      f.andThen {
        case Success(response) =>
          response.body.close()
          response
        case Failure(cause) =>
          throw cause
      }
    }

  }

} 
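The while loop inside bytes above is the standard way to drain an InputStream into a ByteArrayOutputStream. Extracted on its own it looks like this; the object and method names are illustrative:

import java.io.{ByteArrayOutputStream, InputStream}

object StreamDraining {
  // Copy everything from the input into an in-memory buffer and return the bytes.
  def readAllBytes(input: InputStream): Array[Byte] = {
    val output = new ByteArrayOutputStream()
    val buffer = new Array[Byte](4096)
    var read = input.read(buffer)
    while (read != -1) {
      output.write(buffer, 0, read)
      read = input.read(buffer)
    }
    output.toByteArray
  }
}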
Example 200
Source File: utils.scala    From spark-http-stream   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming.http

import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.TimestampType
import org.apache.spark.SparkConf
import org.apache.commons.io.IOUtils
import org.apache.spark.serializer.KryoSerializer
import java.io.InputStream
import com.esotericsoftware.kryo.io.Input
import java.io.ByteArrayOutputStream

class WrongArgumentException(name: String, value: Any)
		extends RuntimeException(s"wrong argument: $name=$value") {
}

class MissingRequiredArgumentException(map: Map[String, String], paramName: String)
		extends RuntimeException(s"missing required argument: $paramName, all parameters=$map") {
}

class InvalidSerializerNameException(serializerName: String)
		extends RuntimeException(s"invalid serializer name: $serializerName") {
}

object SchemaUtils {
	def buildSchema(schema: StructType, includesTimestamp: Boolean, timestampColumnName: String = "_TIMESTAMP_"): StructType = {
		if (!includesTimestamp)
			schema;
		else
			StructType(schema.fields.toSeq :+ StructField(timestampColumnName, TimestampType, false));
	}
}

object Params {
	
	def deserialize(bytes: Array[Byte]): Any = {
		val kryo = kryoSerializer.newKryo();
		val input = new Input();
		input.setBuffer(bytes);
		kryo.readClassAndObject(input);
	}
}