org.scalatest.Ignore Scala Examples

The following examples show how to use org.scalatest.Ignore. They are taken from open-source projects; the source file, project, and license are noted above each example. A short usage sketch comes first.
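Before the project examples, here is a minimal sketch of the two common ways to skip tests with ScalaTest. It is not taken from any of the projects below; the suite names are made up for illustration, and it assumes ScalaTest 3.1+, where FunSuite lives at org.scalatest.funsuite.AnyFunSuite (the older examples below import FunSuite directly from org.scalatest). Annotating a suite with @Ignore skips every test in it; replacing test with ignore skips a single case.

import org.scalatest.Ignore
import org.scalatest.funsuite.AnyFunSuite

// Class-level @Ignore: every test in this suite is reported as ignored and none of them run.
@Ignore
class WholeSuiteSkippedSpec extends AnyFunSuite {
  test("expensive integration check") {
    assert(1 + 1 == 2)
  }
}

// Per-test skipping: `ignore` replaces `test`, so only this case is skipped.
class SingleTestSkippedSpec extends AnyFunSuite {
  ignore("temporarily disabled test") {
    assert(List(1, 2, 3).sum == 6)
  }

  test("still runs normally") {
    assert("scalatest".startsWith("scala"))
  }
}

When the runner executes these suites, the skipped tests show up as ignored in the report rather than passed or failed.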
Example 1
Source File: TestSIWithAddSegment.scala    From carbondata   with Apache License 2.0
package org.apache.carbondata.spark.testsuite.secondaryindex

import org.apache.spark.sql.CarbonEnv
import org.apache.spark.sql.secondaryindex.joins.BroadCastSIFilterPushJoin
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.{BeforeAndAfterAll, Ignore}

import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.metadata.datatype.Field
import org.apache.carbondata.core.util.path.CarbonTablePath
import org.apache.carbondata.sdk.file.{CarbonSchemaReader, CarbonWriterBuilder, Schema}

class TestSIWithAddSegment extends QueryTest with BeforeAndAfterAll {

  val newSegmentPath: String = warehouse + "/newsegment/"

  override protected def beforeAll(): Unit = {
    dropTables()
    FileFactory.getCarbonFile(newSegmentPath).delete()
    sql("create table maintable(a string, b int, c string) stored as carbondata")
    sql("insert into maintable select 'k',1,'k'")
    sql("insert into maintable select 'l',2,'l'")
    sql("CREATE INDEX maintable_si  on table maintable (c) as 'carbondata'")
    val carbonTable = CarbonEnv.getCarbonTable(None, "maintable")(sqlContext.sparkSession)
    val segmentPath = CarbonTablePath.getSegmentPath(carbonTable.getTablePath, "0")
    val schema = CarbonSchemaReader.readSchema(segmentPath).asOriginOrder()
    val writer = new CarbonWriterBuilder()
      .outputPath(newSegmentPath).withCsvInput(schema).writtenBy("TestSIWithAddSegment").build()
    writer.write(Array[String]("m", "3", "m"))
    writer.close()
    sql(s"alter table maintable add segment options('path'='${ newSegmentPath }', " +
        s"'format'='carbon')")
  }

  override protected def afterAll(): Unit = {
    dropTables()
    FileFactory.getCarbonFile(newSegmentPath).delete()
  }

  private def dropTables(): Unit = {
    sql("drop table if exists maintable")
    sql("drop table if exists maintable1")
  }

  test("test if the query hits SI after adding a segment to the main table") {
    val d = sql("select * from maintable where c = 'm'")
    assert(d.queryExecution.executedPlan.isInstanceOf[BroadCastSIFilterPushJoin])
  }

  test("compare results of SI and NI after adding segments") {
    val siResult = sql("select * from maintable where c = 'm'")
    val niResult = sql("select * from maintable where ni(c = 'm')")
    assert(!niResult.queryExecution.executedPlan.isInstanceOf[BroadCastSIFilterPushJoin])
    checkAnswer(siResult, niResult)
  }

  test("test SI creation after adding segments") {
    sql("create table maintable1(a string, b int, c string) stored as carbondata")
    sql("insert into maintable1 select 'k',1,'k'")
    sql("insert into maintable1 select 'l',2,'l'")
    val carbonTable = CarbonEnv.getCarbonTable(None, "maintable1")(sqlContext.sparkSession)
    val segmentPath = CarbonTablePath.getSegmentPath(carbonTable.getTablePath, "0")
    val schema = CarbonSchemaReader.readSchema(segmentPath).asOriginOrder()
    val writer = new CarbonWriterBuilder()
      .outputPath(newSegmentPath).withCsvInput(schema).writtenBy("TestSIWithAddSegment").build()
    writer.write(Array[String]("m", "3", "m"))
    writer.close()
    sql(s"alter table maintable1 add segment options('path'='${ newSegmentPath }', " +
        s"'format'='carbon')")
    sql("CREATE INDEX maintable1_si  on table maintable1 (c) as 'carbondata'")
    assert(sql("show segments for table maintable1_si").collect().length ==
           sql("show segments for table maintable1").collect().length)
    val siResult = sql("select * from maintable1 where c = 'm'")
    val niResult = sql("select * from maintable1 where ni(c = 'm')")
    assert(!niResult.queryExecution.executedPlan.isInstanceOf[BroadCastSIFilterPushJoin])
    checkAnswer(siResult, niResult)
  }

  test("test query on SI with all external segments") {
    sql("drop table if exists maintable1")
    sql("create table maintable1(a string, b int, c string) stored as carbondata")
    sql("CREATE INDEX maintable1_si  on table maintable1 (c) as 'carbondata'")
    val fields = Array(new Field("a", "string"), new Field("b", "int"), new Field("c", "string"))
    val writer = new CarbonWriterBuilder()
      .outputPath(newSegmentPath)
      .withCsvInput(new Schema(fields))
      .writtenBy("TestSIWithAddSegment")
      .build()
    writer.write(Array[String]("m", "3", "m"))
    writer.close()
    sql(s"alter table maintable1 add segment options('path'='${ newSegmentPath }', " +
        s"'format'='carbon')")
    val siResult = sql("select * from maintable1 where c = 'm'")
    val niResult = sql("select * from maintable1 where ni(c = 'm')")
    checkAnswer(siResult, niResult)
  }
} 
Example 2
Source File: DataFrameReportPerformanceSpec.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.deeplang.doperables.dataframe

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType}
import org.scalatest.{BeforeAndAfter, Ignore}

import io.deepsense.commons.utils.{DoubleUtils, Logging}
import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}

// It's ignored because it has no assertions; it only prints report generation time.
@Ignore
class DataFrameReportPerformanceSpec
    extends DeeplangIntegTestSupport
    with BeforeAndAfter
    with TestFiles
    with Logging {
  val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv"

  "DataFrame" should {
    "generate report" when {
      "DataFrame has 17K of rows" in {
        val numberOfTries = 10
        var results: Seq[Double] = Seq()
        for (i <- 1 to numberOfTries) {
          val dataFrame: DataFrame = demandDataFrame()
          val start = System.nanoTime()
          val report = dataFrame.report
          val end = System.nanoTime()
          val time1: Double = (end - start).toDouble / 1000000000.0
          results = results :+ time1
          logger.debug("Report generation time: {}", DoubleUtils.double2String(time1))
        }
        logger.debug(
          "Mean report generation time: {}",
          DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble))
      }
    }
  }

  private def demandDataFrame(): DataFrame = {
    val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile)
    val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row)
    executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data)
  }

  private def demandSchema: StructType = StructType(Seq(
    StructField("datetime", TimestampType),
    StructField("log_count", DoubleType),
    StructField("workingday", DoubleType),
    StructField("holiday", DoubleType),
    StructField("season2", DoubleType),
    StructField("season3", DoubleType),
    StructField("season4", DoubleType)))

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}

private object DataFrameHelpers {
  def demandString2Row(s: String): Row = {
    val split = s.split(",")
    Row(
      timestamp(split(0)),
      split(1).toDouble,
      split(2).toDouble,
      split(3).toDouble,
      split(4).toDouble,
      split(5).toDouble,
      split(6).toDouble
    )
  }

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
} 
Example 3
Source File: TwitterPopularTagsTest.scala    From apache-spark-test   with Apache License 2.0
package com.github.dnvriend.spark.streaming.twitter

import com.github.dnvriend.TestSpec
import com.github.dnvriend.spark.Tweet
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.dstream.{ DStream, ReceiverInputDStream }
import org.apache.spark.streaming.twitter.TwitterUtils
import org.scalatest.Ignore
import pprint.Config.Colors.PPrintConfig
import pprint._
import twitter4j.Status

// see: https://dev.twitter.com/streaming/overview
// see: https://dev.twitter.com/streaming/public
// see: https://support.twitter.com/articles/20174643
// see: https://github.com/apache/bahir/blob/master/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala
// see: http://blog.originate.com/blog/2014/06/15/idiomatic-scala-your-options-do-not-match/

@Ignore
class TwitterPopularTagsTest extends TestSpec {
  it should "find popular tags" in withStreamingContext(2, await = true) { spark => ssc =>

    //    val filters = Array("#scala", "#akka", "#spark", "@scala", "@akka", "@spark")
    val filters = Array("#summercamp", "#akka", "#scala", "#fastdata", "#spark", "#hadoop")
    val stream: ReceiverInputDStream[Status] = TwitterUtils.createStream(ssc, None, filters)

    val msgs: DStream[Tweet] =
      stream
        .map(Tweet(_))

    msgs.foreachRDD { rdd =>
      rdd.take(10).foreach(pprint.pprintln)
    }

    val hashTags: DStream[String] =
      stream
        .filter(_.getLang == "en")
        .flatMap(status => status.getText.split(" ").filter(_.startsWith("#")))

    val topCounts60 =
      hashTags
        .map((_, 1))
        .reduceByKeyAndWindow(_ + _, Seconds(60))
        .map { case (topic, count) => (count, topic) }
        .transform(_.sortByKey(ascending = false))

    val topCounts10 =
      hashTags
        .map((_, 1))
        .reduceByKeyAndWindow(_ + _, Seconds(10))
        .map { case (topic, count) => (count, topic) }
        .transform(_.sortByKey(false))

    topCounts60.foreachRDD(rdd => {
      val topList = rdd.take(10)
      pprint.pprintln("\nPopular topics in last 60 seconds (%s total):".format(rdd.count()))
      topList.foreach { case (count, tag) => println("%s (%s tweets)".format(tag, count)) }
    })

    topCounts10.foreachRDD(rdd => {
      val topList = rdd.take(10)
      pprint.pprintln("\nPopular topics in last 10 seconds (%s total):".format(rdd.count()))
      topList.foreach { case (count, tag) => println("%s (%s tweets)".format(tag, count)) }
    })

    ssc.start()
  }
} 
Example 4
Source File: CurrentPersistenceIdsQuerySourceTest.scala    From apache-spark-test   with Apache License 2.0
package com.github.dnvriend.spark.sstreaming

import java.util.UUID
import java.util.concurrent.atomic.AtomicLong

import akka.actor.{ ActorRef, Props }
import akka.persistence.PersistentActor
import akka.testkit.TestProbe
import com.github.dnvriend.TestSpec
import com.github.dnvriend.spark.datasources.SparkImplicits._
import com.github.dnvriend.spark.datasources.person.Person
import org.apache.spark.sql.streaming.{ OutputMode, ProcessingTime }
import org.scalatest.Ignore

import scala.concurrent.ExecutionContext
import scala.concurrent.duration._
import scala.language.implicitConversions

object PersonActor {
  final case class BlogPost(id: Long, text: String)
}
class PersonActor(val persistenceId: String, schedule: Boolean)(implicit ec: ExecutionContext) extends PersistentActor {
  val counter = new AtomicLong()
  def ping() = context.system.scheduler.scheduleOnce(200.millis, self, "persist")
  def randomId: String = UUID.randomUUID.toString
  override val receiveRecover: Receive = PartialFunction.empty
  override val receiveCommand: Receive = {
    case "persist" =>
      persist(Person(counter.incrementAndGet(), s"foo-$randomId", 20)) { _ =>
        sender() ! "ack"
      }
      if (schedule) ping()
  }
  if (schedule) ping()
}

@Ignore
class CurrentPersistenceIdsQuerySourceTest extends TestSpec {
  def withPersistentActor(pid: String = randomId, schedule: Boolean = false)(f: ActorRef => TestProbe => Unit): Unit = {
    val tp = TestProbe()
    val ref = system.actorOf(Props(new PersonActor(pid, schedule)))
    try f(ref)(tp) finally killActors(ref)
  }

  it should "query read journal" in withSparkSession { spark =>
    withPersistentActor() { ref => tp =>
      tp.send(ref, "persist")
      tp.expectMsg("ack")

      val jdbcReadJournal = spark.readStream
        .currentPersistenceIds("jdbc-read-journal")

      jdbcReadJournal.printSchema()

      println("Is the query streaming: " + jdbcReadJournal.isStreaming)
      println("Are there any streaming queries? " + spark.streams.active.isEmpty)

      val query = jdbcReadJournal
        .writeStream
        .format("console")
        .trigger(ProcessingTime(1.seconds))
        .queryName("consoleStream")
        .outputMode(OutputMode.Append())
        .start()

      query.awaitTermination(10.seconds)
    }
  }
} 
Example 5
Source File: SocketWordCountTest.scala    From apache-spark-test   with Apache License 2.0
package com.github.dnvriend.spark.sstreaming

import akka.Done
import akka.stream.scaladsl.Tcp._
import akka.stream.scaladsl.{ Flow, Sink, Source, Tcp }
import akka.util.ByteString
import com.github.dnvriend.TestSpec
import org.scalatest.Ignore

import scala.collection.immutable._
import scala.concurrent.Future
import scala.concurrent.duration._

@Ignore
class SocketWordCountTest extends TestSpec {
  def withSocketServer(xs: Seq[String])(f: Future[Done] => Unit): Unit = {
    val connections: Source[IncomingConnection, Future[ServerBinding]] = Tcp().bind("127.0.0.1", 9999)
    val socketServer = connections.runForeach { connection =>
      println(s"New connection from: ${connection.remoteAddress}")
      val src = Source.cycle(() => xs.iterator).map(txt => ByteString(txt) ++ ByteString("\n"))
        .flatMapConcat(msg => Source.tick(0.seconds, 200.millis, msg))
      val echo = Flow.fromSinkAndSource(Sink.ignore, src)
      connection.handleWith(echo)
    }
    f(socketServer)
  }

  it should "produce a running word count of text data received via a TCP server" in withSparkSession { spark =>
    withSocketServer(List("apache spark")) { socketServer =>
      import spark.implicits._

      val lines = spark.readStream
        .format("socket")
        .option("host", "localhost")
        .option("port", 9999)
        .load()

      // Split the lines into words
      val words = lines.as[String].flatMap(_.split(" "))

      // Generate running word count
      val wordCounts = words.groupBy("value").count()

      // Start running the query that prints the running counts to the console
      val query = wordCounts.writeStream
        .outputMode("complete")
        .format("console")
        .start()

      query.awaitTermination(10.seconds)
    }
  }
} 
Example 6
Source File: QueryCsvTest.scala    From apache-spark-test   with Apache License 2.0
package com.github.dnvriend.spark.sstreaming

import com.github.dnvriend.TestSpec
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.streaming.{ OutputMode, ProcessingTime }
import org.apache.spark.sql.types._
import org.scalatest.Ignore

import scala.concurrent.duration._
import scala.language.implicitConversions

@Ignore
class QueryCsvTest extends TestSpec {
  def copyFiles(nrTimes: Int = 10): Unit = {
    FileUtils.deleteDirectory("/tmp/csv")
    FileUtils.forceMkdir("/tmp/csv")
    (1 to nrTimes).foreach { x =>
      FileUtils.copyFile(TestSpec.PeopleCsv, s"/tmp/csv/people-$x")
    }
  }

  val schema: StructType = StructType(Array(
    StructField("id", LongType, nullable = false),
    StructField("name", StringType, nullable = true),
    StructField("age", IntegerType, nullable = true)
  ))

  it should "query csv file" in withSparkSession { spark =>
    copyFiles()

    val csv = spark.readStream
      .schema(schema)
      .format("csv")
      .option("maxFilesPerTrigger", 1)
      .option("header", "false") // The files have no header line
      .option("inferSchema", "false") // Do not infer data types; use the explicit schema above
      .option("delimiter", ";")
      .load("/tmp/csv")

    csv.printSchema()

    println("Is the query streaming: " + csv.isStreaming)
    println("Are there any streaming queries? " + spark.streams.active.isEmpty)

    val query = csv
      .writeStream
      .format("console")
      .trigger(ProcessingTime(5.seconds))
      .queryName("consoleStream")
      .outputMode(OutputMode.Append())
      .start()

    // waiting for data
    sleep(3.seconds)
    spark.streams
      .active
      .foreach(println)

    spark.streams
      .active
      .foreach(_.explain(extended = true))

    query.awaitTermination(20.seconds)
  }
} 
Example 7
Source File: CurrentEventsByPersistenceIdQueryTest.scala    From apache-spark-test   with Apache License 2.0
package com.github.dnvriend.spark.sstreaming

import akka.actor.{ ActorRef, Props }
import akka.testkit.TestProbe
import com.github.dnvriend.TestSpec
import com.github.dnvriend.spark.datasources.SparkImplicits._
import com.github.dnvriend.spark.mapper.PersonEventMapper
import org.apache.spark.sql.streaming.{ OutputMode, ProcessingTime }
import org.apache.spark.sql.functions._
import org.scalatest.Ignore

import scala.concurrent.duration._

@Ignore
class CurrentEventsByPersistenceIdQueryTest extends TestSpec {
  def withPersistentActor(pid: String = randomId, schedule: Boolean = false)(f: ActorRef => TestProbe => Unit): Unit = {
    val tp = TestProbe()
    val ref = system.actorOf(Props(new PersonActor(pid, schedule)))
    try f(ref)(tp) finally killActors(ref)
  }

  it should "read events for pid" in withSparkSession { spark =>
    import spark.implicits._
    withPersistentActor("person", schedule = true) { ref => tp =>

      tp.send(ref, "persist")
      tp.expectMsg("ack")

      val jdbcReadJournal = spark.readStream
        .schema(PersonEventMapper.schema)
        .option("pid", "person")
        .option("event-mapper", "com.github.dnvriend.spark.mapper.PersonEventMapper")
        .eventsByPersistenceId("jdbc-read-journal")

      jdbcReadJournal.printSchema()

      //      val numOfEvents = jdbcReadJournal
      //        .groupBy('persistence_id)
      //        .agg(count('sequence_number).as("number_of_events"))

      val query = jdbcReadJournal
        .writeStream
        .format("console")
        .trigger(ProcessingTime(1.seconds))
        .queryName("consoleStream")
        //        .outputMode(OutputMode.Complete())
        .outputMode(OutputMode.Append())
        .start()

      query.awaitTermination(20.seconds)
    }
  }
} 
Example 8
Source File: OkHttpSyncDigestAuthProxyManualTest.scala    From sttp   with Apache License 2.0
package sttp.client.okhttp

import org.scalatest.Ignore
import sttp.client._
import sttp.client.testing.{ConvertToFuture, ToFutureWrapper}
import org.scalatest.freespec.AsyncFreeSpec
import org.scalatest.matchers.should.Matchers

@Ignore
class OkHttpSyncDigestAuthProxyManualTest extends AsyncFreeSpec with Matchers with ToFutureWrapper {
  implicit val backend: SttpBackend[Identity, Nothing, NothingT] =
    new DigestAuthenticationBackend[Identity, Nothing, NothingT](
      OkHttpSyncBackend(options = SttpBackendOptions.httpProxy("localhost", 3128))
    )

  implicit val convertToFuture: ConvertToFuture[Identity] = ConvertToFuture.id

  "complex proxy auth with digest" in {
    val response = basicRequest
      .get(uri"http://httpbin.org/digest-auth/auth/andrzej/test/SHA-512")
      .auth
      .digest("andrzej", "test")
      .proxyAuth
      .digest("kasper", "qweqwe")
      .send()
    response.code.code shouldBe 200
  }
} 
Example 9
Source File: ProxyIntegrationTest.scala    From Neutrino   with Apache License 2.0
package com.ebay.neutrino.integ

import com.ebay.neutrino.config.{Configuration, LoadBalancer, VirtualPool, VirtualServer}
import com.ebay.neutrino.handler.{ExampleCloseHandler, ExamplePipelineHandler}
import com.ebay.neutrino.{NettyClientSupport, NeutrinoCore}
import io.netty.channel.{Channel, ChannelInitializer}
import io.netty.handler.codec.http.{DefaultFullHttpRequest, HttpMethod, HttpVersion}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Ignore, Matchers}

import scala.concurrent.Await
import scala.concurrent.duration._



@Ignore
class ProxyIntegrationTest extends FlatSpec with NettyClientSupport with Matchers with BeforeAndAfterAll
{
  // Create a new balancer
  val config = Configuration.load("proxy.conf")
  val core   = NeutrinoCore(config)
  val server = new NettyEchoServer()


  override def beforeAll() = {
    val servers = Seq(VirtualServer("id", "localhost", 8081))
    val pools   = Seq(VirtualPool(servers=servers))

    // Start running the downstream server
    server.start()

    // Start running the proxy. This will run until the process is interrupted...
    core.configure(LoadBalancer("id", pools))
    Await.ready(core.start(), 5 seconds)
  }

  override def afterAll() = {
    Await.ready(core.shutdown(), 5 seconds)
    server.shutdown()
  }


  it should "run 10000 requests" in {

    // We'll have to connect as well
    val client = HttpClient(port=8080)
    val request = new DefaultFullHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.GET, "/")

    for (i <- 0 until 10000) {
      val conn = client.send(request)
      conn.channel.close()
    }
  }

}



class ProxyIntegrationInitializer extends ChannelInitializer[Channel] {

  // Initialize the user-configurable pipeline
  protected def initChannel(ch: Channel): Unit = {
    ch.pipeline.addLast(new ExampleCloseHandler())
    ch.pipeline.addLast(new ExamplePipelineHandler())
    //pipeline.addLast(new ExampleCustomHandler())
  }
} 
Example 10
Source File: DataFrameReportPerformanceSpec.scala    From seahorse   with Apache License 2.0
package ai.deepsense.deeplang.doperables.dataframe

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType}
import org.scalatest.{BeforeAndAfter, Ignore}

import ai.deepsense.commons.utils.{DoubleUtils, Logging}
import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}

// It's ignored because it has no assertions; it only prints report generation time.
@Ignore
class DataFrameReportPerformanceSpec
    extends DeeplangIntegTestSupport
    with BeforeAndAfter
    with TestFiles
    with Logging {
  val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv"

  "DataFrame" should {
    "generate report" when {
      "DataFrame has 17K of rows" in {
        val numberOfTries = 10
        var results: Seq[Double] = Seq()
        for (i <- 1 to numberOfTries) {
          val dataFrame: DataFrame = demandDataFrame()
          val start = System.nanoTime()
          val report = dataFrame.report()
          val end = System.nanoTime()
          val time1: Double = (end - start).toDouble / 1000000000.0
          results = results :+ time1
          logger.debug("Report generation time: {}", DoubleUtils.double2String(time1))
        }
        logger.debug(
          "Mean report generation time: {}",
          DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble))
      }
    }
  }

  private def demandDataFrame(): DataFrame = {
    val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile)
    val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row)
    executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data)
  }

  private def demandSchema: StructType = StructType(Seq(
    StructField("datetime", TimestampType),
    StructField("log_count", DoubleType),
    StructField("workingday", DoubleType),
    StructField("holiday", DoubleType),
    StructField("season2", DoubleType),
    StructField("season3", DoubleType),
    StructField("season4", DoubleType)))

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}

private object DataFrameHelpers {
  def demandString2Row(s: String): Row = {
    val split = s.split(",")
    Row(
      timestamp(split(0)),
      split(1).toDouble,
      split(2).toDouble,
      split(3).toDouble,
      split(4).toDouble,
      split(5).toDouble,
      split(6).toDouble
    )
  }

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
} 
Example 11
Source File: HistogramTrendsSpec.scala    From haystack-trends   with Apache License 2.0
package com.expedia.www.haystack.trends.integration.tests

import com.expedia.metrics.MetricData
import com.expedia.www.haystack.trends.integration.IntegrationTestSpec
import org.apache.kafka.streams.KeyValue
import org.apache.kafka.streams.integration.utils.IntegrationTestUtils
import org.scalatest.{Ignore, Sequential}

import scala.collection.JavaConverters._
import scala.concurrent.duration._

@Ignore
@Sequential
class HistogramTrendsSpec extends IntegrationTestSpec {

  private val MAX_METRICPOINTS = 62
  private val numberOfWatermarkedWindows = 1

  "TimeSeriesAggregatorTopology" should {

    "aggregate histogram type metricPoints from input topic based on rules" in {
      Given("a set of metricPoints with type metric and kafka specific configurations")
      val METRIC_NAME = "duration"
      //HistogramMetric
      val expectedOneMinAggregatedPoints: Int = (MAX_METRICPOINTS - 1 - numberOfWatermarkedWindows) * 7
      // One less because no aggregate is generated for the last (MAX_METRICPOINTS * 60)th-second metric point
      val expectedFiveMinAggregatedPoints: Int = (MAX_METRICPOINTS / 5 - numberOfWatermarkedWindows) * 7
      val expectedFifteenMinAggregatedPoints: Int = (MAX_METRICPOINTS / 15) * 7
      val expectedOneHourAggregatedPoints: Int = (MAX_METRICPOINTS / 60) * 7
      val expectedTotalAggregatedPoints: Int = expectedOneMinAggregatedPoints + expectedFiveMinAggregatedPoints + expectedFifteenMinAggregatedPoints + expectedOneHourAggregatedPoints
      val streamsRunner = createStreamRunner()

      When("metricPoints are produced in 'input' topic async, and kafka-streams topology is started")
      produceMetricPointsAsync(MAX_METRICPOINTS, 10.milli, METRIC_NAME, MAX_METRICPOINTS * 60)
      streamsRunner.start()

      Then("we should read all aggregated metricData from 'output' topic")
      val waitTimeMs = 15000
      val result: List[KeyValue[String, MetricData]] =
        IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived[String, MetricData](RESULT_CONSUMER_CONFIG, OUTPUT_TOPIC, expectedTotalAggregatedPoints, waitTimeMs).asScala.toList
      validateAggregatedMetricPoints(result, expectedOneMinAggregatedPoints, expectedFiveMinAggregatedPoints, expectedFifteenMinAggregatedPoints, expectedOneHourAggregatedPoints)
    }
  }
} 
Example 12
Source File: Downloader$Test.scala    From mystem-scala   with MIT License
package ru.stachek66.tools

import java.io.File
import java.net.URL

import org.junit.runner.RunWith
import org.scalatest.{Ignore, FunSuite}
import org.scalatest.junit.JUnitRunner


@Ignore
class Downloader$Test extends FunSuite {

  test("downloading-something") {

    val hello = new File("hello-test.html")
    val mystem = new File("atmta.binary")

    Downloader.downloadBinaryFile(new URL("http://www.stachek66.ru/"), hello)

    Downloader.downloadBinaryFile(
      new URL("http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz"),
      mystem
    )

    Downloader.downloadBinaryFile(
      new URL("http://download.cdn.yandex.net/mystem/mystem-3.1-win-64bit.zip"),
      mystem
    )

    hello.delete
    mystem.delete
  }

  test("download-and-unpack") {
    val bin = new File("atmta.binary.tar.gz")
    val bin2 = new File("executable")

    Decompressor.select.unpack(
      Downloader.downloadBinaryFile(
        new URL("http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz"),
        bin),
      bin2
    )

    bin.delete
    bin2.delete
  }
} 
Example 13
Source File: CouchbaseJournalPerfSpec.scala    From akka-persistence-couchbase   with Apache License 2.0
package akka.persistence.couchbase

import akka.persistence.CapabilityFlag
import akka.persistence.journal.JournalPerfSpec
import akka.testkit.WithLogCapturing
import com.typesafe.config.ConfigFactory
import org.scalatest.Ignore

import scala.concurrent.duration._

// this test is quite heavy and we don't want to run it on travis
// remove annotation to run locally
@Ignore
class CouchbaseJournalPerfSpec
    extends JournalPerfSpec(ConfigFactory.parseString("""
          akka.loglevel = debug
          akka.loggers = ["akka.testkit.SilenceAllTestEventListener"]
        """).withFallback(ConfigFactory.load()))
    with CouchbaseBucketSetup
    with WithLogCapturing {
  override def awaitDurationMillis: Long = 20.seconds.toMillis

  // We want to test with persisting guaranteed, which makes
  // it quite slow. This was adjusted to pass on travis.
  override def eventsCount: Int = 1000

  override protected def supportsRejectingNonSerializableObjects: CapabilityFlag = false
} 
Example 14
Source File: AmqpSubscriberPerfSpec.scala    From reliable-http-client   with Apache License 2.0
package rhttpc.transport.amqp

import akka.Done
import akka.actor.{Actor, ActorSystem, Props}
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.{HttpRequest, HttpResponse}
import akka.pattern._
import akka.stream.ActorMaterializer
import akka.testkit.{TestKit, TestProbe}
import dispatch.url
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Ignore}
import rhttpc.transport.{Deserializer, InboundQueueData, OutboundQueueData, Serializer}

import scala.concurrent.duration._
import scala.concurrent.{Await, Future}
import scala.util.{Random, Try}

@Ignore
class AmqpSubscriberPerfSpec extends TestKit(ActorSystem("AmqpSubscriberPerfSpec")) with FlatSpecLike with BeforeAndAfterAll {
  import system.dispatcher

  implicit val materializer = ActorMaterializer()

  implicit def serializer[Msg] = new Serializer[Msg] {
    override def serialize(obj: Msg): String = obj.toString
  }

  implicit def deserializer[Msg] = new Deserializer[Msg] {
    override def deserialize(value: String): Try[Msg] = Try(value.asInstanceOf[Msg])
  }

  val queueName = "request"
  val outboundQueueData = OutboundQueueData(queueName, autoDelete = true, durability = false)
  val inboundQueueData = InboundQueueData(queueName, batchSize = 10, parallelConsumers = 10, autoDelete = true, durability = false)
  val count = 100

  private val interface = "localhost"
  private val port = 8081

  def handle(request: HttpRequest) = {
    val delay = 5 + Random.nextInt(10)
    after(delay.seconds, system.scheduler)(Future.successful(HttpResponse()))
  }

  it should "have a good throughput" in {
    val bound = Await.result(
      Http().bindAndHandleAsync(
        handle, interface, port
      ),
      5.seconds
    )
    val http = dispatch.Http()
//      .configure(_.setMaxConnections(count)
//        .setExecutorService(Executors.newFixedThreadPool(count)))

    val connection = Await.result(AmqpConnectionFactory.connect(system), 5 seconds)
    val transport = AmqpTransport(
      connection = connection
    )
    val publisher = transport.publisher[String](outboundQueueData)
    val probe = TestProbe()
    val actor = system.actorOf(Props(new Actor {
      override def receive: Receive = {
        case str: String =>
          http(url(s"http://$interface:$port") OK identity).map(_ => Done).pipeTo(self)(sender())
        case Done =>
          probe.ref ! Done
          sender() ! Done
      }
    }))
    val subscriber = transport.subscriber[String](inboundQueueData, actor)
    subscriber.start()

    try {
      measureMeanThroughput(count) {
        (1 to count).foreach { _ => publisher.publish("x") }

        probe.receiveWhile(10 minutes, messages = count) { case a => a }
      }
    } finally {
      Await.result(subscriber.stop(), 5.seconds)
      connection.close(5 * 1000)
      Await.result(bound.unbind(), 5.seconds)
    }
  }

  def measureMeanThroughput(count: Int)(consume: => Unit) = {
    val before = System.currentTimeMillis()
    consume
    val msgsPerSecond = count / ((System.currentTimeMillis() - before).toDouble / 1000)
    println(s"Throughput was: $msgsPerSecond msgs/sec")
  }

  override protected def afterAll(): Unit = {
    shutdown()
  }
} 
Example 15
Source File: LuceneCoarseGrainIndexSuite.scala    From carbondata   with Apache License 2.0
package org.apache.carbondata.index.lucene

import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.{BeforeAndAfterAll, Ignore}


@Ignore
class LuceneCoarseGrainIndexSuite extends QueryTest with BeforeAndAfterAll {

  val file2 = resourcesPath + "/index_input.csv"

  override protected def beforeAll(): Unit = {
    //n should be about 5000000 of reset if size is default 1024
    val n = 15000
    LuceneFineGrainIndexSuite.createFile(file2, n * 4, n)
    sql("DROP TABLE IF EXISTS normal_test")
    sql(
      """
        | CREATE TABLE normal_test(id INT, name STRING, city STRING, age INT)
        | STORED AS carbondata
        | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='LOCAL_SORT')
      """.stripMargin)
    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE normal_test OPTIONS('header'='false')")
  }

  test("test lucene coarse grain index") {
    sql("DROP TABLE IF EXISTS index_test")
    sql(
      """
        | CREATE TABLE index_test(id INT, name STRING, city STRING, age INT)
        | STORED AS carbondata
        | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='LOCAL_SORT')
      """.stripMargin)

    sql(
      s"""
         | CREATE INDEX dm
         | ON index_test (name, city)
         | AS 'lucene'
      """.stripMargin)

    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE index_test OPTIONS('header'='false')")

    checkAnswer(sql("select * from index_test where name='n502670'"),
      sql("select * from normal_test where name='n502670'"))
  }

  override protected def afterAll(): Unit = {
    LuceneFineGrainIndexSuite.deleteFile(file2)
    sql("DROP TABLE IF EXISTS normal_test")
    sql("DROP TABLE IF EXISTS index_test")
  }

} 
Example 16
Source File: ArangoSparkReadTest.scala    From arangodb-spark-connector   with Apache License 2.0
package com.arangodb.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.scalatest.BeforeAndAfterAll
import org.scalatest.BeforeAndAfterEach
import org.scalatest.FunSuite
import org.scalatest.Matchers
import collection.JavaConverters._
import com.arangodb.ArangoDB
import com.arangodb.ArangoDBException
import com.arangodb.velocypack.VPackBuilder
import com.arangodb.velocypack.ValueType
import scala.reflect.ClassTag
import com.arangodb.spark.rdd.partition.ArangoPartitionerSinglePartition
import org.scalatest.Ignore
import com.arangodb.entity.LoadBalancingStrategy

class ArangoSparkReadTest extends FunSuite with Matchers with BeforeAndAfterAll with BeforeAndAfterEach with SharedSparkContext {

  val DB = "spark_test_db"
  val COLLECTION = "spark_test_col"
  val arangoDB = new ArangoDB.Builder().build()

  override def beforeAll() {
    super.beforeAll()
    try {
      arangoDB.db(DB).drop()
    } catch {
      case e: ArangoDBException =>
    }
    arangoDB.createDatabase(DB)
    arangoDB.db(DB).createCollection(COLLECTION)
    val documents = sc.parallelize((1 to 100).map { i => TestEntity(i) })
    ArangoSpark.save(documents, COLLECTION, WriteOptions(DB))
  }

  override def afterAll() {
    try {
      arangoDB.db(DB).drop()
      arangoDB.shutdown()
    } finally {
      super.afterAll()
    }
  }

  test("load all documents from collection") {
    val rdd = ArangoSpark.load[TestEntity](sc, COLLECTION, ReadOptions(DB))
    rdd.count() should be(100)
  }

  test("load with custom partitioner") {
    val rdd = ArangoSpark.load[TestEntity](sc, COLLECTION, ReadOptions(DB, partitioner = new ArangoPartitionerSinglePartition()))
    rdd.count() should be(100)
  }

  test("load documents from collection with filter statement") {
    val rdd = ArangoSpark.load[TestEntity](sc, COLLECTION, ReadOptions(DB))
    val rdd2 = rdd.filter("doc.test <= 50")
    rdd2.count() should be(50)
  }
  
  test("load all documents from collection with load balancing") {
	  val rdd = ArangoSpark.load[TestEntity](sc, COLLECTION, ReadOptions(DB).acquireHostList(false).loadBalancingStrategy(LoadBalancingStrategy.ROUND_ROBIN))
	  rdd.count() should be(100)
  }

} 
Example 17
Source File: ArangoSparkSSLWriteTest.scala    From arangodb-spark-connector   with Apache License 2.0
package com.arangodb.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.scalatest.BeforeAndAfterAll
import org.scalatest.BeforeAndAfterEach
import org.scalatest.FunSuite
import org.scalatest.Matchers

import com.arangodb.ArangoDB
import com.arangodb.ArangoDBException
import com.arangodb.velocypack.VPackBuilder
import com.arangodb.velocypack.ValueType
import org.scalatest.Ignore

@Ignore
class ArangoSparkSSLWriteTest extends FunSuite with Matchers with BeforeAndAfterAll with BeforeAndAfterEach with SharedSparkContextSSL {

  val DB = "spark_test_db"
  val COLLECTION = "spark_test_col"
  val arangoDB = new ArangoDB.Builder().build()

  override def beforeAll() {
    super.beforeAll()
    try {
      arangoDB.db(DB).drop()
    } catch {
      case e: ArangoDBException =>
    }
    arangoDB.createDatabase(DB)
    arangoDB.db(DB).createCollection(COLLECTION)
  }

  override def afterAll() {
    try {
      arangoDB.db(DB).drop()
      arangoDB.shutdown()
    } finally {
      super.afterAll()
    }
  }

  override def afterEach() {
    arangoDB.db(DB).collection(COLLECTION).truncate()
  }

  private def checkDocumentCount(count: Int) {
    arangoDB.db(DB).collection(COLLECTION).count().getCount should be(count)
  }

  test("save RDD to ArangoDB") {
    checkDocumentCount(0)

    val documents = sc.parallelize((1 to 100).map { i => TestEntity(i) })
    ArangoSpark.save(documents, COLLECTION, WriteOptions(DB))

    checkDocumentCount(100)
  }

  test("save RDD[VPackSlice] to ArangoDB") {
    checkDocumentCount(0)

    val documents = sc.parallelize((1 to 100).map { i => new VPackBuilder().add(ValueType.OBJECT).add("test", Integer.valueOf(i)).close().slice() })
    ArangoSpark.save(documents, COLLECTION, WriteOptions(DB))

    checkDocumentCount(100)
  }

  test("save DataFrame to ArangoDB") {
    checkDocumentCount(0)

    val documents = sc.parallelize((1 to 100).map { i => TestEntity(i) })
    val sql: SQLContext = SQLContext.getOrCreate(sc);
    val df = sql.createDataFrame(documents, classOf[TestEntity])
    ArangoSpark.saveDF(df, COLLECTION, WriteOptions(DB))

    checkDocumentCount(100)
  }

  test("save Dataset to ArangoDB") {
    checkDocumentCount(0)

    val documents = sc.parallelize((1 to 100).map { i => TestEntity(i) })
    val sql: SQLContext = SQLContext.getOrCreate(sc);
    val encoder = ExpressionEncoder.javaBean(classOf[TestEntity])
    val ds = sql.createDataset(documents)(encoder);
    ArangoSpark.save(ds, COLLECTION, WriteOptions(DB))

    checkDocumentCount(100)
  }

} 
Example 18
Source File: ArangoSparkSSLReadTest.scala    From arangodb-spark-connector   with Apache License 2.0
package com.arangodb.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.scalatest.BeforeAndAfterAll
import org.scalatest.BeforeAndAfterEach
import org.scalatest.FunSuite
import org.scalatest.Matchers
import collection.JavaConverters._
import com.arangodb.ArangoDB
import com.arangodb.ArangoDBException
import com.arangodb.velocypack.VPackBuilder
import com.arangodb.velocypack.ValueType
import scala.reflect.ClassTag
import com.arangodb.spark.rdd.partition.ArangoPartitionerSinglePartition
import org.scalatest.Ignore

@Ignore
class ArangoSparkSSLReadTest extends FunSuite with Matchers with BeforeAndAfterAll with BeforeAndAfterEach with SharedSparkContextSSL {

  val DB = "spark_test_db"
  val COLLECTION = "spark_test_col"
  val arangoDB = new ArangoDB.Builder().build()

  override def beforeAll() {
    super.beforeAll()
    try {
      arangoDB.db(DB).drop()
    } catch {
      case e: ArangoDBException =>
    }
    arangoDB.createDatabase(DB)
    arangoDB.db(DB).createCollection(COLLECTION)
    val documents = sc.parallelize((1 to 100).map { i => TestEntity(i) })
    ArangoSpark.save(documents, COLLECTION, WriteOptions(DB))
  }

  override def afterAll() {
    try {
      arangoDB.db(DB).drop()
      arangoDB.shutdown()
    } finally {
      super.afterAll()
    }
  }

  test("load all documents from collection") {
    val rdd = ArangoSpark.load[TestEntity](sc, COLLECTION, ReadOptions(DB))
    rdd.count() should be(100)
  }

  test("load with custom partitioner") {
    val rdd = ArangoSpark.load[TestEntity](sc, COLLECTION, ReadOptions(DB, partitioner = new ArangoPartitionerSinglePartition()))
    rdd.count() should be(100)
  }

  test("load documents from collection with filter statement") {
    val rdd = ArangoSpark.load[TestEntity](sc, COLLECTION, ReadOptions(DB))
    val rdd2 = rdd.filter("doc.test <= 50")
    rdd2.count() should be(50)
  }
} 
Example 19
Source File: OapQuerySuite.scala    From OAP   with Apache License 2.0
package org.apache.spark.sql.hive.execution

import java.util.{Locale, TimeZone}

import org.scalatest.{BeforeAndAfter, Ignore}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.internal.SQLConf

// Ignored because running it in a separate package hits problems with the shaded Spark source.
@Ignore
class OapQuerySuite extends HiveComparisonTest with BeforeAndAfter  {
  private lazy val originalTimeZone = TimeZone.getDefault
  private lazy val originalLocale = Locale.getDefault
  import org.apache.spark.sql.hive.test.TestHive._

  // Note: invoking TestHive creates a SparkContext that we cannot configure.
  // Be careful: this may affect the SparkContext currently in use and cause strange problems.
  private lazy val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled

  override def beforeAll() {
    super.beforeAll()
    TestHive.setCacheTables(true)
    // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
    TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
    // Add Locale setting
    Locale.setDefault(Locale.US)
    // Ensures that cross joins are enabled so that we can test them
    TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true)
    TestHive.setConf(HiveUtils.CONVERT_METASTORE_PARQUET, true)
  }

  override def afterAll() {
    try {
      TestHive.setCacheTables(false)
      TimeZone.setDefault(originalTimeZone)
      Locale.setDefault(originalLocale)
      sql("DROP TEMPORARY FUNCTION IF EXISTS udtf_count2")
      TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled)
    } finally {
      super.afterAll()
    }
  }
  private def assertDupIndex(body: => Unit): Unit = {
    val e = intercept[AnalysisException] { body }
    assert(e.getMessage.toLowerCase.contains("exists"))
  }

  test("create hive table in parquet format") {
    try {
      sql("create table p_table (key int, val string) stored as parquet")
      sql("insert overwrite table p_table select * from src")
      sql("create oindex if not exists p_index on p_table(key)")
      assert(sql("select val from p_table where key = 238")
        .collect().head.getString(0) == "val_238")
    } finally {
      sql("drop oindex p_index on p_table")
      sql("drop table p_table")
    }
  }

  test("create duplicate hive table in parquet format") {
    try {
      sql("create table p_table1 (key int, val string) stored as parquet")
      sql("insert overwrite table p_table1 select * from src")
      sql("create oindex p_index on p_table1(key)")
      assertDupIndex { sql("create oindex p_index on p_table1(key)") }
    } finally {
      sql("drop oindex p_index on p_table1")
    }
  }
} 
Example 20
Source File: TracedFlowSpec.scala    From money   with Apache License 2.0
package com.comcast.money.akka.acceptance.stream

import akka.stream.Attributes
import akka.stream.scaladsl.{ Keep, Sink, Source }
import akka.stream.stage.{ InHandler, OutHandler }
import com.comcast.money.akka.Blocking.RichFuture
import com.comcast.money.akka.SpanHandlerMatchers.{ haveSomeSpanNames, maybeCollectingSpanHandler }
import com.comcast.money.akka.stream.{ TracedFlow, TracedFlowLogic }
import com.comcast.money.akka.{ AkkaMoneyScope, MoneyExtension, SpanContextWithStack }
import org.scalatest.Ignore

class TracedFlowSpec extends AkkaMoneyScope {

  "MoneyExtension should pass a span through an Akka Stream" in {
    implicit val moneyExtension: MoneyExtension = MoneyExtension(actorSystem)
    implicit val spanContextWithStack: SpanContextWithStack = new SpanContextWithStack

    testStream().get()

    maybeCollectingSpanHandler should haveSomeSpanNames(testSpanNames)
  }

  "MoneyExtension should pass a span through an asynchronous Akka Stream" in {
    implicit val moneyExtension: MoneyExtension = MoneyExtension(actorSystem)
    implicit val spanContextWithStack: SpanContextWithStack = new SpanContextWithStack

    multithreadedTestStream().get()

    maybeCollectingSpanHandler should haveSomeSpanNames(testSpanNames)
  }

  val testSpanNames = Seq("flow-3", "flow-2", "flow-1")

  def testStream()(implicit spanContextWithStack: SpanContextWithStack, moneyExtension: MoneyExtension) =
    Source[(String, SpanContextWithStack)](List(("", spanContextWithStack)))
      .via(new TestFlowShape("flow-1"))
      .via(new TestFlowShape("flow-2"))
      .via(new TestFlowShape("flow-3", isFinalFlow = true))
      .runWith(Sink.seq)

  def multithreadedTestStream()(implicit spanContextWithStack: SpanContextWithStack, moneyExtension: MoneyExtension) =
    Source[(String, SpanContextWithStack)](List(("", spanContextWithStack)))
      .via(new TestFlowShape("flow-1").async)
      .via(new TestFlowShape("flow-2").async)
      .via(new TestFlowShape("flow-3", isFinalFlow = true).async)
      .runWith(Sink.seq)

  class TestFlowShape(id: String, isFinalFlow: Boolean = false)(implicit moneyExtension: MoneyExtension) extends TracedFlow[String, String] {

    override val inletName: String = "testin"
    override val outletName: String = "testout"

    override def createLogic(inheritedAttributes: Attributes) =
      new TracedFlowLogic {
        setHandler(in, new InHandler {
          override def onPush(): Unit = {
            val logic = (msg: String) => s"$msg$id"
            if (isFinalFlow) stopTracePush(key = id, stageLogic = logic)
            else tracedPush(id, logic)
          }
        })

        setHandler(out, new OutHandler {
          override def onPull(): Unit =
            if (isClosed(in)) completeStage()
            else pull(in)
        })
      }
  }

} 
Example 21
package com.tomekl007.chapter_6

import com.tomekl007.UserTransaction
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.scalatest.{FunSuite, Ignore}

class MockingDataSources extends FunSuite {
  val spark = SparkSession.builder().master("local[2]").getOrCreate()


  ignore("loading data on prod from hive") {
    UserDataLogic.loadAndGetAmount(spark, HiveDataLoader.loadUserTransactions)
  }

  test("mock loading data from hive"){
    //given
    import spark.sqlContext.implicits._
    val df = spark.sparkContext
      .makeRDD(List(UserTransaction("a", 100), UserTransaction("b", 200)))
      .toDF()

    //when
    val res = UserDataLogic.loadAndGetAmount(spark, _ => df)

    //then
    res.show()
  }

}

object UserDataLogic {
  def loadAndGetAmount(sparkSession: SparkSession, provider: SparkSession => DataFrame): DataFrame = {
    val df = provider(sparkSession)
    df.select(df("amount"))
  }
}

object HiveDataLoader {
  def loadUserTransactions(sparkSession: SparkSession): DataFrame = {
    sparkSession.sql("select * from transactions")
  }
} 
Example 22
Source File: LDAParitySpec.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.parity.clustering

import org.apache.spark.ml.{Pipeline, Transformer}
import org.apache.spark.ml.clustering.LDA
import org.apache.spark.ml.feature.{CountVectorizer, StopWordsRemover, Tokenizer}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.parity.SparkParityBase
import org.apache.spark.sql.DataFrame
import org.scalatest.Ignore


@Ignore
class LDAParitySpec extends SparkParityBase {
  override val dataset: DataFrame = textDataset.select("text")

  val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")

  val remover = new StopWordsRemover()
    .setInputCol(tokenizer.getOutputCol)
    .setOutputCol("words_filtered")

  val cv = new CountVectorizer().setInputCol("words_filtered").setOutputCol("features").setVocabSize(50000)

  val lda = new LDA().setK(5).setMaxIter(2)

  override val sparkTransformer: Transformer = new Pipeline().setStages(Array(tokenizer, remover, cv, lda)).fit(dataset)

  override def equalityTest(sparkDataset: DataFrame,
                            mleapDataset: DataFrame): Unit = {
    val sparkPredictionCol = sparkDataset.schema.fieldIndex("topicDistribution")
    val mleapPredictionCol = mleapDataset.schema.fieldIndex("topicDistribution")

    sparkDataset.collect().zip(mleapDataset.collect()).foreach {
      case (sv, mv) =>
        val sparkPrediction = sv.getAs[Vector](sparkPredictionCol)
        val mleapPrediction = mv.getAs[Vector](mleapPredictionCol)

        sparkPrediction.toArray.zip(mleapPrediction.toArray).foreach {
          case (s, m) => assert(Math.abs(m - s) < 0.001)
        }
    }
  }
} 
Example 23
Source File: RestClientServerTest.scala    From udash-core   with Apache License 2.0
package io.udash
package web.guide.demos.rest

import io.udash.web.SeleniumTest
import org.openqa.selenium.By.ById
import org.scalatest.{BeforeAndAfterEach, Ignore}

//todo migrate content from udash selenium or remove
@Ignore
class RestClientServerTest extends SeleniumTest with BeforeAndAfterEach {
  val url = "/rest"

  "RestClientServer view" should {
    "receive response in demo" in {
      val callDemo = findElementById("echo-rest-demo")
      val inputDemo = callDemo.findElement(new ById("echo-rest-demo-input"))
      val responseDemo = callDemo.findElement(new ById("echo-rest-demo-response"))
      val queryButton = callDemo.findElement(new ById("echo-rest-demo-query-btn"))
      val headerButton = callDemo.findElement(new ById("echo-rest-demo-header-btn"))
      val urlButton = callDemo.findElement(new ById("echo-rest-demo-url-btn"))
      val bodyButton = callDemo.findElement(new ById("echo-rest-demo-body-btn"))

      eventually {
        responseDemo.getText should be("Response:")
      }

      val request = inputDemo.getAttribute("value")

      queryButton.click()
      eventually {
        responseDemo.getText should be(s"Response:\nQuery:$request")
      }

      headerButton.click()
      eventually {
        responseDemo.getText should be(s"Response:\nHeader:$request")
      }

      urlButton.click()
      eventually {
        responseDemo.getText should be(s"Response:\nURL:$request")
      }

      bodyButton.click()
      eventually {
        responseDemo.getText should be(s"Response:\nBody:$request")
      }
    }
  }
} 
Example 24
Source File: RestIntroTest.scala    From udash-core   with Apache License 2.0
package io.udash
package web.guide.demos.rest

import io.udash.web.SeleniumTest
import org.openqa.selenium.By.{ById, ByTagName}
import org.scalatest.Ignore

//todo migrate content from udash selenium or remove
@Ignore
class RestIntroTest extends SeleniumTest {
  val url = "/rest"

  "RestIntro view" should {
    "receive response in demo" in {
      val callDemo = findElementById("simple-rest-demo")
      val stringButton = callDemo.findElement(new ById("simple-rest-demo-string-btn"))
      val intButton = callDemo.findElement(new ById("simple-rest-demo-int-btn"))
      val classButton = callDemo.findElement(new ById("simple-rest-demo-class-btn"))

      eventually {
        val responses = callDemo.findElements(new ByTagName("div"))
        responses.size should be(4)
        responses.get(1).getText should be("String: -")
        responses.get(2).getText should be("Int: 0")
        responses.get(3).getText should be("Class: None")
      }

      stringButton.click()
      intButton.click()
      classButton.click()

      eventually {
        val responses = callDemo.findElements(new ByTagName("div"))
        responses.size should be(4)
        responses.get(1).getText should be("String: OK")
        responses.get(2).getText should be("Int: 123")
        responses.get(3).getText should be("Class: Some(RestExampleClass(42,Udash,RestTuple(321.123,REST Support)))")
      }
    }
  }
} 
Example 25
Source File: CsvSourceTypeConversionTest.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.csv

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import io.eels.schema._
import org.scalatest.{Ignore, Matchers, WordSpec}

@Ignore
class CsvSourceTypeConversionTest extends WordSpec with Matchers {
  "CsvSource" should {
    "read schema" in {
      val exampleCsvString =
        """A,B,C,D
          |1,2.2,3,foo
          |4,5.5,6,bar
        """.stripMargin

      val stream = new ByteArrayInputStream(exampleCsvString.getBytes(StandardCharsets.UTF_8))
      val schema = new StructType(Vector(
        Field("A", IntType.Signed),
        Field("B", DoubleType),
        Field("C", IntType.Signed),
        Field("D", StringType)
      ))
      val source = new CsvSource(() => stream)
        .withSchema(schema)
      
      source.schema.fields.foreach(println)
      val ds = source.toDataStream()
      val firstRow = ds.iterator.toIterable.head
      val firstRowA = firstRow.get("A")
      println(firstRowA) // prints 1 as expected
      println(firstRowA.getClass.getTypeName) // prints java.lang.String
      assert(firstRowA == 1) // this assertion will fail because firstRowA is not an Int
    }
  }
}