org.apache.commons.io.FileUtils Scala Examples

The following examples show how to use org.apache.commons.io.FileUtils. They are drawn from open-source projects; to see each snippet in context, follow the project and source-file reference in the header above it.
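
As a quick orientation before the project examples, the short sketch below exercises the FileUtils calls that recur throughout them: creating a directory, writing and reading a text file, copying it, and recursively deleting the work area. It is a minimal, hypothetical sketch; the object name and paths are placeholders, not taken from any of the projects below.

import java.io.File
import java.nio.charset.StandardCharsets

import org.apache.commons.io.FileUtils

object FileUtilsQuickTour extends App {
  // Work in a throwaway directory under the system temp dir (placeholder path).
  val workDir = new File(System.getProperty("java.io.tmpdir"), "fileutils-demo")
  FileUtils.forceMkdir(workDir)                        // create it, including any missing parents

  // Write a small text file, then read it back.
  val config = new File(workDir, "app.conf")
  FileUtils.writeStringToFile(config, "key = value\n", StandardCharsets.UTF_8)
  println(FileUtils.readFileToString(config, StandardCharsets.UTF_8).trim)

  // Copy the file into a sibling directory (created on demand).
  FileUtils.copyFileToDirectory(config, new File(workDir, "backup"))

  // Recursively delete the whole work area, as many of the examples below do in their test cleanup.
  FileUtils.deleteDirectory(workDir)
}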
Example 1
Source File: Releaser.scala    From releaser   with Apache License 2.0
package uk.gov.hmrc.releaser

import java.io.File
import java.nio.file.{Files, Path}

import org.apache.commons.io.FileUtils
import uk.gov.hmrc.releaser.bintray.{BintrayHttp, BintrayRepoConnector, DefaultBintrayRepoConnector}
import uk.gov.hmrc.releaser.github.{GithubConnector, Repo}
import uk.gov.hmrc.{CredentialsFinder, FileDownloader, Logger}

import scala.util.{Failure, Success, Try}

object ReleaserMain {
  def main(args: Array[String]): Unit = {
    val result = Releaser(args)
    System.exit(result)
  }
}

object Releaser extends Logger {

  import ArgParser._

  def apply(args: Array[String]): Int = {
    parser.parse(args, Config()) match {
      case Some(config) =>
        val githubName = config.githubNameOverride.getOrElse(config.artefactName)
        run(config.artefactName, ReleaseCandidateVersion(config.rcVersion), config.releaseType, githubName, config.releaseNotes, config.dryRun)
      case None => -1
    }
  }

  def run(artefactName: String, rcVersion: ReleaseCandidateVersion, releaseType: ReleaseType.Value, gitHubName: String, releaseNotes: Option[String], dryRun: Boolean = false): Int = {
    val githubCredsFile = System.getProperty("user.home") + "/.github/.credentials"
    val bintrayCredsFile = System.getProperty("user.home") + "/.bintray/.credentials"

    val githubCredsOpt = CredentialsFinder.findGithubCredsInFile(new File(githubCredsFile).toPath)
    val bintrayCredsOpt = CredentialsFinder.findBintrayCredsInFile(new File(bintrayCredsFile).toPath)

    doReleaseWithCleanup { directories =>
      if (githubCredsOpt.isEmpty) {
        log.info(s"Didn't find github credentials in $githubCredsFile")
        -1
      } else if (bintrayCredsOpt.isEmpty) {
        log.info(s"Didn't find Bintray credentials in $bintrayCredsFile")
        -1
      } else {

        val releaserVersion = getClass.getPackage.getImplementationVersion
        val metaDataProvider = new ArtefactMetaDataProvider()
        val gitHubDetails = if (dryRun) GithubConnector.dryRun(githubCredsOpt.get, releaserVersion) else GithubConnector(githubCredsOpt.get, releaserVersion)
        val bintrayDetails = if (dryRun) BintrayRepoConnector.dryRun(bintrayCredsOpt.get, directories.workDir) else BintrayRepoConnector(bintrayCredsOpt.get, directories.workDir)
        val bintrayRepoConnector = new DefaultBintrayRepoConnector(directories.workDir, new BintrayHttp(bintrayCredsOpt.get), new FileDownloader)

        val coordinator = new Coordinator(directories.stageDir, metaDataProvider, gitHubDetails, bintrayRepoConnector)
        val result = coordinator.start(artefactName, Repo(gitHubName), rcVersion, releaseType, releaseNotes)

        result match {
          case Success(targetVersion) =>
            log.info(s"Releaser successfully released $artefactName $targetVersion")
            0
          case Failure(e) =>
            e.printStackTrace()
            log.info(s"Releaser failed to release $artefactName $rcVersion with error '${e.getMessage}'")
            1
        }
      }
    }
  }

  def doReleaseWithCleanup[T](f: ReleaseDirectories => T): T = {
    val directories = ReleaseDirectories()
    try {
      f(directories)
    } finally {
      log.info("cleaning releaser work directory")
      directories.delete().recover{case  t => log.warn(s"failed to delete releaser work directory ${t.getMessage}")}
    }

  }
}

case class ReleaseDirectories(tmpDirectory: Path = Files.createTempDirectory("releaser")) {

  lazy val workDir = Files.createDirectories(tmpDirectory.resolve("work"))
  lazy val stageDir = Files.createDirectories(tmpDirectory.resolve("stage"))

  def delete() = Try {
    FileUtils.forceDelete(tmpDirectory.toFile)
  }
} 
Example 2
Source File: RemoteConfigWriter.scala    From mvn_scalafmt   with Apache License 2.0
package org.antipathy.mvn_scalafmt.io

import org.antipathy.mvn_scalafmt.model.RemoteConfig
import java.io.File
import java.nio.charset.StandardCharsets

import org.apache.commons.io.FileUtils
import org.apache.maven.plugin.logging.Log
import java.nio.file.{Files, Path}


  override def write(input: RemoteConfig): Path = {

    log.info(s"Writing remote config to ${input.location.toAbsolutePath}")

    if (Files.exists(input.location)) {
      Files.delete(input.location)
    }

    val newConfig = new File(input.location.toAbsolutePath.toString)
    FileUtils.writeStringToFile(
      newConfig,
      input.contents,
      StandardCharsets.UTF_8
    )
    newConfig.toPath
  }
} 
Example 3
Source File: RemoteConfigWriterSpec.scala    From mvn_scalafmt   with Apache License 2.0
package org.antipathy.mvn_scalafmt.io

import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

import org.antipathy.mvn_scalafmt.model.RemoteConfig
import org.apache.commons.io.FileUtils
import org.apache.maven.plugin.logging.SystemStreamLog
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.GivenWhenThen
import org.scalatest.matchers.should.Matchers

class RemoteConfigWriterSpec extends AnyFlatSpec with GivenWhenThen with Matchers {

  behavior of "RemoteConfigWriter"

  it should "Write a config to a local path" in {

    val localPath = s"${System.getProperty("java.io.tmpdir")}${File.separator}.scalafmt.conf"
    val contents  = """version = "1.5.1"
                     |maxColumn = 120
                     |align = false
                     |rewrite.rules = [SortImports]
                     |danglingParentheses = true
                     |importSelectors = singleLine
                     |binPack.parentConstructors = true
                     |includeCurlyBraceInSelectChains = false""".stripMargin
    val writer    = new RemoteConfigWriter(new SystemStreamLog)
    val input     = RemoteConfig(contents, Paths.get(localPath))

    writer.write(input)

    new String(Files.readAllBytes(new File(localPath).toPath))
    Files.delete(input.location)
  }

  it should "Overwrite a config in a local path" in {

    val localPath = s"${System.getProperty("java.io.tmpdir")}${File.separator}.scalafmt2.conf"

    val contents    = """version = "1.5.1"
                     |maxColumn = 120
                     |align = false
                     |rewrite.rules = [SortImports]
                     |danglingParentheses = true
                     |importSelectors = singleLine
                     |binPack.parentConstructors = true
                     |includeCurlyBraceInSelectChains = false""".stripMargin
    val oldContents = "SomeOldConfig"

    val writer = new RemoteConfigWriter(new SystemStreamLog)
    val input  = RemoteConfig(contents, Paths.get(localPath))

    FileUtils.writeStringToFile(new File(localPath), oldContents, StandardCharsets.UTF_8)
    new String(Files.readAllBytes(new File(localPath).toPath)) should be(oldContents)

    writer.write(input)

    new String(Files.readAllBytes(new File(localPath).toPath)) should be(contents)
    Files.delete(input.location)
  }

} 
Example 4
Source File: TaglessFinal.scala    From Mastering-Functional-Programming   with MIT License
package jvm

import scala.concurrent.{ Future, Await }
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration

import cats._, cats.implicits._

trait Capabilities[F[_]] {
  def resource(name: String): F[String]
  def notify(target: String, text: String): F[Unit]
}

object TaglessFinalExample extends App {
  implicit val capabilities: Capabilities[Future] = new Capabilities[Future] {
    import java.io.File
    import org.apache.commons.io.FileUtils

    def resource(name: String): Future[String] =
      Future { FileUtils.readFileToString(new File(name), "utf8") }

    def notify(target: String, text: String): Future[Unit] =
      Future { println(s"Notifying $target: $text") }
  }

  implicit val anotherEnvironmentCapabilities: Capabilities[Future] = new Capabilities[Future] {
    def resource(name: String): Future[String] = ???
    def notify(target: String, text: String): Future[Unit] = ???
  }

  implicit val logMonad: Monad[Future] = new Monad[Future] {
    def flatMap[A, B](fa: Future[A])(f: (A) ⇒ Future[B]): Future[B] =
      fa.flatMap { x =>
        println(s"Trace of the Future's result: $x")
        f(x) }
    
    def pure[A](x: A): Future[A] = Future(x)

    def tailRecM[A, B](a: A)(f: (A) ⇒ Future[Either[A, B]]): Future[B] = ???
  }

  def income[F[_]](implicit M: Monad[F], C: Capabilities[F]): F[Unit] =
    for {
      contents <- C.resource("sales.csv")
      total = contents
        .split("\n").toList.tail  // Collection of lines, drop the CSV header
        .map { _.split(",").toList match  // List[Double] - prices of each of the entries
          { case name :: price :: Nil => price.toDouble }
        }
        .sum
      _ <- C.notify("[email protected]", s"Total income made today: $total")
    } yield ()

  Await.result(income[Future](logMonad, capabilities), Duration.Inf)  // Block so that the application does not exit prematurely
}

object FacadeExample {
  trait Capabilities {
    def resource(name: String): String
    def notify(target: String, text: String): Unit
  }

  def income(c: Capabilities): Unit = {
    val contents = c.resource("sales.csv")
    val total = contents
      .split("\n").toList.tail  // Collection of lines, drop the CSV header
      .map { _.split(",").toList match  // List[Double] - prices of each of the entries
        { case name :: price :: Nil => price.toDouble }
      }
      .sum
    c.notify("[email protected]", s"Total income made today: $total")
  }
} 
Example 5
Source File: TilingServiceSpec.scala    From recogito2   with Apache License 2.0
package transform.tiling

import java.io.File
import org.apache.commons.io.FileUtils
import org.specs2.mutable._
import org.specs2.runner._
import org.junit.runner._
import play.api.test._
import play.api.test.Helpers._

@RunWith(classOf[JUnitRunner])
class TilingServiceSpec extends Specification {

  val TEST_IMAGE = new File("test/resources/transform/tiling/Ptolemy_map_15th_century.jpg")

  val TMP_DIR = {
    val dir = new File("test/resources/transform/tiling/tmp")
    if (dir.exists)
      FileUtils.deleteDirectory(dir)
    dir
  }

  "The Tiling function" should {

    "create proper Zoomify tiles from the test image" in {
      TilingService.createZoomify(TEST_IMAGE, TMP_DIR)

      TMP_DIR.exists must equalTo(true)
      TMP_DIR.list.size must equalTo(2)
      new File(TMP_DIR, "ImageProperties.xml").exists must equalTo(true)

      val tileGroup0 = new File(TMP_DIR, "TileGroup0")
      tileGroup0.exists must equalTo(true)

      tileGroup0.list.size must equalTo(65)
      tileGroup0.list.filter(_.endsWith(".jpg")).size must equalTo(65)

      FileUtils.deleteDirectory(TMP_DIR)

      success
    }

  }

} 
Example 6
Source File: TestNewApiWithCaseClass.scala    From Mastering-Spark-for-Data-Science   with MIT License
package io.gzet.newapi

import java.io.File

import io.gzet.newapi.CreateAvroWithCase.{V21EnhancedDate, GkgRecordCase}
import io.gzet.test.SparkFunSuite
import com.databricks.spark.avro._
import org.apache.commons.io.FileUtils

class TestNewApiWithCaseClass extends SparkFunSuite {

  val inputFilePath = getClass.getResource("/20160101020000.gkg.csv")
  val avroStructPath = "target/20160101020000.gkg.case.avro"

  localTest("Create and write Avro using spark-avro lib and case") { spark =>

    val gdeltRDD = spark.sparkContext.textFile(inputFilePath.toString)

    val gdeltRowRDD = gdeltRDD.map(_.split("\t", -1))

    val gkgRecordRDD = gdeltRowRDD.map(attributes =>
      GkgRecordCase(CreateAvroWithCase.createGkgRecordId(attributes(0)),
      attributes(1).toLong,
      attributes(2),
      attributes(3),
      attributes(4),
      CreateAvroWithCase.createV1Counts(attributes(5)),
      CreateAvroWithCase.createV21Counts(attributes(6)),
      CreateAvroWithCase.createV1Themes(attributes(7)),
      CreateAvroWithCase.createV2EnhancedThemes(attributes(8)),
      CreateAvroWithCase.createV1Locations(attributes(9)),
      CreateAvroWithCase.createV2Locations(attributes(10)),
      CreateAvroWithCase.createV1Persons(attributes(11)),
      CreateAvroWithCase.createV2Persons(attributes(12)),
      CreateAvroWithCase.createV1Orgs(attributes(13)),
      CreateAvroWithCase.createV2Orgs(attributes(14)),
      CreateAvroWithCase.createV1Stone(attributes(15)),
      CreateAvroWithCase.createEnhancedDate((attributes(16))),
      CreateAvroWithCase.createV2GCAM(attributes(17)),
      attributes(18),
      CreateAvroWithCase.createV21RelImgAndVid(attributes(19)),
      CreateAvroWithCase.createV21RelImgAndVid(attributes(20)),
      CreateAvroWithCase.createV21RelImgAndVid(attributes(21)),
      CreateAvroWithCase.createV21Quotations(attributes(22)),
      CreateAvroWithCase.createV21AllNames(attributes(23)),
      CreateAvroWithCase.createV21Amounts(attributes(24)),
      CreateAvroWithCase.createV21TransInfo(attributes(25)),
      attributes(26))
    )

    FileUtils.deleteDirectory(new File(avroStructPath))

    val gdeltDF = spark.createDataFrame(gkgRecordRDD)
    gdeltDF.write.avro(avroStructPath)

    assertResult(4) (new File(avroStructPath).listFiles.length)
  }

  localTest("Read Avro into Dataframe using spark-avro") { spark =>
    val gdeltAvroDF = spark.read.format("com.databricks.spark.avro").load(avroStructPath)
    assertResult(10)(gdeltAvroDF.count)

    gdeltAvroDF.show
  }
} 
Example 7
Source File: TestNewApiWithStructs.scala    From Mastering-Spark-for-Data-Science   with MIT License
package io.gzet.newapi

import java.io.File

import io.gzet.test.SparkFunSuite
import com.databricks.spark.avro._
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.Row

class TestNewApiWithStructs extends SparkFunSuite {

  val inputFilePath = getClass.getResource("/20160101020000.gkg.csv")
  val avroStructPath = "target/20160101020000.gkg.struct.avro"

  localTest("Create and write Avro using spark-avro lib and Structs") { spark =>
    val gdeltRDD = spark.sparkContext.textFile(inputFilePath.toString)

    val gdeltRowRDD = gdeltRDD.map(_.split("\t", -1))
      .map(attributes => Row(
        CreateAvroWithStructs.createGkgRecordID(attributes(0)),
        attributes(1).toLong,
        attributes(2),
        attributes(3),
        attributes(4),
        CreateAvroWithStructs.createV1Counts(attributes(5)),
        CreateAvroWithStructs.createV21Counts(attributes(6)),
        CreateAvroWithStructs.createV1Themes(attributes(7)),
        CreateAvroWithStructs.createV2EnhancedThemes(attributes(8)),
        CreateAvroWithStructs.createV1Locations(attributes(9)),
        CreateAvroWithStructs.createV2Locations(attributes(10)),
        CreateAvroWithStructs.createV1Persons(attributes(11)),
        CreateAvroWithStructs.createV2Persons(attributes(12)),
        CreateAvroWithStructs.createV1Orgs(attributes(13)),
        CreateAvroWithStructs.createV2Orgs(attributes(14)),
        CreateAvroWithStructs.createV1Stone(attributes(15)),
        CreateAvroWithStructs.createV21Dates(attributes(16)),
        CreateAvroWithStructs.createV2GCAM(attributes(17)),
        attributes(18),
        CreateAvroWithStructs.createV21RelImgAndVid(attributes(19)),
        CreateAvroWithStructs.createV21RelImgAndVid(attributes(20)),
        CreateAvroWithStructs.createV21RelImgAndVid(attributes(21)),
        CreateAvroWithStructs.createV21Quotations(attributes(22)),
        CreateAvroWithStructs.createV21AllNames(attributes(23)),
        CreateAvroWithStructs.createV21Amounts(attributes(24)),
        CreateAvroWithStructs.createV21TransInfo(attributes(25)),
        attributes(26)
      ))

    FileUtils.deleteDirectory(new File(avroStructPath))

    val gdeltDF = spark.createDataFrame(gdeltRowRDD, CreateAvroWithStructs.GkgSchema)
    gdeltDF.write.avro(avroStructPath)

    assertResult(4) (new File(avroStructPath).listFiles.length)

  }

  localTest("Read Avro into Dataframe using spark-avro") { spark =>
    val gdeltAvroDF = spark.read.format("com.databricks.spark.avro").load(avroStructPath)
    assertResult(10) (gdeltAvroDF.count)
    gdeltAvroDF.show
  }
} 
Example 8
Source File: CryptoTest.scala    From Mastering-Spark-for-Data-Science   with MIT License
package io.gzet

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.hadoop.io.compress.CryptoCodec
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.{Matchers, FunSuite}

class CryptoTest extends FunSuite with Matchers {

  val cryptoDir = System.getProperty("java.io.tmpdir") + "cryptTestDir"

  test("Crypto encrypt then decrypt file") {
    val conf = new SparkConf()
      .setAppName("Test Crypto")
      .setMaster("local")
      .set("spark.default.parallelism", "1")
      .set("spark.hadoop.io.compression.codecs", "org.apache.hadoop.io.compress.CryptoCodec")
    val sc = new SparkContext(conf)

    val testFile = getClass.getResource("/gdeltTestFile.csv")
    val rdd = sc.textFile(testFile.getPath)

    rdd.saveAsTextFile(cryptoDir, classOf[CryptoCodec])
    val read = sc.textFile(cryptoDir)

    val allLines = read.collect
    allLines.size should be(20)
    allLines(0).startsWith("331150686") should be (true)
    allLines(allLines.length - 1).endsWith("polytrack/") should be (true)

    FileUtils.deleteDirectory(new File(cryptoDir))
    sc.stop
  }
} 
Example 9
Source File: StorageSpec.scala    From piglet   with Apache License 2.0
package dbis.piglet.backends.spark

import java.io.File

import dbis.piglet.backends.{Record, SchemaClass}
import org.apache.commons.io.FileUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest._



case class DataRecord(col1: Int, col2: String) extends java.io.Serializable with SchemaClass {
  override def mkString(delim: String) = s"$col1$delim$col2"
}

case class DoubleRecord(col1: Double, col2: Double) extends java.io.Serializable with SchemaClass {
  override def mkString(delim: String) = s"$col1$delim$col2"
}

class StorageSpec extends FlatSpec with Matchers with BeforeAndAfter {
  var sc: SparkContext = _
  var conf: SparkConf = _

  before {
    // to avoid Akka rebinding to the same port, since it doesn't unbind
    // immediately after shutdown
    System.clearProperty("spark.driver.port")
    System.clearProperty("spark.hostPort")
    conf = new SparkConf().setMaster("local").setAppName(getClass.getSimpleName)
    sc = new SparkContext(conf)
  }

  after {
    // cleanup SparkContext data
    sc.stop()
    sc = null
    conf = null
    System.clearProperty("spark.driver.port")
    System.clearProperty("spark.hostPort")
  }

  "PigStorage" should "load objects using an extractor" in {
    val res = PigStorage[Person]().load(sc, "sparklib/src/test/resources/person.csv",
      (data: Array[String]) => Person(data(0), data(1).toInt), ",")
    res.collect() should be (Array(Person("Anna", 21), Person("John", 53), Person("Mike", 32)))
  }

  it should "save and load records" in {
    val res = PigStorage[Person]().load(sc, "sparklib/src/test/resources/person.csv",
      (data: Array[String]) => Person(data(0), data(1).toInt), ",")
    PigStorage[Person]().write("person.data", res, "|")
    val otherRes = PigStorage[Person]().load(sc, "person.data",
      (data: Array[String]) => Person(data(0), data(1).toInt), "[|]")
    res.collect() should be (otherRes.collect())
    FileUtils.deleteDirectory(new File("person.data"))
   }

}
Example 10
Source File: FlinkStreamingCEPTest.scala    From piglet   with Apache License 2.0
package dbis.cep.test.flink

import java.io.File

import dbis.piglet.backends.{ Record, SchemaClass }
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.scalatest._
import org.apache.commons.io.FileUtils
import org.apache.flink.api.scala._
import dbis.piglet.cep.nfa._
import dbis.piglet.cep.ops.SelectionStrategy._
import dbis.piglet.cep.ops.OutputStrategy._
import dbis.piglet.cep.flink.CustomDataStreamMatcher._
import scala.collection.mutable.ArrayBuffer
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow
import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows

case class StreamingDoubleRecord(col1: Int, col2: Int) extends java.io.Serializable with SchemaClass {
  override def mkString(delim: String) = s"$col1$delim$col2"
}

object OurStreamingNFA {
    def filter1(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 1
    def filter2(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 2
    def filter3(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 3
    def createNFA = {
      val testNFA: NFAController[StreamingDoubleRecord] = new NFAController()
      val firstState = testNFA.createAndGetStartState("First")
      val secondState = testNFA.createAndGetNormalState("Second")
      val thirdState = testNFA.createAndGetNormalState("Third")
      val finalState = testNFA.createAndGetFinalState("Final")

      val firstEdge = testNFA.createAndGetForwardEdge(filter1)
      val secondEdge = testNFA.createAndGetForwardEdge(filter2)
      val thirdEdge = testNFA.createAndGetForwardEdge(filter3)

      testNFA.createForwardTransition(firstState, firstEdge, secondState)
      testNFA.createForwardTransition(secondState, secondEdge, thirdState)
      testNFA.createForwardTransition(thirdState, thirdEdge, finalState)
      testNFA
    }
  }

class FlinkStreamingCEPTest extends FlatSpec with Matchers with BeforeAndAfterEach {
  var resultArray = new ArrayBuffer[StreamingDoubleRecord]
  override def beforeEach() {
     resultArray.clear()
  }

  val sample = Seq(
      StreamingDoubleRecord(1,1), 
      StreamingDoubleRecord(2,2), 
      StreamingDoubleRecord(1,3), 
      StreamingDoubleRecord(2,4), 
      StreamingDoubleRecord(3,5), 
      StreamingDoubleRecord(1,6),
      StreamingDoubleRecord(2,7),
      StreamingDoubleRecord(3,8))
      
  "Flink Streaming CEP" should "detect the pattern SEQ(A, B, C) with first match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, FirstMatch)
  }

  it should "detect the pattern SEQ(A, B, C) with any match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, AllMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with next match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, NextMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with contiguity match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, ContiguityMatches)
  }
} 
Example 11
Source File: ArchiveUtils.scala    From dl4scala   with MIT License
package org.dl4scala.util


import org.slf4j.LoggerFactory
import org.apache.commons.compress.archivers.tar.TarArchiveEntry
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.FileUtils
import org.apache.commons.io.IOUtils
import java.io._
import java.util.zip.GZIPInputStream
import java.util.zip.ZipInputStream

      tarIn.close()
    }
    else if (file.endsWith(".gz")) {
      val is2 = new GZIPInputStream(fin)
      val extracted = new File(target.getParent, target.getName.replace(".gz", ""))
      if (extracted.exists) extracted.delete
      extracted.createNewFile
      val fos = FileUtils.openOutputStream(extracted)
      IOUtils.copyLarge(is2, fos)
      is2.close()
      fos.flush()
      fos.close()
    }
    target.delete
  }
} 
Example 12
Source File: FlowerDataSetIterator.scala    From dl4scala   with MIT License
package org.dl4scala.examples.transferlearning.vgg16.dataHelpers

import java.io.{File, IOException}
import java.net.URL

import org.datavec.api.io.filters.BalancedPathFilter
import org.datavec.api.io.labels.ParentPathLabelGenerator
import org.datavec.api.split.{FileSplit, InputSplit}
import org.datavec.image.loader.BaseImageLoader
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator
import java.util
import java.util.Random

import org.apache.commons.io.FileUtils
import org.datavec.api.util.ArchiveUtils
import org.datavec.image.recordreader.ImageRecordReader
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.deeplearning4j.nn.modelimport.keras.trainedmodels.TrainedModels


object FlowerDataSetIterator {
  private val log = org.slf4j.LoggerFactory.getLogger(FlowerDataSetIterator.getClass)

  private val DATA_DIR = new File(System.getProperty("user.home")) + "/dl4jDataDir"
  private val DATA_URL = "http://download.tensorflow.org/example_images/flower_photos.tgz"
  private val FLOWER_DIR = DATA_DIR + "/flower_photos"

  private val allowedExtensions = BaseImageLoader.ALLOWED_FORMATS
  private val rng = new Random(13)

  private val height = 224
  private val width = 224
  private val channels = 3
  private val numClasses = 5

  private val labelMaker = new ParentPathLabelGenerator
  private var trainData: InputSplit = _
  private var testData: InputSplit = _
  private var batchSize = 0

  @throws(classOf[IOException])
  def trainIterator: DataSetIterator = makeIterator(trainData)

  @throws(classOf[IOException])
  def testIterator: DataSetIterator = makeIterator(testData)

  @throws(classOf[IOException])
  def setup(batchSizeArg: Int, trainPerc: Int): Unit = {
    try
      downloadAndUntar()
    catch {
      case e: IOException =>
        e.printStackTrace()
        log.error("IOException : ", e)
    }

    batchSize = batchSizeArg
    val parentDir = new File(FLOWER_DIR)
    val filesInDir = new FileSplit(parentDir, allowedExtensions, rng)
    val pathFilter = new BalancedPathFilter(rng, allowedExtensions, labelMaker)
    if (trainPerc >= 100)
      throw new IllegalArgumentException("Percentage of data set aside for training has to be less than 100%." +
        " Test percentage = 100 - training percentage, has to be greater than 0")
    val filesInDirSplit = filesInDir.sample(pathFilter, trainPerc, 100 - trainPerc)
    trainData = filesInDirSplit(0)
    testData = filesInDirSplit(1)
  }

  @throws(classOf[IOException])
  private def makeIterator(split: InputSplit) = {
    val recordReader = new ImageRecordReader(height, width, channels, labelMaker)
    recordReader.initialize(split)
    val iter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numClasses)
    iter.setPreProcessor(TrainedModels.VGG16.getPreProcessor)
    iter
  }

  @throws(classOf[IOException])
  def downloadAndUntar(): Unit = {
    val rootFile = new File(DATA_DIR)
    if (!rootFile.exists) rootFile.mkdir
    val tarFile = new File(DATA_DIR, "flower_photos.tgz")
    if (!tarFile.isFile) {
      log.info("Downloading the flower dataset from " + DATA_URL + "...")
      FileUtils.copyURLToFile(new URL(DATA_URL), tarFile)
    }
    ArchiveUtils.unzipFileTo(tarFile.getAbsolutePath, rootFile.getAbsolutePath)
  }
} 
Example 13
Source File: PerTestSparkSession.scala    From Spark-RSVD   with Apache License 2.0
package com.criteo.rsvd

import java.io.File
import java.nio.file.{Files, Path}
import java.util.concurrent.locks.ReentrantLock

import org.apache.commons.io.FileUtils
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.scalatest.{BeforeAndAfterEach, Suite}

import scala.reflect.ClassTag
import scala.util.control.NonFatal

object LocalSparkSession {
  private[this] val lock = new ReentrantLock()

  def acquire(): Unit = lock.lock()

  def release(): Unit = lock.unlock()

  def builder: SparkSession.Builder = {
    SparkSession
      .builder()
      .master("local[*]")
      .appName("test")
      .config("spark.ui.enabled", false)
  }
}


  def sparkConf: Map[String, Any] = Map()

  def toRDD[T: ClassTag](input: Seq[T]): RDD[T] = sc.parallelize(input)

  def toArray[T](input: RDD[T]): Array[T] = input.collect()

  protected def closeSession() = {
    currentSession.foreach(_.stop())
    currentSession = None
    try {
      checkpointDir.foreach(path =>
        FileUtils.deleteDirectory(new File(path.toString)))
    } catch {
      case NonFatal(_) =>
    }
    checkpointDir = None
    LocalSparkSession.release()
  }

  private def getOrCreateSession = synchronized {
    if (currentSession.isEmpty) {
      val builder = LocalSparkSession.builder
      for ((key, value) <- sparkConf) {
        builder.config(key, value.toString)
      }
      currentSession = Some(builder.getOrCreate())
      checkpointDir =
        Some(Files.createTempDirectory("spark-unit-test-checkpoint-"))
      currentSession.get.sparkContext
        .setCheckpointDir(checkpointDir.get.toString)
        currentSession.get.sparkContext.setLogLevel("WARN")
    }
    currentSession.get
  }

  override def beforeEach(): Unit = {
    LocalSparkSession.acquire()
    super.beforeEach()
  }

  override def afterEach(): Unit = {
    try {
      super.afterEach()
    } finally {
      closeSession()
    }
  }
} 
Example 14
Source File: DockerCopyBuildAction.scala    From berilia   with Apache License 2.0
package com.criteo.dev.cluster.docker

import java.io.File

import com.criteo.dev.cluster.{GeneralConstants, GeneralUtilities}
import org.apache.commons.io.FileUtils



class DockerCopyBuildAction (dockerFile: String,
                             dockerImage: String,
                             resourcePath: String)
  extends DockerBuildAction (dockerFile, dockerImage) {

  val tempDir = "tmpResources"

  override def run() : Unit = {

    val tmpResourcePath = s"${GeneralUtilities.getHomeDir}/${DockerConstants.dockerBaseDir}/$tempDir"
    val tmpResource = new File(tmpResourcePath)
    GeneralUtilities.prepareDir(tmpResourcePath)

    val resource = new File(s"${GeneralUtilities.getHomeDir}/$resourcePath")
    require (resource.exists(), s"Internal error, resource to copy does not exist: $resourcePath")

    if (resource.isFile()) {
       FileUtils.copyFileToDirectory(resource, tmpResource)
    } else if (resource.isDirectory()) {
       FileUtils.copyDirectory(resource, tmpResource)
    }

    super.addArg(DockerConstants.resource, s"$tempDir")
    super.run()

    FileUtils.deleteDirectory(tmpResource)
  }
}

object DockerCopyBuildAction {
  def apply(dockerFile: String,
            dockerImage: String,
            resourcePath: String) = {
    val obj = new DockerCopyBuildAction(dockerFile, dockerImage, resourcePath)
    obj.run
  }
} 
Example 15
Source File: SourceFileSequenceBuilder.scala    From mvn_scalafmt   with Apache License 2.0
package org.antipathy.mvn_scalafmt.builder

import java.io.File
import java.nio.file.{Files, Paths}

import org.apache.commons.io.FileUtils
import org.apache.maven.plugin.logging.Log

import scala.jdk.CollectionConverters._


  override def build(paths: Seq[File]): Seq[File] =
    if (paths == null) {
      log.warn("Could not locate any scala sources to format")
      Seq.empty[File]
    } else {
      val files = paths.map(_.getCanonicalPath).flatMap { p =>
        if (Files.exists(Paths.get(p))) {
          Some(new File(p))
        } else {
          log.warn(s"Could not locate Scala source at $p")
          None
        }
      }
      files.flatMap(file => FileUtils.listFiles(file, Array("scala", "sc", "sbt"), true).asScala)
    }
} 
Example 16
Source File: TgzTransformerSpec.scala    From releaser   with Apache License 2.0
package uk.gov.hmrc.releaser

import java.io._
import java.nio.file.{Files, Path}

import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.FileUtils
import org.scalatest._

import scala.collection.mutable.ListBuffer
import scala.util.{Failure, Success}

class TgzTransformerSpec extends WordSpec with Matchers with BeforeAndAfterEach with OptionValues with TryValues{

  val tgzPath = new File(this.getClass.getResource("/help-frontend/uk/gov/hmrc/help-frontend_2.11/1.26.0-3-gd7ed03c/help-frontend_2.11-1.26.0-3-gd7ed03c.tgz").toURI).toPath

  var transformer:TgzTransformer = _
  val candidate_1_26_0_3_gd7ed03c = ReleaseCandidateVersion("1.26.0-3-gd7ed03c")
  val release_1_4_0 = ReleaseVersion("1.4.0")
  var tmpDir:Path = _

  override def beforeEach(){
    tmpDir = Files.createTempDirectory("tmp")
    transformer = new TgzTransformer()
    FileUtils.copyFileToDirectory(tgzPath.toFile, tmpDir.toFile)
  }

  override def afterEach(){
    FileUtils.deleteDirectory(tmpDir.toFile)
  }

  "the transformer" should {

    "decompress the tgz, rename the main folder and compress it back" in {

      val inFile = new File(tmpDir.toFile, tgzPath.getFileName.toString).toPath
      val targetFilePath = tmpDir.resolve("help-frontend-1.4.0.tgz")

      val originalTarEntries = listTgzEntries(inFile)
      assertTarEntry(originalTarEntries, "./help-frontend-1.26.0-3-gd7ed03c/")
      assertTarEntry(originalTarEntries, "./help-frontend-1.4.0/", exists = false)
      assertTarEntry(originalTarEntries, "./start-docker.sh", mode = Some(493))

      val outFileTry = transformer(inFile, "help-frontend", candidate_1_26_0_3_gd7ed03c, release_1_4_0, targetFilePath)
      outFileTry match {
        case Success(outFile) =>
          val tarEntries = listTgzEntries(targetFilePath)
          assertTarEntry(tarEntries, "./help-frontend-1.26.0-3-gd7ed03c/", exists = false)
          assertTarEntry(tarEntries, "./help-frontend-1.4.0/")
          assertTarEntry(tarEntries, "./start-docker.sh", mode = Some(493))
        case Failure(e) => fail("Caught exception: " + e.getMessage, e)
      }


    }
  }

  private def listTgzEntries(localTgzFile: Path) : List[TarArchiveEntry] =  {
    val bytes = new Array[Byte](2048)
    val fin = new BufferedInputStream(new FileInputStream(localTgzFile.toFile))
    val gzIn = new GzipCompressorInputStream(fin)
    val tarIn = new TarArchiveInputStream(gzIn)

    val entries = ListBuffer[TarArchiveEntry]()

    Iterator continually tarIn.getNextTarEntry takeWhile (null !=) foreach { tarEntry =>
      entries += tarEntry
    }

    tarIn.close()

    entries.toList

  }

  private def assertTarEntry(tarEntries: List[TarArchiveEntry], entryName: String, exists: Boolean = true, mode: Option[Int] = None) = {
    val entryOption = tarEntries.find(_.getName == entryName)
    entryOption match {
      case Some(entry) =>
        exists shouldBe true
        mode.foreach { m => m shouldBe entry.getMode}
      case None => exists shouldBe false
    }

  }

} 
Example 17
Source File: HttpSlippyTileReader.scala    From geotrellis-osm-elevation   with Apache License 2.0
package geotrellis.osme.core

import geotrellis.vector._
import geotrellis.raster._
import geotrellis.raster.io.geotiff._
import geotrellis.spark._
import geotrellis.spark.io.s3._

import geotrellis.spark.io.slippy._
import geotrellis.util.Filesystem

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter._
import org.apache.commons.io.IOUtils._
import org.apache.spark._
import org.apache.spark.rdd._
import java.net._
import java.io.File

class HttpSlippyTileReader[T](pathTemplate: String)(fromBytes: (SpatialKey, Array[Byte]) => T) extends SlippyTileReader[T] {
    def getURL(template: String, z: Int, x: Int, y: Int) = 
        template.replace("{z}", z.toString).replace("{x}", x.toString).replace("{y}", y.toString)
    def getByteArray(url: String) = {
      val inStream = new URL(url).openStream()
      try {
        toByteArray(inStream)
      } finally {
        inStream.close()
      }
    }

    def read(zoom: Int)(implicit sc: SparkContext): RDD[(SpatialKey, T)] = ???
    def read(zoom: Int, key: SpatialKey): T = fromBytes(key, getByteArray(getURL(pathTemplate, zoom, key.col, key.row)))
    override def read(zoom: Int, x: Int, y: Int): T =
        read(zoom, SpatialKey(x, y))
} 
Example 18
Source File: QueryCsvTest.scala    From apache-spark-test   with Apache License 2.0
package com.github.dnvriend.spark.sstreaming

import com.github.dnvriend.TestSpec
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.streaming.{ OutputMode, ProcessingTime }
import org.apache.spark.sql.types._
import org.scalatest.Ignore

import scala.concurrent.duration._
import scala.language.implicitConversions

@Ignore
class QueryCsvTest extends TestSpec {
  def copyFiles(nrTimes: Int = 10): Unit = {
    FileUtils.deleteDirectory("/tmp/csv")
    FileUtils.forceMkdir("/tmp/csv")
    (1 to nrTimes).foreach { x =>
      FileUtils.copyFile(TestSpec.PeopleCsv, s"/tmp/csv/people-$x")
    }
  }

  val schema: StructType = StructType(Array(
    StructField("id", LongType, nullable = false),
    StructField("name", StringType, nullable = true),
    StructField("age", IntegerType, nullable = true)
  ))

  it should "query csv file" in withSparkSession { spark =>
    copyFiles()

    val csv = spark.readStream
      .schema(schema)
      .format("csv")
      .option("maxFilesPerTrigger", 1)
      .option("header", "false") // Use first line of all files as header
      .option("inferSchema", "false") // Automatically infer data types
      .option("delimiter", ";")
      .load("/tmp/csv")

    csv.printSchema()

    println("Is the query streaming: " + csv.isStreaming)
    println("Are there any streaming queries? " + spark.streams.active.isEmpty)

    val query = csv
      .writeStream
      .format("console")
      .trigger(ProcessingTime(5.seconds))
      .queryName("consoleStream")
      .outputMode(OutputMode.Append())
      .start()

    // waiting for data
    sleep(3.seconds)
    spark.streams
      .active
      .foreach(println)

    spark.streams
      .active
      .foreach(_.explain(extended = true))

    query.awaitTermination(20.seconds)
  }
} 
Example 19
Source File: SharedSparkSessionSuite.scala    From ecosystem   with Apache License 2.0
package org.tensorflow.spark.datasources.tfrecords

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.spark.SharedSparkSession
import org.junit.{After, Before}
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike}


trait BaseSuite extends WordSpecLike with Matchers with BeforeAndAfterAll

class SharedSparkSessionSuite extends SharedSparkSession with BaseSuite {
  val TF_SANDBOX_DIR = "tf-sandbox"
  val file = new File(TF_SANDBOX_DIR)

  @Before
  override def beforeAll() = {
    super.setUp()
    FileUtils.deleteQuietly(file)
    file.mkdirs()
  }

  @After
  override def afterAll() = {
    FileUtils.deleteQuietly(file)
    super.tearDown()
  }
} 
Example 20
Source File: LocalWriteSuite.scala    From ecosystem   with Apache License 2.0
package org.tensorflow.spark.datasources.tfrecords

import java.nio.file.Files
import java.nio.file.Paths

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.types._

import org.apache.commons.io.FileUtils

class LocalWriteSuite extends SharedSparkSessionSuite {

  val testRows: Array[Row] = Array(
    new GenericRow(Array[Any](11, 1, 23L, 10.0F, 14.0, List(1.0, 3.0), "r1")),
    new GenericRow(Array[Any](21, 2, 24L, 12.0F, 15.0, List(2.0, 3.0), "r2")),
    new GenericRow(Array[Any](31, 3, 25L, 14.0F, 16.0, List(3.0, 3.0), "r3")))
  val schema = StructType(List(StructField("id", IntegerType),
    StructField("IntegerTypeLabel", IntegerType),
    StructField("LongTypeLabel", LongType),
    StructField("FloatTypeLabel", FloatType),
    StructField("DoubleTypeLabel", DoubleType),
    StructField("VectorLabel", ArrayType(DoubleType, true)),
    StructField("name", StringType)))


  "Propagate" should {
    "write data locally" in {
      // Create a dataframe with 2 partitions
      val rdd = spark.sparkContext.parallelize(testRows, numSlices = 2)
      val df = spark.createDataFrame(rdd, schema)

      // Write the partitions onto the local hard drive. Since it is going to be the
      // local file system, the partitions will be written in the same directory of the
      // same machine.
      // In a distributed setting though, two different machines would each hold a single
      // partition.
      val localPath = Files.createTempDirectory("spark-connector-propagate").toAbsolutePath.toString
      val savePath = localPath + "/testResult"
      df.write.format("tfrecords")
        .option("recordType", "Example")
        .option("writeLocality", "local")
        .save(savePath)

      // Read again this directory, this time using the Hadoop file readers, it should
      // return the same data.
      // This only works in this test and does not hold in general, because the partitions
      // will be written on the workers. Everything runs locally for tests.
      val df2 = spark.read.format("tfrecords").option("recordType", "Example")
        .load(savePath).sort("id").select("id", "IntegerTypeLabel", "LongTypeLabel",
        "FloatTypeLabel", "DoubleTypeLabel", "VectorLabel", "name") // Correct column order.

      assert(df2.collect().toSeq === testRows.toSeq)
    }
  }
} 
Example 21
Source File: JsonIOTest.scala    From scio   with Apache License 2.0
package com.spotify.scio.extra.json

import java.nio.file.Files

import io.circe.Printer
import com.spotify.scio._
import com.spotify.scio.io.TapSpec
import com.spotify.scio.testing._
import com.spotify.scio.util.ScioUtil
import org.apache.beam.sdk.Pipeline.PipelineExecutionException
import org.apache.commons.io.FileUtils

import scala.jdk.CollectionConverters._
import scala.io.Source

object JsonIOTest {
  case class Record(i: Int, s: String, o: Option[Int])
}

class JsonIOTest extends ScioIOSpec with TapSpec {
  import JsonIOTest._

  private val xs = (1 to 100).map(x => Record(x, x.toString, if (x % 2 == 0) Some(x) else None))

  "JsonIO" should "work" in {
    testTap(xs)(_.saveAsJsonFile(_))(".json")
    testJobTest(xs)(JsonIO(_))(_.jsonFile(_))(_.saveAsJsonFile(_))
  }

  it should "support custom printer" in {
    val dir = tmpDir
    val t = runWithFileFuture {
      _.parallelize(xs)
        .saveAsJsonFile(dir.getPath, printer = Printer.noSpaces.copy(dropNullValues = true))
    }
    verifyTap(t, xs.toSet)
    val result = Files
      .list(dir.toPath)
      .iterator()
      .asScala
      .flatMap(p => Source.fromFile(p.toFile).getLines())
      .toSeq
    val expected = (1 to 100).map { x =>
      s"""{"i":$x,"s":"$x"${if (x % 2 == 0) s""","o":$x""" else ""}}"""
    }
    result should contain theSameElementsAs expected
    FileUtils.deleteDirectory(dir)
  }

  it should "handle invalid JSON" in {
    val badData = Seq(
      """{"i":1, "s":hello}""",
      """{"i":1}""",
      """{"s":"hello"}""",
      """{"i":1, "s":1}""",
      """{"i":"hello", "s":1}"""
    )
    val dir = tmpDir
    runWithFileFuture {
      _.parallelize(badData).saveAsTextFile(dir.getPath)
    }

    val sc = ScioContext()
    sc.jsonFile[Record](ScioUtil.addPartSuffix(dir.getPath))

    a[PipelineExecutionException] should be thrownBy { sc.run() }

    FileUtils.deleteDirectory(dir)
  }
} 
Example 22
Source File: ConverterProviderTest.scala    From scio   with Apache License 2.0
package com.spotify.scio.avro.types

import java.nio.file.Files

import com.spotify.scio._
import com.spotify.scio.avro._
import org.apache.commons.io.FileUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ConverterProviderTest extends AnyFlatSpec with Matchers {
  import ConverterProviderTest._

  "ConverterProvider" should "#1831: handle Avro map" in {
    val dir = Files.createTempDirectory("avro-")
    val data = Seq(Record(Map("a" -> 1), Some(Map("b" -> 2)), List(Map("c" -> 3))))

    val sc1 = ScioContext()
    sc1.parallelize(data).saveAsTypedAvroFile(dir.toString)
    sc1.run()

    val sc2 = ScioContext()
    val t = sc2.typedAvroFile[Record](s"$dir/*.avro").materialize
    sc2.run()

    t.underlying.value.toSeq should contain theSameElementsAs data

    FileUtils.deleteDirectory(dir.toFile)
  }
}

object ConverterProviderTest {
  @AvroType.toSchema
  case class Record(a: Map[String, Int], b: Option[Map[String, Int]], c: List[Map[String, Int]])
} 
Example 23
Source File: TFTapTest.scala    From scio   with Apache License 2.0
package com.spotify.scio.tensorflow

import java.util.UUID

import com.spotify.scio.io.TapSpec
import org.apache.commons.io.FileUtils

class TFTapTest extends TapSpec {
  "SCollection" should "support saveAsTFRecordFile" in {
    val data = Seq.fill(100)(UUID.randomUUID().toString)
    import org.apache.beam.sdk.io.{Compression => CType}
    for (compressionType <- Seq(CType.UNCOMPRESSED, CType.DEFLATE, CType.GZIP)) {
      val dir = tmpDir
      val t = runWithFileFuture {
        _.parallelize(data)
          .map(_.getBytes)
          .saveAsTfRecordFile(dir.getPath, compression = compressionType)
      }
      verifyTap(t.map(new String(_)), data.toSet)
      FileUtils.deleteDirectory(dir)
    }
  }
} 
Example 24
Source File: UDFBuilder.scala    From sope   with Apache License 2.0
package com.sope.etl.register

import java.io.File
import java.net.URLClassLoader

import com.sope.etl.getObjectInstance
import com.sope.etl.transform.exception.YamlDataTransformException
import com.sope.etl.utils.JarUtils
import com.sope.utils.Logging
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.expressions.UserDefinedFunction

import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter.IMain

object UDFBuilder extends Logging {

  val DefaultClassLocation = "/tmp/sope/dynamic/"
  val DefaultJarLocation = "/tmp/sope/sope-dynamic-udf.jar"


  
  def buildDynamicUDFs(udfCodeMap: Map[String, String]): Map[String, UserDefinedFunction] = {
    val file = new java.io.File(UDFBuilder.DefaultClassLocation)
    FileUtils.deleteDirectory(file)
    file.mkdirs()
    val udfMap = evalUDF(udfCodeMap)
    JarUtils.buildJar(DefaultClassLocation, DefaultJarLocation)
    udfMap
  }

} 
Example 25
Source File: BruteForceSequenceMatcher.scala    From sonar-scala   with GNU Lesser General Public License v3.0
package com.buransky.plugins.scoverage.pathcleaner

import java.io.File
import org.apache.commons.io.FileUtils
import BruteForceSequenceMatcher._
import com.buransky.plugins.scoverage.util.PathUtil
import scala.collection.JavaConversions._
import org.sonar.api.utils.log.Loggers

object BruteForceSequenceMatcher {

  val extensions = Array[String]("java", "scala")

  type PathSeq = Seq[String]
}


class BruteForceSequenceMatcher(baseDir: File, sourcePath: String) extends PathSanitizer {

  private val sourceDir = initSourceDir()
  require(sourceDir.isAbsolute)
  require(sourceDir.isDirectory)

  private val log = Loggers.get(classOf[BruteForceSequenceMatcher])
  private val sourcePathLength = PathUtil.splitPath(sourceDir.getAbsolutePath).size
  private val filesMap = initFilesMap()


  def getSourceRelativePath(reportPath: PathSeq): Option[PathSeq] = {
    // match with file system map of files
    val relPathOption = for {
      absPathCandidates <- filesMap.get(reportPath.last)
      path <- absPathCandidates.find(absPath => absPath.endsWith(reportPath))
    } yield path.drop(sourcePathLength)

    relPathOption
  }

  // mock able helpers that allow us to remove the dependency to the real file system during tests

  private[pathcleaner] def initSourceDir(): File = {
    sourcePath.split(",").headOption.map { first =>
      val firstFile = new File(first)
      if (firstFile.isAbsolute) {
        firstFile
      } else {
        val sourceDir = new File(baseDir, first)
        sourceDir
      }
    }.orNull
  }

  private[pathcleaner] def initFilesMap(): Map[String, Seq[PathSeq]] = {
    val srcFiles = FileUtils.iterateFiles(sourceDir, extensions, true)
    val paths = srcFiles.map(file => PathUtil.splitPath(file.getAbsolutePath)).toSeq

    // group them by filename, in case multiple files have the same name
    paths.groupBy(path => path.last)
  }

} 
Example 26
Source File: GeneratorTest.scala    From courier   with Apache License 2.0
package org.coursera.courier.generator

import java.io.File
import java.io.IOException

import com.linkedin.data.DataList
import com.linkedin.data.DataMap
import com.linkedin.data.codec.JacksonDataCodec
import com.linkedin.data.template.DataTemplate
import com.linkedin.data.template.JacksonDataTemplateCodec
import com.linkedin.data.template.PrettyPrinterJacksonDataTemplateCodec
import org.apache.commons.io.FileUtils
import org.scalatest.junit.AssertionsForJUnit
import org.scalatest.junit.JUnitSuite

abstract class GeneratorTest extends JUnitSuite with AssertionsForJUnit {

  def printJson(dataTemplate: DataTemplate[DataMap]): Unit = printJson(dataTemplate.data)

  def printJson(dataMap: DataMap): Unit = println(mapToJson(dataMap))

  def assertJson(left: DataTemplate[DataMap], right: String): Unit = {
    val leftMap = readJsonToMap(mapToJson(left.data()))
    val rightMap = readJsonToMap(right)
    assert(leftMap === rightMap)
  }

  def roundTrip(complex: DataMap): DataMap = {
    readJsonToMap(mapToJson(complex))
  }

  def roundTrip(complex: DataList): DataList = {
    readJsonToList(listToJson(complex))
  }

  private val jsonPath = new File(
    System.getProperty("referencesuite.srcdir") +
      File.separator + "main" + File.separator + "json")

  protected def load(filename: String): String = {
    FileUtils.readFileToString(new File(jsonPath, filename))
  }

  private val prettyPrinter = new PrettyPrinterJacksonDataTemplateCodec
  private val codec = new JacksonDataTemplateCodec
  private val dataCodec = new JacksonDataCodec

  private def mapToJson(dataTemplate: DataTemplate[DataMap]): String = mapToJson(dataTemplate.data)

  private def listToJson(dataTemplate: DataTemplate[DataList]): String = {
    listToJson(dataTemplate.data)
  }

  private def mapToJson(dataMap: DataMap): String = prettyPrinter.mapToString(dataMap)

  private def listToJson(dataList: DataList): String = prettyPrinter.listToString(dataList)

  private def readJsonToMap(string: String): DataMap = dataCodec.stringToMap(string)

  private def readJsonToList(string: String): DataList = dataCodec.stringToList(string)
} 
Example 27
Source File: BillerCache.scala    From apple-of-my-iap   with MIT License
package com.meetup.iap

import com.meetup.iap.receipt.Subscription
import org.slf4j.LoggerFactory

import java.io.File
import scala.io.Source

import org.json4s.DefaultFormats
import org.json4s.native.Serialization.{read, writePretty}
import org.apache.commons.io.FileUtils


object BillerCache {
  val log = LoggerFactory.getLogger(BillerCache.getClass)

  implicit val formats = DefaultFormats

  private val ProjectName = "iap-service"
  private val inProject = new File(".").getCanonicalPath.endsWith(ProjectName)

  private val Folder = {
    val base = if(inProject) "" else "iap-service/"
    new File(s"${base}tmp/")
  }
  if(!Folder.exists) {
    Folder.mkdirs
  }

  private val TempFile = new File(Folder, "subscriptions.json")
  if(!TempFile.exists) {
    TempFile.createNewFile
  }

  private val PlansFile = new File(Folder, "plans.json")
  if (!PlansFile.exists) {
    PlansFile.createNewFile
  }

  def readFromCache(): Map[String, Subscription] = {
    log.info("Reading from file: " + TempFile.getAbsolutePath)
    val raw = Source.fromFile(TempFile).mkString.trim

    if(raw.nonEmpty) {
        Map(read[Map[String, Subscription]](raw).toSeq: _*)
    } else Map.empty
  }

  def writeToCache(subs: Map[String, Subscription]) {
      val json = writePretty(subs)
      FileUtils.writeStringToFile(TempFile, json, "UTF-8")
  }

  def readPlansFromFile(): List[Plan] = {
    log.info(s"Reading from plans file: ${PlansFile.getAbsolutePath}")
    val raw = Source.fromFile(PlansFile).mkString.trim

    if(raw.nonEmpty) {
      log.info("Found some plans")
      List(read[List[Plan]](raw).toSeq: _*)
    } else List.empty
  }
} 
Example 28
Source File: TransformerSerialization.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.deeplang.doperables.spark.wrappers.transformers

import java.nio.file.{Files, Path}

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfter, Suite}

import io.deepsense.deeplang.doperables.Transformer
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import io.deepsense.deeplang.{DeeplangIntegTestSupport, ExecutionContext}

trait TransformerSerialization extends Suite with BeforeAndAfter {

  var tempDir: Path = _

  before {
    tempDir = Files.createTempDirectory("writeReadTransformer")
  }

  after {
    FileUtils.deleteDirectory(tempDir.toFile)
  }
}

object TransformerSerialization {

  implicit class TransformerSerializationOps(private val transformer: Transformer) {

    def applyTransformationAndSerialization(
        path: Path,
        df: DataFrame)(implicit executionContext: ExecutionContext): DataFrame = {
      val result = transformer._transform(executionContext, df)
      val deserialized = loadSerializedTransformer(path)
      val resultFromSerializedTransformer = deserialized._transform(executionContext, df)
      DeeplangIntegTestSupport.assertDataFramesEqual(result, resultFromSerializedTransformer)
      result
    }

    def loadSerializedTransformer(
        path: Path)(
        implicit executionContext: ExecutionContext): Transformer = {
      val outputPath: Path = path.resolve(this.getClass.getName)
      transformer.save(executionContext, outputPath.toString)
      Transformer.load(executionContext, outputPath.toString)
    }
  }
} 
Example 29
Source File: ParquetIOTest.scala    From ratatool   with Apache License 2.0
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.nio.file.Files

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import com.spotify.ratatool.scalacheck._
import org.apache.commons.io.FileUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ParquetIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "ParquetIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream(in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(genericData, genericSchema, file)
    val result = ParquetIO.readFromFile(file).toList
    result should equal (genericData)
    FileUtils.deleteDirectory(dir.toFile)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(specificData, specificSchema, file)
    val result = ParquetIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
    FileUtils.deleteDirectory(dir.toFile)
  }

} 
Example 30
Source File: ModifyFilesRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.generic

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.io.Source


class ModifyFilesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectRoot = new File(getClass.getClassLoader.getResource("someproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "ModifyFilesRule" should "modify the file correctly" in {
    val ruleConfig = ModifyFilesRuleConfig(
      "**/fileA.txt",
      None,
      List(
        ContentMapping("hello\\s(.+)\\n", "hallo $1\n"),
        ContentMapping("(.+)\\sBob", "$1 Alice")
      )
    )
    val projectCtx = new GenericProjectCtx(destProjectRoot)
    val provider = new AllFilesModelProvider
    val model = provider create projectCtx
    val rule = new ModifyFilesRule(ruleConfig)
    val result = rule transform model
    val file = result.files.find(_.getName == "fileA.txt")
    file.nonEmpty should be (true)
    Source.fromFile(file.get).getLines.toList should be (List("hallo world", "hi Alice"))
  }

} 
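The beforeEach above discards any stale copy with FileUtils.deleteQuietly and then re-creates a scratch copy of the fixture project with FileUtils.copyDirectory, so the rule can modify files without touching the original resources. A standalone sketch of that working-copy idiom, with made-up names:

import java.io.File
import org.apache.commons.io.FileUtils

object WorkingCopySketch {
  // Copies `src` to a sibling "<name>-work" directory, discarding any previous copy first.
  def freshWorkingCopy(src: File): File = {
    val dest = new File(src.getParentFile, src.getName + "-work")
    FileUtils.deleteQuietly(dest)      // never throws; a no-op if dest does not exist
    FileUtils.copyDirectory(src, dest) // recursive copy of the whole tree
    dest
  }
}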
Example 31
Source File: MoveFilesRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.generic

import java.io.File

import org.apache.commons.io.FileUtils
import org.json4s.jackson.JsonMethods._
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}


class MoveFilesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectRoot = new File(getClass.getClassLoader.getResource("someproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MoveFilesRule" should "move file to the dest directory" in {
    val ruleConfig = MoveFilesRuleConfig(
      List(
        Move("**.txt", "otherdirectory/dest"),
        Move("*.txt", "otherdirectory")
      )
    )
    val projectCtx = new GenericProjectCtx(destProjectRoot)
    val provider = new AllFilesModelProvider
    val model = provider create projectCtx
    val rule = new MoveFilesRule(ruleConfig)
    val result = rule transform model
    result.files forall (_.exists) should be (true)
  }

} 
Example 32
Source File: ModifyXMLFilesRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.generic

import java.io.File

import org.apache.commons.io.FileUtils
import com.ebay.rtran.xml._
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.io.Source
import scala.language.postfixOps


class ModifyXMLFilesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectRoot = new File(getClass.getClassLoader.getResource("someproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "ModifyXMLFilesRuleTest" should "able to delete nodes" in {
    val provider = new XMLFilesModelProvider
    val ruleConfig = ModifyXMLFilesRuleConfig(
      Some("***.xml"),
      List(
        ModifyXMLOperation(
          "//person[@name=\'Bob\']/job",
          OperationType.Replace,
          Some("<job>Software Engineer</job>")
        )
      )
    )
    val rule = new ModifyXMLFilesRule(ruleConfig)
    val transformedModel = rule.transform(provider.create(new GenericProjectCtx(destProjectRoot)))
    provider save transformedModel

    val transformedContent = Source.fromFile(new File(destProjectRoot, "somedirectory/someXML.xml")).getLines.mkString("\n")
    transformedContent should include ("Bob")
    transformedContent should include ("Software Engineer")
    transformedContent should not include "Salesman"
  }

} 
Example 33
Source File: RuleEngineTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.core

import java.io.File

import org.apache.commons.io.FileUtils
import org.json4s.jackson.JsonMethods._
import com.ebay.rtran.core.mock.{MyModifyFileRule, MyProject, MyRenameFileRule, MyRenameFileRuleConfig}
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.io.Source
import scala.collection.JavaConversions._


class RuleEngineTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectDir = new File(getClass.getClassLoader.getResource("myproject").getFile)
  val backupDir = new File(projectDir.getParentFile, projectDir.getName + "-bak")

  override def beforeEach = {
    FileUtils.copyDirectory(projectDir, backupDir)
  }
  override def afterEach = {
    FileUtils.deleteQuietly(backupDir)
  }

  "RuleEngine" should "execute rules from UpgradeConfiguration" in {
    val engine = new RuleEngine
    val projectRoot = backupDir
    val configuration = JsonUpgradeConfiguration(List(
      JsonRuleConfiguration("ModifyFileRule", None),
      JsonRuleConfiguration("RenameFileRule", Some(parse("""{"newName":"anotherfile"}""")))
    ))
    engine.execute(new MyProject(projectRoot), configuration)
    new File(projectRoot, "somefile").exists should be (false)
    new File(projectRoot, "anotherfile").exists should be (true)
    Source.fromFile(new File(projectRoot, "anotherfile")).getLines.toList should be (List("hi world", "hi Bob"))
  }

  "RuleEngine" should "execute rules from code" in {
    val engine = new RuleEngine
    val projectRoot = backupDir
    engine.execute(
      new MyProject(projectRoot),
      List(
        new MyModifyFileRule(),
        new MyRenameFileRule(MyRenameFileRuleConfig("anotherfile"))
      )
    )
    new File(projectRoot, "somefile").exists should be (false)
    new File(projectRoot, "anotherfile").exists should be (true)
    Source.fromFile(new File(projectRoot, "anotherfile")).getLines.toList should be (List("hi world", "hi Bob"))
  }

} 
Example 34
Source File: CliExec.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package com.salesforce.op.cli

// scalastyle:off
// TODO(vlad): make sure that a simple intellij run fills in the resources
// @see https://github.com/apache/spark/blob/master/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala#L54
// scalastyle:on
import java.io.File

import com.salesforce.op.cli.gen.Ops
import org.apache.commons.io.FileUtils

class CliExec {
  protected val DEBUG = false

  private[cli] def delete(dir: File): Unit = {
    FileUtils.deleteDirectory(dir)
    if (dir.exists()) {
      throw new IllegalStateException(s"Directory '${dir.getAbsolutePath}' still exists")
    }
  }

  def main(args: Array[String]): Unit = try {
    val ops = for {
      arguments <- CommandParser.parse(args, CliParameters())
      if arguments.command == "gen"
      settings <- arguments.values
    } yield Ops(settings)

    ops getOrElse {
      CommandParser.showUsage()
      quit("wrong arguments", 1)
    }

    val outcome = ops.map (_.run())

    outcome getOrElse quit("Generation failed; see error messages", 1)

  } catch {
    case x: Exception =>
      if (DEBUG) x.printStackTrace()
      val msg = Option(x.getMessage).getOrElse(x.getStackTrace.mkString("", "\n", "\n"))
      quit(msg)
  }

  def quit(errorMsg: String, code: Int = -1): Nothing = {
    System.err.println(errorMsg)
    sys.exit(code)
  }
}

object CLI {
  def main(args: Array[String]): Unit = (new CliExec).main(args)
} 
Example 35
Source File: LogFile.scala    From kyuubi   with Apache License 2.0 5 votes vote down vote up
package yaooqinn.kyuubi.operation

import java.io.{BufferedReader, File, FileInputStream, FileNotFoundException, FileOutputStream, InputStreamReader, IOException, PrintStream}
import java.util.ArrayList

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.hadoop.io.IOUtils
import org.apache.kyuubi.Logging
import org.apache.spark.sql.Row

import yaooqinn.kyuubi.KyuubiSQLException

class LogFile private (
    file: File,
    private var reader: Option[BufferedReader],
    writer: PrintStream,
    @volatile private var isRemoved: Boolean = false) extends Logging {

  def this(file: File) = {
    this(file,
      LogFile.createReader(file, isRemoved = false),
      new PrintStream(new FileOutputStream(file)))
  }

  private def resetReader(): Unit = {
    reader.foreach(IOUtils.closeStream)
    reader = None
  }

  private def readResults(nLines: Long): Seq[Row] = {
    reader = reader.orElse(LogFile.createReader(file, isRemoved))

    val logs = new ArrayList[Row]()
    reader.foreach { r =>
      var i = 1
      try {
        var line: String = r.readLine()
        while ((i < nLines || nLines <= 0) && line != null) {
          logs.add(Row(line))
          line = r.readLine()
          i += 1
        }
      } catch {
        case e: FileNotFoundException =>
          val operationHandle = file.getName
          val path = file.getAbsolutePath
          val msg = if (isRemoved) {
            s"Operation[$operationHandle] has been closed and the log file $path has been removed"
          } else {
            s"Operation[$operationHandle] Log file $path is not found"
          }
          throw new KyuubiSQLException(msg, e)
      }
    }
    logs.asScala
  }

  
  def write(msg: String): Unit = {
    writer.print(msg)
  }


  def close(): Unit = synchronized {
    try {
      reader.foreach(_.close())
      writer.close()
      if (!isRemoved) {
        FileUtils.forceDelete(file)
        isRemoved = true
      }
    } catch {
      case e: IOException =>
        error(s"Failed to remove corresponding log file of operation: ${file.getName}", e)
    }
  }
}

object LogFile {

  def createReader(file: File, isRemoved: Boolean): Option[BufferedReader] = try {
    Option(new BufferedReader(new InputStreamReader(new FileInputStream(file))))
  } catch {
    case e: FileNotFoundException =>
      val operationHandle = file.getName
      val path = file.getAbsolutePath
      val msg = if (isRemoved) {
        s"Operation[$operationHandle] has been closed and the log file $path has been removed"
      } else {
        s"Operation[$operationHandle] Log file $path is not found"
      }
      throw new KyuubiSQLException(msg, e)
  }
} 
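LogFile.close uses FileUtils.forceDelete, which throws an IOException when the file cannot be removed, and the class handles that exception itself. A small sketch contrasting that behaviour with FileUtils.deleteQuietly; the helper names are mine:

import java.io.{File, IOException}
import org.apache.commons.io.FileUtils

object DeleteSketch {
  // Fails loudly: forceDelete throws FileNotFoundException/IOException when deletion is impossible.
  def deleteOrReport(file: File): Unit =
    try FileUtils.forceDelete(file)
    catch {
      case e: IOException => println(s"could not delete ${file.getAbsolutePath}: ${e.getMessage}")
    }

  // Best effort: deleteQuietly swallows all errors and only reports success as a Boolean.
  def deleteBestEffort(file: File): Boolean =
    FileUtils.deleteQuietly(file)
}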
Example 36
Source File: DefaultSourceSpec.scala    From jgit-spark-connector   with Apache License 2.0 5 votes vote down vote up
package tech.sourced.engine

import java.nio.file.{Path, Paths}
import java.util.UUID

import org.apache.commons.io.FileUtils
import org.eclipse.jgit.api.Git

class DefaultSourceSpec extends BaseSourceSpec("DefaultSource") {

  var tmpPath: Path = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID.toString)

  override protected def beforeAll(): Unit = {
    super.beforeAll()

    tmpPath.toFile.mkdir()
  }

  "DefaultSource" should "not optimize if the conditions on the " +
    "join are not the expected ones" in {
    val repos = engine.getRepositories
    val references = ss.read.format("tech.sourced.engine").option("table", "references").load()
    val out = repos.join(references,
      (references("repository_id") === repos("id"))
        .and(references("name").startsWith("refs/pull"))
    ).count()

    val df = references.limit(1).getCommits
    df.count() should be(1)
  }

  it should "return the remote branches renamed to refs/heads" in {
    val repoDir = tmpPath.resolve("repo")

    Git.cloneRepository()
      .setURI("https://github.com/src-d/jgit-spark-connector.git")
      .setDirectory(repoDir.toFile)
      .call()

    val engine = Engine(ss, tmpPath.toString, "standard")
    val masters = engine.getRepositories
      .getMaster
      .collect()
      .sortBy(_.getAs[String]("repository_id"))

    masters.length should be(2)
    masters(0).getAs[String]("repository_id") should startWith("file")
    masters(0).getAs[Boolean]("is_remote") should be(false)

    masters(1).getAs[String]("repository_id") should startWith("github")
    masters(1).getAs[Boolean]("is_remote") should be(true)

    engine.getRepositories.getRemoteReferences.getMaster.count() should be(1)
  }

  it should "match HEAD and not just refs/heads/HEAD" in {
    val repoDir = tmpPath.resolve("repo")

    import tech.sourced.engine.util.RepoUtils._

    val repo = createRepo(repoDir)
    commitFile(repo, "foo", "bar", "baz")

    Engine(ss, tmpPath.toString, "standard").getRepositories.getHEAD.count() should be(1)
  }

  it should "traverse all commits if it's not chained" in {
    val row = engine.session.sql("SELECT COUNT(*) FROM commits").first()
    row(0) should be(4444)

    val row2 = engine.session.sql("SELECT COUNT(*) FROM commits WHERE index > 0").first()
    row2(0) should be(4390)
  }

  override protected def afterAll(): Unit = {
    super.afterAll()

    FileUtils.deleteQuietly(tmpPath.toFile)
  }
} 
Example 37
Source File: RepositoryRDDProviderSpec.scala    From jgit-spark-connector   with Apache License 2.0 5 votes vote down vote up
package tech.sourced.engine.provider

import java.nio.file.{Path, Paths}
import java.util.UUID

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers}
import tech.sourced.engine.util.RepoUtils
import tech.sourced.engine.{BaseSivaSpec, BaseSparkSpec}

class RepositoryRDDProviderSpec extends FlatSpec with Matchers with BeforeAndAfterEach
  with BaseSparkSpec with BaseSivaSpec {

  private var provider: RepositoryRDDProvider = _
  private var tmpPath: Path = _

  override def beforeEach(): Unit = {
    super.beforeEach()
    provider = RepositoryRDDProvider(ss.sparkContext)
    tmpPath = Paths.get(
      System.getProperty("java.io.tmpdir"),
      UUID.randomUUID().toString
    )
  }

  override def afterEach(): Unit = {
    super.afterEach()

    FileUtils.deleteQuietly(tmpPath.toFile)
  }

  "RepositoryRDDProvider" should "retrieve bucketized raw repositories" in {
    tmpPath.resolve("a").toFile.mkdir()
    createRepo(tmpPath.resolve("a").resolve("repo"))

    tmpPath.resolve("b").toFile.mkdir()
    createRepo(tmpPath.resolve("b").resolve("repo"))

    createRepo(tmpPath.resolve("repo"))

    val repos = provider.get(tmpPath.toString, "standard").collect()
    repos.length should be(3)
  }

  it should "retrieve non-bucketized raw repositories" in {
    tmpPath.resolve("a").toFile.mkdir()
    createRepo(tmpPath.resolve("repo"))

    tmpPath.resolve("b").toFile.mkdir()
    createRepo(tmpPath.resolve("repo2"))

    val repos = provider.get(tmpPath.toString, "standard").collect()
    repos.length should be(2)
  }

  it should "retrieve bucketized siva repositories" in {
    val repos = provider.get(resourcePath, "siva").collect()
    repos.length should be(3)
  }

  it should "retrieve non-bucketized siva repositories" in {
    val repos = provider.get(Paths.get(resourcePath, "ff").toString, "siva").collect()
    repos.length should be(1)
  }

  private def createRepo(path: Path) = {
    val repo = RepoUtils.createRepo(path)
    RepoUtils.commitFile(repo, "file.txt", "something something", "some commit")
  }

} 
Example 38
Source File: RepoUtils.scala    From jgit-spark-connector   with Apache License 2.0 5 votes vote down vote up
package tech.sourced.engine.util

import java.nio.file.{Path, Paths}

import org.apache.commons.io.FileUtils
import org.eclipse.jgit.api.CreateBranchCommand.SetupUpstreamMode
import org.eclipse.jgit.api.Git
import org.eclipse.jgit.revwalk.RevCommit
import org.eclipse.jgit.transport.URIish

object RepoUtils {

  def createBareRepo(path: Path): Git = {
    Git.init().setBare(true).setDirectory(path.toFile).call()
  }

  def createRepo(path: Path): Git = {
    Git.init().setDirectory(path.toFile).call()
  }

  def addRemote(repo: Git, name: String, url: String): Unit = {
    val cmd = repo.remoteAdd()
    cmd.setName(name)
    cmd.setUri(new URIish(url))
    cmd.call()
  }

  def commitFile(repo: Git, name: String, content: String, msg: String): RevCommit = {
    val file = Paths.get(repo.getRepository.getDirectory.getParent, name)
    FileUtils.write(file.toFile, content)
    repo.add().addFilepattern(name).call()
    repo.commit().setMessage(msg).call()
  }

} 
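commitFile above calls the two-argument FileUtils.write(file, content), which relies on the platform default charset and is deprecated in recent commons-io releases. A hedged variant of that write step with the encoding pinned explicitly:

import java.io.File
import java.nio.charset.StandardCharsets
import org.apache.commons.io.FileUtils

object WriteSketch {
  // Creates missing parent directories and overwrites any existing content, always as UTF-8.
  def writeUtf8(file: File, content: String): Unit =
    FileUtils.write(file, content, StandardCharsets.UTF_8)
}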
Example 39
Source File: MetadataIteratorSpec.scala    From jgit-spark-connector   with Apache License 2.0 5 votes vote down vote up
package tech.sourced.engine.iterator

import java.nio.file.Paths
import java.util.{Properties, UUID}

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.{Metadata, StringType, StructType}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}
import tech.sourced.engine.{BaseSparkSpec, Schema}

class JDBCQueryIteratorSpec
  extends FlatSpec with Matchers with BeforeAndAfterAll with BaseSparkSpec {
  private val tmpPath = Paths.get(
    System.getProperty("java.io.tmpdir"),
    UUID.randomUUID.toString
  )

  private val dbPath = tmpPath.resolve("test.db")

  override def beforeAll(): Unit = {
    super.beforeAll()
    tmpPath.toFile.mkdir()
    val rdd = ss.sparkContext.parallelize(Seq(
      Row("id1"),
      Row("id2"),
      Row("id3")
    ))

    val properties = new Properties()
    properties.put("driver", "org.sqlite.JDBC")
    val df = ss.createDataFrame(rdd, StructType(Seq(Schema.repositories.head)))
    df.write.jdbc(s"jdbc:sqlite:${dbPath.toString}", "repositories", properties)
  }

  override def afterAll(): Unit = {
    super.afterAll()
    FileUtils.deleteQuietly(tmpPath.toFile)
  }

  "JDBCQueryIterator" should "return all rows for the query" in {
    val iter = new JDBCQueryIterator(
      Seq(attr("id")),
      dbPath.toString,
      "SELECT id FROM repositories ORDER BY id"
    )

    // calling hasNext more than one time does not cause rows to be lost
    iter.hasNext
    iter.hasNext
    val rows = (for (row <- iter) yield row).toArray
    rows.length should be(3)
    rows(0).length should be(1)
    rows(0)(0).toString should be("id1")
    rows(1)(0).toString should be("id2")
    rows(2)(0).toString should be("id3")
  }

  private def attr(name: String): Attribute = AttributeReference(
    name, StringType, nullable = false, Metadata.empty
  )()
} 
Example 40
Source File: BloopSpec.scala    From seed   with Apache License 2.0 5 votes vote down vote up
package seed.generation

import java.nio.file.{Files, Path}

import bloop.config.ConfigEncoderDecoders
import minitest.SimpleTestSuite
import org.apache.commons.io.FileUtils
import seed.generation.util.BuildUtil.tempPath

object BloopSpec extends SimpleTestSuite {
  def parseBloopFile(path: Path): bloop.config.Config.File = {
    val json = FileUtils.readFileToString(path.toFile, "UTF-8")
    io.circe.parser.decode(json)(ConfigEncoderDecoders.allDecoder).right.get
  }

  test("Inherit javaDeps in child modules") {
    val projectPath = tempPath.resolve("inherit-javadeps")
    Files.createDirectory(projectPath)

    val bloopPath = projectPath.resolve(".bloop")
    val build     = util.ProjectGeneration.generateJavaDepBloopProject(projectPath)

    assertEquals(build("example").module.jvm.get.moduleDeps, List("base"))

    val base = parseBloopFile(bloopPath.resolve("base.json"))
    assert(
      base.project.classpath
        .exists(_.toString.contains("/org/postgresql/postgresql/"))
    )

    val example = parseBloopFile(bloopPath.resolve("example.json"))
    assert(
      example.project.classpath
        .exists(_.toString.contains("/org/postgresql/postgresql/"))
    )

    val exampleTest = parseBloopFile(bloopPath.resolve("example-test.json"))
    assert(
      exampleTest.project.classpath
        .exists(_.toString.contains("/org/postgresql/postgresql/"))
    )
  }
} 
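parseBloopFile reads the whole configuration file into a string with FileUtils.readFileToString before handing it to the circe decoder. The read step on its own, as a minimal sketch (the JSON decoding is project-specific and omitted):

import java.io.File
import java.nio.charset.StandardCharsets
import org.apache.commons.io.FileUtils

object ReadSketch {
  // Loads the entire file into memory; fine for small configuration files such as .bloop/*.json.
  def slurp(file: File): String =
    FileUtils.readFileToString(file, StandardCharsets.UTF_8)
}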
Example 41
Source File: PublishSpec.scala    From seed   with Apache License 2.0 5 votes vote down vote up
package seed.cli

import java.io.File
import java.nio.file.{Files, Path}

import minitest.SimpleTestSuite
import org.apache.commons.io.FileUtils
import seed.Log
import seed.generation.util.BuildUtil

import sys.process._

object PublishSpec extends SimpleTestSuite {
  def testVersionDetection(path: File): Unit = {
    Process("git init", path).!!

    FileUtils.write(new File(path, "test.txt"), "test", "UTF-8")
    Process("git add test.txt", path).!!
    Process("git commit . -m import", path).!!
    Process("git tag 0.1.0", path).!! // no 'v' prefix
    assertEquals(
      Publish.getVersion(path.toPath, None, Log.silent),
      Some("0.1.0")
    )

    FileUtils.write(new File(path, "test2.txt"), "test", "UTF-8")
    Process("git add test2.txt", path).!!
    Process("git commit . -m import", path).!!
    Process("git tag v0.1.1", path).!! // 'v' prefix
    assertEquals(
      Publish.getVersion(path.toPath, None, Log.silent),
      Some("0.1.1")
    )
  }

  test("Determine version number (relative path)") {
    val relativePath = new File("temp-git-version")
    if (Files.exists(relativePath.toPath))
      FileUtils.deleteDirectory(relativePath)
    Files.createDirectories(relativePath.toPath)
    testVersionDetection(relativePath)
    FileUtils.deleteDirectory(relativePath)
  }

  test("Determine version number (absolute path)") {
    val relativePath = BuildUtil.tempPath.resolve("git-version")
    if (Files.exists(relativePath))
      FileUtils.deleteDirectory(relativePath.toFile)
    Files.createDirectories(relativePath)
    testVersionDetection(relativePath.toFile)
  }
} 
Example 42
Source File: WatcherSpec.scala    From seed   with Apache License 2.0 5 votes vote down vote up
package seed.cli.util

import java.nio.file.Files

import minitest.SimpleTestSuite
import org.apache.commons.io.FileUtils
import seed.generation.util.BuildUtil
import zio.IO

import scala.collection.mutable
import scala.concurrent.ExecutionContext.Implicits.global

object WatcherSpec extends SimpleTestSuite {
  testAsync("Detect new file in root path") {
    val rootPath = BuildUtil.tempPath.resolve("watcher")
    Files.createDirectories(rootPath)

    val collected = mutable.ListBuffer[Unit]()
    var stop      = false

    val watcher = Watcher
      .watchPaths(
        List(rootPath),
        () => {
          // Only consider Scala/Java source files
          FileUtils.write(rootPath.resolve("test.html").toFile, "test", "UTF-8")
          stop = true
          FileUtils
            .write(rootPath.resolve("test.scala").toFile, "test", "UTF-8")
        }
      )
      .foreachWhile { v =>
        IO.effectTotal {
          collected += v
          !stop
        }
      }

    RTS.unsafeRunToFuture(watcher).map(_ => assertEquals(collected, List(())))
  }

  testAsync("Detect new file in sub-directory") {
    val rootPath         = BuildUtil.tempPath.resolve("watcher2")
    val subDirectoryPath = rootPath.resolve("sub")
    Files.createDirectories(subDirectoryPath)

    val collected = mutable.ListBuffer[Unit]()
    var stop      = false

    val watcher = Watcher
      .watchPaths(List(rootPath), { () =>
        stop = true
        FileUtils.write(rootPath.resolve("test.scala").toFile, "test", "UTF-8")
      })
      .foreachWhile { v =>
        IO.effectTotal {
          collected += v
          !stop
        }
      }

    RTS.unsafeRunToFuture(watcher).map(_ => assertEquals(collected, List(())))
  }

  testAsync("Watch file path") {
    val rootPath = BuildUtil.tempPath.resolve("watcher3")
    Files.createDirectories(rootPath)
    val filePath = rootPath.resolve("test.scala")
    FileUtils.write(filePath.toFile, "test", "UTF-8")

    val collected = mutable.ListBuffer[Unit]()
    var stop      = false

    val watcher = Watcher
      .watchPaths(List(filePath), { () =>
        stop = true
        FileUtils.write(filePath.toFile, "test2", "UTF-8")
      })
      .foreachWhile { v =>
        IO.effectTotal {
          collected += v
          !stop
        }
      }

    RTS.unsafeRunToFuture(watcher).map(_ => assertEquals(collected, List(())))
  }
} 
Example 43
Source File: XMLFilesModelProviderTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.generic

import java.io.File

import org.apache.commons.io.FileUtils
import com.ebay.rtran.xml.XMLFilesModelProvider
import org.scalatest.{FlatSpecLike, Matchers}

import scala.io.Source


class XMLFilesModelProviderTest extends FlatSpecLike with Matchers {

  val projectRoot = new File(getClass.getClassLoader.getResource("someproject").getFile)

  "XMLFilesModeProvider" should "get all xml files in the project" in {
    val provider = new XMLFilesModelProvider
    val model = provider.create(new GenericProjectCtx(projectRoot))
    model.xmlRoots.size should be (1)
  }

  "XMLFilesModeProvider" should "be able to save the files that are marked modified" in {
    val provider = new XMLFilesModelProvider
    val model = provider.create(new GenericProjectCtx(projectRoot))
    val (file, root) = model.xmlRoots.head
    val newFile = new File(file.getParentFile, file.getName + ".new")
    provider.save(model.copy(modified = Map(newFile -> Some(root))))
    val content = Source.fromFile(newFile).getLines.filterNot(_.matches("\\s+")).map(_.trim).mkString
    content should not be ""
    FileUtils.deleteQuietly(newFile)
  }

} 
Example 44
Source File: LogWriterSpec.scala    From mist   with Apache License 2.0 5 votes vote down vote up
package io.hydrosphere.mist.master.logging

import java.nio.file.{Files, Paths}

import akka.actor.ActorSystem
import akka.pattern.ask
import akka.testkit.{TestActorRef, TestKit}
import akka.util.Timeout
import com.typesafe.config.ConfigFactory
import io.hydrosphere.mist.core.logging.LogEvent
import io.hydrosphere.mist.master.LogStoragePaths
import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterAll, FunSpecLike, Matchers}

import scala.concurrent.Await
import scala.concurrent.duration._

class LogWriterSpec extends TestKit(ActorSystem("log-writer-test", ConfigFactory.load("master")))
  with FunSpecLike
  with Matchers
  with BeforeAndAfterAll {

  val dirName = "log_writer_test"
  val dir = Paths.get(".", "target", dirName)

  override def beforeAll(): Unit = {
    Files.createDirectories(dir)
  }

  override def afterAll(): Unit = {
    FileUtils.deleteDirectory(dir.toFile)
    TestKit.shutdownActorSystem(system)
  }

  implicit val timeout = Timeout(5 second)

  describe("writer actor") {

    it("should write to file") {
      val path = dir.resolve("test")
      val f = path.toFile
      if (f.exists()) f.delete()
      Files.createFile(path)

      val actor = TestActorRef(WriterActor.props(path))

      val event = LogEvent.mkDebug("id", "message")
      val future = actor ? WriteRequest("id", Seq(event))
      val update = Await.result(future.mapTo[LogUpdate], Duration.Inf)

      update.jobId shouldBe "id"
      update.events shouldBe Seq(event)
      update.bytesOffset shouldBe (event.mkString + "\n").getBytes.length
    }
  }

  describe("writers group") {

    it("should proxy to writer") {
      val mappings = new LogStoragePaths(dir)
      val expectedPath = mappings.pathFor("id")
      if (Files.exists(expectedPath)) Files.delete(expectedPath)

      val actor = TestActorRef(WritersGroupActor.props(mappings))

      val event = LogEvent.mkDebug("id", "message")
      val future = actor ? WriteRequest("id", Seq(event))
      val update = Await.result(future.mapTo[LogUpdate], Duration.Inf)

      val expectedSize = (event.mkString + "\n").getBytes.length

      update.jobId shouldBe "id"
      update.events shouldBe Seq(event)
      update.bytesOffset shouldBe expectedSize

      Files.readAllBytes(expectedPath).length shouldBe expectedSize
    }
  }
} 
Example 45
Source File: FStorageSpec.scala    From mist   with Apache License 2.0 5 votes vote down vote up
package io.hydrosphere.mist.master.data

import java.nio.file.Paths

import com.typesafe.config.{Config, ConfigValueFactory}
import io.hydrosphere.mist.master.models.NamedConfig
import org.apache.commons.io.FileUtils
import org.scalatest._

class FStorageSpec extends FunSpec with Matchers with BeforeAndAfter {

  case class TestEntry(
    name: String,
    value: Int
  ) extends NamedConfig

  val testEntryConfigRepr = new ConfigRepr[TestEntry] {
    import scala.collection.JavaConverters._

    override def fromConfig(config: Config): TestEntry = {
      TestEntry(config.getString("name"), config.getInt("value"))
    }

    override def toConfig(a: TestEntry): Config = {
      val map = Map("value" -> ConfigValueFactory.fromAnyRef(a.value))
      ConfigValueFactory.fromMap(map.asJava).toConfig
    }
  }

  val path = "./target/file_store_test"

  before {
    val f = Paths.get(path).toFile
    if (f.exists()) FileUtils.deleteDirectory(f)
  }

  it("should store files") {
    val storage = FsStorage.create(path, testEntryConfigRepr)

    storage.write("one", TestEntry("one", 1))
    storage.write("two", TestEntry("two", 2))

    storage.entries should contain allOf(
      TestEntry("one", 1),
      TestEntry("two", 2)
    )

    storage.delete("one")
    storage.entries should contain allElementsOf(Seq(TestEntry("two", 2)))
  }

} 
Example 46
Source File: FunctionConfigStorageSpec.scala    From mist   with Apache License 2.0 5 votes vote down vote up
package io.hydrosphere.mist.master.data

import java.nio.file.Paths

import io.hydrosphere.mist.master.models.FunctionConfig
import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfter, Matchers, FunSpec}

class FunctionConfigStorageSpec extends FunSpec with Matchers with BeforeAndAfter {

  val path = "./target/data/func_store_test"

  before {
    val f = Paths.get(path).toFile
    if (f.exists()) FileUtils.deleteDirectory(f)
  }

  import scala.concurrent.ExecutionContext.Implicits.global
  import io.hydrosphere.mist.master.TestUtils._

  it("should update") {
    val functions = testStorage()

    functions.all.await.size shouldBe 1

    functions.update(FunctionConfig("second", "path", "className", "foo")).await
    functions.all.await.size shouldBe 2
  }

  it("should get") {
    val functions = testStorage()

    functions.get("first").await.isDefined shouldBe true
    functions.get("second").await.isDefined shouldBe false

    functions.update(FunctionConfig("second", "path", "className", "foo")).await
    functions.get("second").await.isDefined shouldBe true
  }

  it("should override defaults") {
    val functions = testStorage()

    functions.get("first").await.get.className shouldBe "className"

    functions.update(FunctionConfig("first", "path", "anotherClassName", "foo")).await
    functions.get("first").await.get.className shouldBe "anotherClassName"
  }

  def testStorage(
    defaults: Seq[FunctionConfig] = Seq(FunctionConfig("first", "path", "className", "foo"))): FunctionConfigStorage = {
    new FunctionConfigStorage(
      FsStorage.create(path, ConfigRepr.EndpointsRepr),
      defaults
    )
  }
} 
Example 47
Source File: RunnerSelectorSpec.scala    From mist   with Apache License 2.0 5 votes vote down vote up
package io.hydrosphere.mist.worker.runners

import java.io.File
import java.nio.file.Paths

import io.hydrosphere.mist.worker.SparkArtifact
import io.hydrosphere.mist.worker.runners.python.PythonRunner
import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfter, FunSpecLike, Matchers}

class RunnerSelectorSpec extends FunSpecLike
  with Matchers
  with BeforeAndAfter {
  val basePath = "./target/runner"
  val pyFile = SparkArtifact(Paths.get(basePath, "test.py").toFile, "url")
  val jarFile = SparkArtifact(Paths.get(basePath, "test.jar").toFile, "url")
  val unknown = SparkArtifact(Paths.get(basePath, "test.unknown").toFile, "url")

  before {
    val f = new File(basePath)
    if (f.exists()) FileUtils.deleteDirectory(f)
    FileUtils.forceMkdir(f)
    FileUtils.touch(pyFile.local)
    FileUtils.touch(jarFile.local)
  }

  after {
    FileUtils.deleteQuietly(pyFile.local)
    FileUtils.deleteQuietly(jarFile.local)
  }

  it("should select runner by extension") {
    val selector = new SimpleRunnerSelector
    selector.selectRunner(pyFile) shouldBe a[PythonRunner]
    selector.selectRunner(jarFile) shouldBe a[ScalaRunner]
  }


  it("should throw exception when unknown file type is passed") {
    val selector = new SimpleRunnerSelector
    intercept[IllegalArgumentException] {
      selector.selectRunner(unknown)
    }
  }

} 
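The before hook above resets a scratch directory with deleteDirectory and forceMkdir and then creates empty marker files with FileUtils.touch. The same fixture setup as a standalone sketch; the directory layout here is arbitrary:

import java.io.File
import org.apache.commons.io.FileUtils

object FixtureSketch {
  // Recreates `baseDir` from scratch and returns the empty files created inside it.
  def resetFixture(baseDir: File, fileNames: Seq[String]): Seq[File] = {
    if (baseDir.exists()) FileUtils.deleteDirectory(baseDir) // start from a clean slate
    FileUtils.forceMkdir(baseDir)                            // creates the directory and any missing parents
    fileNames.map { name =>
      val f = new File(baseDir, name)
      FileUtils.touch(f) // creates an empty file, or updates the timestamp if it already exists
      f
    }
  }
}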
Example 48
Source File: ForkedSparkContextSpec.scala    From pravda-ml   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ml.odkl

import java.io.File

import breeze.linalg
import odkl.analysis.spark.TestEnv
import odkl.analysis.spark.util.SQLOperations
import org.apache.commons.io.FileUtils
import org.scalatest.FlatSpec

class ForkedSparkContextSpec extends FlatSpec with TestEnv with org.scalatest.Matchers with WithTestData {

  "Fork " should " support one layer" in {
    val directory = new File(FileUtils.getTempDirectory, "forkedSpark")
    try {
      val estimator = new ForkedSparkEstimator[LinearRegressionModel,LinearRegressionSGD](new LinearRegressionSGD())
        .setTempPath(directory.getAbsolutePath)
        .setMaster("local[1]")

      val model = estimator.fit(noInterceptData)

      val dev: linalg.Vector[Double] = hiddenModel.asBreeze - model.getCoefficients.asBreeze

      val deviation: Double = dev dot dev

      deviation should be <= delta
      model.getIntercept should be(0.0)
    } finally {
      FileUtils.deleteDirectory(directory)
    }
  }

  "Fork " should " support two layers" in {
    val directory = new File(FileUtils.getTempDirectory, "forkedSpark")
    try {
      val estimator =  new ForkedSparkEstimator[LinearRegressionModel,ForkedSparkEstimator[LinearRegressionModel,LinearRegressionSGD]](
          new ForkedSparkEstimator[LinearRegressionModel,LinearRegressionSGD](new LinearRegressionSGD())
          .setTempPath(directory.getAbsolutePath)
          .setMaster("local[1]"))
        .setTempPath(directory.getAbsolutePath)
        .setMaster("local[1]")

      val model = estimator.fit(noInterceptData)

      val dev: linalg.Vector[Double] = hiddenModel.asBreeze - model.getCoefficients.asBreeze

      val deviation: Double = dev dot dev

      deviation should be <= delta
      model.getIntercept should be(0.0)
    } finally {
      FileUtils.deleteDirectory(directory)
    }
  }
} 
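Both tests above build their scratch directory under FileUtils.getTempDirectory, which resolves the java.io.tmpdir system property as a File. A small sketch of that helper in isolation, with illustrative names:

import java.io.File
import java.util.UUID
import org.apache.commons.io.FileUtils

object TempPathSketch {
  // A unique scratch directory under the system temp dir, e.g. /tmp/sketch-<uuid> on Linux.
  def uniqueTempDir(prefix: String): File = {
    val dir = new File(FileUtils.getTempDirectory, s"$prefix-${UUID.randomUUID()}")
    FileUtils.forceMkdir(dir)
    dir
  }
}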
Example 49
Source File: BetweennessEdmonds$Test.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds

import java.nio.file.Files

import ml.sparkling.graph.operators.MeasureTest
import org.apache.commons.io.FileUtils
import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Graph, VertexRDD}


class BetweennessEdmonds$Test(implicit sc: SparkContext) extends MeasureTest {
  val tempDir = Files.createTempDirectory("spark-checkpoint")

  override def beforeAll() = {
    sc.setCheckpointDir(tempDir.toAbsolutePath.toString)
  }

  override def afterAll() = {
    FileUtils.deleteDirectory(tempDir.toFile)
  }

  "Edmonds betweenness centrality for random graph" should "be correctly calculated" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/graph_ER_15")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    When("Computes betweenness")
    val result = EdmondsBC.computeBC(graph)
    Then("Should calculate betweenness correctly")
    val bcFile = getClass.getResource("/graphs/graph_ER_15_bc")
    val bcCorrectValues = sc.textFile(bcFile.getPath)
      .filter(_.nonEmpty)
      .map(l => { val t = l.split("\t", 2); (t(0).toInt, t(1).toDouble) })
      .sortBy({ case (vId, data) => vId })
      .map({ case (vId, data) => data}).collect()
    val bcValues = result.sortBy({ case (vId, data) => vId })
      .map({ case (vId, data) => data }).collect()
    bcCorrectValues.zip(bcValues).foreach({ case (a, b) =>
      a should be(b +- 1e-5)
    })

    result.unpersist(false)
  }

} 
Example 50
Source File: BetweennessHua$Test.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package ml.sparkling.graph.operators.measures.vertex.betweenness.hua

import java.nio.file.Files

import ml.sparkling.graph.operators.MeasureTest
import ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds.EdmondsBC
import org.apache.commons.io.FileUtils
import org.apache.spark.SparkContext
import org.apache.spark.graphx.Graph
import org.scalatest.tagobjects.Slow


class BetweennessHua$Test (implicit sc: SparkContext) extends MeasureTest {
  val tempDir = Files.createTempDirectory("spark-checkpoint")

  override def beforeAll() = {
    sc.setCheckpointDir(tempDir.toAbsolutePath.toString)
  }

  override def afterAll() = {
    FileUtils.deleteDirectory(tempDir.toFile)
  }

  "Hua betweenness centrality for random graph" should "be correctly calculated" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/graph_ER_15")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    When("Computes betweenness")
    val result = HuaBC.computeBC(graph)
    Then("Should calculate betweenness correctly")
    val bcFile = getClass.getResource("/graphs/graph_ER_15_bc")
    val bcCorrectValues = sc.textFile(bcFile.getPath)
      .filter(_.nonEmpty)
      .map(l => { val t = l.split("\t", 2); (t(0).toInt, t(1).toDouble) })
      .sortBy({ case (vId, data) => vId })
      .map({ case (vId, data) => data}).collect()
    val bcValues = result.sortBy({ case (vId, data) => vId })
      .map({ case (vId, data) => data }).collect()
    bcCorrectValues.zip(bcValues).foreach({ case (a, b) =>
      a should be(b +- 1e-5)
    })

    result.unpersist(false)
  }

  "Hua betweenness centrality for random graph" should "take no longer then Edmonds" taggedAs(Slow) in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/graph_ER_15")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    When("computes betwenness centrality")
    val (_, edmondsTime) = time("Edmonds algorithm for betweenness centrality")(EdmondsBC.computeBC(graph))
    val (_, huaTime) = time("Hua algorithm for betweenness centrality")(HuaBC.computeBC(graph))
    Then("Hua algorithm should be faster")
    huaTime should be <= edmondsTime
  }

} 
Example 51
Source File: SparkTest.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package ml.sparkling.graph.operators

import java.nio.file.{Files, Path}

import ml.sparkling.graph.operators.algorithms.aproximation.ApproximatedShortestPathsAlgorithm$Test
import ml.sparkling.graph.operators.algorithms.coarsening.labelpropagation.LPCoarsening$Test
import ml.sparkling.graph.operators.algorithms.community.pscan.PSCAN$Test
import ml.sparkling.graph.operators.algorithms.link.BasicLinkPredictor$Test
import ml.sparkling.graph.operators.algorithms.shortestpaths.ShortestPathsAlgorithm$Test
import ml.sparkling.graph.operators.measures.edge.AdamicAdar$Test
import ml.sparkling.graph.operators.measures.graph.{FreemanCentrality$Test, Modularity$Test}
import ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds.BetweennessEdmonds$Test
import ml.sparkling.graph.operators.measures.vertex.betweenness.hua.BetweennessHua$Test
import ml.sparkling.graph.operators.measures.vertex.closenes.Closeness$Test
import ml.sparkling.graph.operators.measures.vertex.clustering.LocalClustering$Test
import ml.sparkling.graph.operators.measures.vertex.eigenvector.EigenvectorCentrality$Test
import ml.sparkling.graph.operators.measures.vertex.hits.Hits$Test
import ml.sparkling.graph.operators.measures.{NeighborhoodConnectivity$Test, VertexEmbeddedness$Test}
import ml.sparkling.graph.operators.partitioning.{CommunityBasedPartitioning$Test, PSCANBasedPartitioning$Test, PropagationBasedPartitioning$Test}
import org.apache.commons.io.FileUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest._


class SparkTest extends Spec with BeforeAndAfterAll  {
  val file: Path = Files.createTempDirectory("tmpCheckpoint")
  override val invokeBeforeAllAndAfterAllEvenIfNoTestsAreExpected=true
  val master = "local[8]"


  def appName: String = "operators-tests"

  implicit val sc: SparkContext = {
    val conf = new SparkConf()
      .setMaster(master)
      .setAppName(appName)
    val out=new SparkContext(conf)
    out.setCheckpointDir(file.toString)
    out
  }


  override def afterAll() = {
    if(!sc.isStopped){
      sc.stop()
    }
    FileUtils.deleteDirectory(file.toFile)
  }


  override def nestedSuites = {
    Vector(
      new PSCANBasedPartitioning$Test,
      new PropagationBasedPartitioning$Test,
      new ApproximatedShortestPathsAlgorithm$Test,
      new ShortestPathsAlgorithm$Test,
      new EigenvectorCentrality$Test,
      new VertexEmbeddedness$Test,
      new PSCAN$Test,
      new Modularity$Test,
      new CommunityBasedPartitioning$Test,
      new NeighborhoodConnectivity$Test,
      new Hits$Test,
      new LocalClustering$Test,
      new FreemanCentrality$Test,
      new AdamicAdar$Test,
      new BasicLinkPredictor$Test,
      new Closeness$Test,
      new BetweennessEdmonds$Test,
      new BetweennessHua$Test
    )
  }


} 
Example 52
Source File: SortShuffleSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark

import java.io.File

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.rdd.ShuffledRDD
import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
import org.apache.spark.shuffle.sort.SortShuffleManager
import org.apache.spark.util.Utils

class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll {

  // This test suite should run all tests in ShuffleSuite with sort-based shuffle.

  private var tempDir: File = _

  override def beforeAll() {
    super.beforeAll()
    // Once 'spark.local.dir' is set, it is cached. Unless this is manually cleared
    // before/after a test, it could return the same directory even if this property
    // is configured.
    Utils.clearLocalRootDirs()
    conf.set("spark.shuffle.manager", "sort")
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    tempDir = Utils.createTempDir()
    conf.set("spark.local.dir", tempDir.getAbsolutePath)
  }

  override def afterEach(): Unit = {
    try {
      Utils.deleteRecursively(tempDir)
      Utils.clearLocalRootDirs()
    } finally {
      super.afterEach()
    }
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the new serialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new KryoSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the old deserialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new JavaSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = {
    def getAllFiles: Set[File] =
      FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet
    val filesBeforeShuffle = getAllFiles
    // Force the shuffle to be performed
    shuffledRdd.count()
    // Ensure that the shuffle actually created files that will need to be cleaned up
    val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle
    filesCreatedByShuffle.map(_.getName) should be
    Set("shuffle_0_0_0.data", "shuffle_0_0_0.index")
    // Check that the cleanup actually removes the files
    sc.env.blockManager.master.removeShuffle(0, blocking = true)
    for (file <- filesCreatedByShuffle) {
      assert (!file.exists(), s"Shuffle file $file was not cleaned up")
    }
  }
} 
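getAllFiles above snapshots every file under the shuffle directory with FileUtils.listFiles and TrueFileFilter so the sets taken before and after the shuffle can be diffed. The listing call on its own, as a short sketch:

import java.io.File

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter

object ListFilesSketch {
  // Recursively lists regular files: the first filter accepts every file,
  // the second tells FileUtils to descend into every subdirectory.
  def allFiles(root: File): Set[File] =
    FileUtils.listFiles(root, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet
}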
Example 53
Source File: NsdbMiniCluster.scala    From NSDb   with Apache License 2.0 5 votes vote down vote up
package io.radicalbit.nsdb.minicluster

import java.io.File
import java.time.Duration
import java.util.UUID

import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.io.FileUtils

trait NsdbMiniCluster extends LazyLogging {

  protected[this] val instanceId = { UUID.randomUUID }

  protected[this] val startingHostname = "127.0.0."

  protected[this] def rootFolder: String
  protected[this] def nodesNumber: Int
  protected[this] def passivateAfter: Duration
  protected[this] def replicationFactor: Int

  lazy val nodes: Set[NSDbMiniClusterNode] =
    (for {
      i <- 0 until nodesNumber
    } yield
      new NSDbMiniClusterNode(
        hostname = s"$startingHostname${i + 1}",
        storageDir = s"$rootFolder/data$i",
        passivateAfter = passivateAfter,
        replicationFactor = replicationFactor
      )).toSet

  def start(cleanup: Boolean = false): Unit = {
    if (cleanup)
      FileUtils.deleteDirectory(new File(rootFolder))
    nodes.foreach(_.start())
  }

  def stop(): Unit = nodes.foreach(n => n.stop())

} 
Example 54
Source File: GzipUtils.scala    From odinson   with Apache License 2.0 5 votes vote down vote up
package ai.lum.odinson.extra

import org.apache.commons.io.FileUtils
import java.io._
import java.util.zip._
import java.nio.charset.StandardCharsets


object GzipUtils {

  def compress(data: String): Array[Byte] = {
    val baos = new ByteArrayOutputStream(data.length)
    val gzip = new GZIPOutputStream(baos)
    val bytes = data.getBytes(StandardCharsets.UTF_8)
    gzip.write(bytes)
    gzip.close()
    val compressed = baos.toByteArray
    baos.close()
    compressed
  }

  def uncompress(file: File): String = {
    val inputStream = FileUtils.openInputStream(file)
    val res = uncompress(inputStream)
    inputStream.close()
    res
  }

  def uncompress(compressed: Array[Byte]): String = {
    uncompress(new ByteArrayInputStream(compressed))
  }

  def uncompress(input: InputStream): String = {
    val gzip = new GZIPInputStream(input)
    val br = new BufferedReader(new InputStreamReader(gzip, StandardCharsets.UTF_8))
    val sb = new StringBuilder()
    var line: String = br.readLine()
    while (line != null) {
      sb.append(line)
      line = br.readLine()
    }
    br.close()
    gzip.close()
    sb.toString()
  }

} 
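The uncompress(file: File) overload above opens the file through FileUtils.openInputStream rather than new FileInputStream; openInputStream raises a descriptive exception when the path does not exist, is a directory, or cannot be read. A minimal sketch of that wrapper with explicit resource handling; the helper name is mine:

import java.io.{File, InputStream}
import org.apache.commons.io.FileUtils

object OpenStreamSketch {
  // Hands the opened stream to `body` and always closes it, even if `body` throws.
  def withInputStream[T](file: File)(body: InputStream => T): T = {
    val in = FileUtils.openInputStream(file)
    try body(in)
    finally in.close()
  }
}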
Example 55
Source File: YarnShuffleIntegrationSuite.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.yarn

import java.io.File

import com.google.common.base.Charsets.UTF_8
import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers

import org.apache.spark._
import org.apache.spark.network.shuffle.ShuffleTestAccessor
import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
import org.apache.spark.tags.ExtendedYarnTest


@ExtendedYarnTest
class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {

  override def newYarnConfig(): YarnConfiguration = {
    val yarnConfig = new YarnConfiguration()
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
      classOf[YarnShuffleService].getCanonicalName)
    yarnConfig.set("spark.shuffle.service.port", "0")
    yarnConfig
  }

  test("external shuffle service") {
    val shuffleServicePort = YarnTestAccessor.getShuffleServicePort
    val shuffleService = YarnTestAccessor.getShuffleServiceInstance

    val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService)

    logInfo("Shuffle service port = " + shuffleServicePort)
    val result = File.createTempFile("result", null, tempDir)
    val finalState = runSpark(
      false,
      mainClassName(YarnExternalShuffleDriver.getClass),
      appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath),
      extraConf = Map(
        "spark.shuffle.service.enabled" -> "true",
        "spark.shuffle.service.port" -> shuffleServicePort.toString
      )
    )
    checkResult(finalState, result)
    assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists())
  }
}

private object YarnExternalShuffleDriver extends Logging with Matchers {

  val WAIT_TIMEOUT_MILLIS = 10000

  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // scalastyle:off println
      System.err.println(
        s"""
        |Invalid command line: ${args.mkString(" ")}
        |
        |Usage: ExternalShuffleDriver [result file] [registered exec file]
        """.stripMargin)
      // scalastyle:on println
      System.exit(1)
    }

    val sc = new SparkContext(new SparkConf()
      .setAppName("External Shuffle Test"))
    val conf = sc.getConf
    val status = new File(args(0))
    val registeredExecFile = new File(args(1))
    logInfo("shuffle service executor file = " + registeredExecFile)
    var result = "failure"
    val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup")
    try {
      val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }.
        collect().toSet
      sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
      data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet)
      result = "success"
      // only one process can open a leveldb file at a time, so we copy the files
      FileUtils.copyDirectory(registeredExecFile, execStateCopy)
      assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty)
    } finally {
      sc.stop()
      FileUtils.deleteDirectory(execStateCopy)
      Files.write(result, status, UTF_8)
    }
  }

} 
Example 56
Source File: SortShuffleSuite.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark

import java.io.File

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.rdd.ShuffledRDD
import org.apache.spark.shuffle.sort.SortShuffleManager
import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
import org.apache.spark.util.Utils

class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll {

  // This test suite should run all tests in ShuffleSuite with sort-based shuffle.

  private var tempDir: File = _

  override def beforeAll() {
    conf.set("spark.shuffle.manager", "sort")
  }

  override def beforeEach(): Unit = {
    tempDir = Utils.createTempDir()
    conf.set("spark.local.dir", tempDir.getAbsolutePath)
  }

  override def afterEach(): Unit = {
    try {
      Utils.deleteRecursively(tempDir)
    } finally {
      super.afterEach()
    }
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the new serialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new KryoSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the old deserialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new JavaSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = {
    def getAllFiles: Set[File] =
      FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet
    val filesBeforeShuffle = getAllFiles
    // Force the shuffle to be performed
    shuffledRdd.count()
    // Ensure that the shuffle actually created files that will need to be cleaned up
    val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle
    filesCreatedByShuffle.map(_.getName) should be
    Set("shuffle_0_0_0.data", "shuffle_0_0_0.index")
    // Check that the cleanup actually removes the files
    sc.env.blockManager.master.removeShuffle(0, blocking = true)
    for (file <- filesCreatedByShuffle) {
      assert (!file.exists(), s"Shuffle file $file was not cleaned up")
    }
  }
} 
Example 57
Source File: YarnShuffleIntegrationSuite.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.yarn

import java.io.File
import java.nio.charset.StandardCharsets

import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers

import org.apache.spark._
import org.apache.spark.internal.Logging
import org.apache.spark.network.shuffle.ShuffleTestAccessor
import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
import org.apache.spark.tags.ExtendedYarnTest


@ExtendedYarnTest
class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {

  override def newYarnConfig(): YarnConfiguration = {
    val yarnConfig = new YarnConfiguration()
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
      classOf[YarnShuffleService].getCanonicalName)
    yarnConfig.set("spark.shuffle.service.port", "0")
    yarnConfig
  }

  test("external shuffle service") {
    val shuffleServicePort = YarnTestAccessor.getShuffleServicePort
    val shuffleService = YarnTestAccessor.getShuffleServiceInstance

    val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService)

    logInfo("Shuffle service port = " + shuffleServicePort)
    val result = File.createTempFile("result", null, tempDir)
    val finalState = runSpark(
      false,
      mainClassName(YarnExternalShuffleDriver.getClass),
      appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath),
      extraConf = Map(
        "spark.shuffle.service.enabled" -> "true",
        "spark.shuffle.service.port" -> shuffleServicePort.toString
      )
    )
    checkResult(finalState, result)
    assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists())
  }
}

private object YarnExternalShuffleDriver extends Logging with Matchers {

  val WAIT_TIMEOUT_MILLIS = 10000

  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // scalastyle:off println
      System.err.println(
        s"""
        |Invalid command line: ${args.mkString(" ")}
        |
        |Usage: ExternalShuffleDriver [result file] [registered exec file]
        """.stripMargin)
      // scalastyle:on println
      System.exit(1)
    }

    val sc = new SparkContext(new SparkConf()
      .setAppName("External Shuffle Test"))
    val conf = sc.getConf
    val status = new File(args(0))
    val registeredExecFile = new File(args(1))
    logInfo("shuffle service executor file = " + registeredExecFile)
    var result = "failure"
    val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup")
    try {
      val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }.
        collect().toSet
      sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
      data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet)
      result = "success"
      // only one process can open a leveldb file at a time, so we copy the files
      FileUtils.copyDirectory(registeredExecFile, execStateCopy)
      assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty)
    } finally {
      sc.stop()
      FileUtils.deleteDirectory(execStateCopy)
      Files.write(result, status, StandardCharsets.UTF_8)
    }
  }

} 
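
The driver above copies the registered-executor LevelDB directory with `FileUtils.copyDirectory` before reading it, since only one process can hold the LevelDB files open, and removes the copy with `FileUtils.deleteDirectory` afterwards. A minimal sketch of that copy-inspect-delete pattern in isolation (the directory contents are illustrative):

import java.io.File
import java.nio.file.Files

import org.apache.commons.io.FileUtils

object CopyThenInspectSketch extends App {
  // Stand-in for a directory that another process keeps open (contents are fake).
  val live = Files.createTempDirectory("registeredExecutors").toFile
  FileUtils.writeStringToFile(new File(live, "CURRENT"), "MANIFEST-000001", "UTF-8")

  // Work on a private copy, then throw the copy away.
  val copy = new File(live.getAbsolutePath + "_dup")
  FileUtils.copyDirectory(live, copy)
  try {
    println(s"copied ${copy.listFiles().length} file(s)")
  } finally {
    FileUtils.deleteDirectory(copy)
    FileUtils.deleteDirectory(live)
  }
}
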
Example 58
Source File: SortShuffleSuite.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark

import java.io.File

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.rdd.ShuffledRDD
import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
import org.apache.spark.shuffle.sort.SortShuffleManager
import org.apache.spark.util.Utils

class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll {

  // This test suite should run all tests in ShuffleSuite with sort-based shuffle.

  private var tempDir: File = _

  override def beforeAll() {
    super.beforeAll()
    conf.set("spark.shuffle.manager", "sort")
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    tempDir = Utils.createTempDir()
    conf.set("spark.local.dir", tempDir.getAbsolutePath)
  }

  override def afterEach(): Unit = {
    try {
      Utils.deleteRecursively(tempDir)
    } finally {
      super.afterEach()
    }
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the new serialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new KryoSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the old deserialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new JavaSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = {
    def getAllFiles: Set[File] =
      FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet
    val filesBeforeShuffle = getAllFiles
    // Force the shuffle to be performed
    shuffledRdd.count()
    // Ensure that the shuffle actually created files that will need to be cleaned up
    val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle
    // Keep the matcher and the expected set in one expression so the assertion actually runs
    filesCreatedByShuffle.map(_.getName) should be (Set("shuffle_0_0_0.data", "shuffle_0_0_0.index"))
    // Check that the cleanup actually removes the files
    sc.env.blockManager.master.removeShuffle(0, blocking = true)
    for (file <- filesCreatedByShuffle) {
      assert(!file.exists(), s"Shuffle file $file was not cleaned up")
    }
  }
} 
Example 59
Source File: ScenarioLoader.scala    From mantis   with Apache License 2.0 5 votes vote down vote up
package io.iohk.ethereum.ets.common

import java.io.File

import io.iohk.ethereum.utils.Logger
import org.apache.commons.io.FileUtils

import scala.collection.JavaConverters._
import scala.io.Source


trait ScenarioLoader[T] extends ScenarioParser[T] with Logger {

  def load(path: String, options: TestOptions, ignoredTestNames: Set[String] = Set.empty): List[ScenarioGroup[T]] = {
    val testDir = new File(getClass.getClassLoader.getResource(path).toURI)
    val files = FileUtils.listFiles(testDir, Array("json"), true).asScala.toList

    files.filterNot(file => ignoredTestNames.contains(file.getName)).flatMap { file =>
      val name = file.getAbsolutePath.drop(testDir.getAbsolutePath.length + 1).dropRight(".json".length)

      if (!options.isGroupIncluded(name))
        None
      else {
        log.info(s"Loading test scenarios from: $file")
        val text = Source.fromFile(file).getLines.mkString
        val scenarios = parse(text)
        Some(ScenarioGroup(name, scenarios))
      }
    }
  }
} 
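
`FileUtils.listFiles(testDir, Array("json"), true)` above limits the recursive listing to files with the given extensions. A small standalone sketch of that overload (the directory and file names are illustrative):

import java.io.File
import java.nio.file.Files

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils

object ListByExtensionSketch extends App {
  val dir = Files.createTempDirectory("scenarios").toFile
  FileUtils.writeStringToFile(new File(dir, "one.json"), "{}", "UTF-8")
  FileUtils.writeStringToFile(new File(dir, "ignore.txt"), "skip me", "UTF-8")

  // Only *.json files come back; the trailing `true` makes the scan recursive.
  val jsonFiles = FileUtils.listFiles(dir, Array("json"), true).asScala.toList
  println(jsonFiles.map(_.getName)) // List(one.json)

  FileUtils.deleteDirectory(dir)
}
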
Example 60
Source File: InceptionFetcherTest.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.core.fetcher.tensorflow

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.s2graph.core.fetcher.BaseFetcherTest
import play.api.libs.json.Json

class InceptionFetcherTest extends BaseFetcherTest {
  val runDownloadModel: Boolean = true
  val runCleanup: Boolean = true

  def cleanup(downloadPath: String, dir: String) = {
    synchronized {
      FileUtils.deleteQuietly(new File(downloadPath))
      FileUtils.deleteDirectory(new File(dir))
    }
  }
  def downloadModel(dir: String) = {
    import sys.process._
    synchronized {
      FileUtils.forceMkdir(new File(dir))

      val url = "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip"
      val wget = s"wget $url"
      wget !
      val unzip = s"unzip inception5h.zip -d $dir"
      unzip !
    }
  }

  // TODO: make this test case run smoothly
  ignore("test get bytes for image url") {
    val downloadPath = "inception5h.zip"
    val modelPath = "inception"
    try {
      if (runDownloadModel) downloadModel(modelPath)

      val serviceName = "s2graph"
      val columnName = "user"
      val labelName = "image_net"
      val options =
        s"""
           |{
           |  "fetcher": {
           |    "className": "org.apache.s2graph.core.fetcher.tensorflow.InceptionFetcher",
           |    "modelPath": "$modelPath"
           |  }
           |}
       """.stripMargin
      val (service, column, label) = initEdgeFetcher(serviceName, columnName, labelName, Option(options))

      val srcVertices = Seq(
        "http://www.gstatic.com/webp/gallery/1.jpg",
        "http://www.gstatic.com/webp/gallery/2.jpg",
        "http://www.gstatic.com/webp/gallery/3.jpg"
      )
      val stepResult = queryEdgeFetcher(service, column, label, srcVertices)

      stepResult.edgeWithScores.groupBy(_.edge.srcVertex).foreach { case (srcVertex, ls) =>
        val url = srcVertex.innerIdVal.toString
        val scores = ls.map { es =>
          val edge = es.edge
          val label = edge.tgtVertex.innerIdVal.toString
          val score = edge.property[Double]("score").value()

          Json.obj("label" -> label, "score" -> score)
        }
        val jsArr = Json.toJson(scores)
        val json = Json.obj("url" -> url, "scores" -> jsArr)
        println(Json.prettyPrint(json))
      }
    } finally {
      if (runCleanup) cleanup(downloadPath, modelPath)
    }
  }
} 
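
The test above pairs `FileUtils.forceMkdir` for setup with `FileUtils.deleteQuietly` and `FileUtils.deleteDirectory` for cleanup. A compact sketch of that create-then-clean lifecycle (the paths and file contents are illustrative):

import java.io.File

import org.apache.commons.io.FileUtils

object MkdirCleanupSketch extends App {
  val workDir = new File(System.getProperty("java.io.tmpdir"), "inception-demo")

  // forceMkdir creates the whole path and fails loudly if a regular file is in the way.
  FileUtils.forceMkdir(workDir)
  val download = new File(workDir, "model.zip")
  FileUtils.writeStringToFile(download, "not really a zip", "UTF-8")

  // deleteQuietly never throws and ignores missing files; deleteDirectory removes a whole tree.
  FileUtils.deleteQuietly(download)
  FileUtils.deleteDirectory(workDir)
}
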
Example 61
Source File: PailDataSourceSpec.scala    From utils   with Apache License 2.0 5 votes vote down vote up
package com.indix.utils.spark.pail

import java.util

import com.backtype.hadoop.pail.{PailFormatFactory, PailSpec, PailStructure}
import com.backtype.support.{Utils => PailUtils}
import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec}
import org.scalatest.Matchers._

import scala.collection.JavaConverters._
import scala.util.Random

case class User(name: String, age: Int)

class UserPailStructure extends PailStructure[User] {
  override def isValidTarget(dirs: String*): Boolean = true

  override def getType: Class[_] = classOf[User]

  override def serialize(user: User): Array[Byte] = PailUtils.serialize(user)

  override def getTarget(user: User): util.List[String] = List(user.age % 10).map(_.toString).asJava

  override def deserialize(serialized: Array[Byte]): User = PailUtils.deserialize(serialized).asInstanceOf[User]
}

class PailDataSourceSpec extends FlatSpec with BeforeAndAfterAll with PailDataSource {
  private var spark: SparkSession = _

  override protected def beforeAll(): Unit = {
    super.beforeAll()
    spark = SparkSession.builder().master("local[2]").appName("PailDataSource").getOrCreate()
  }

  val userPailSpec = new PailSpec(PailFormatFactory.SEQUENCE_FILE, new UserPailStructure)

  "PailBasedReaderWriter" should "read/write user records from/into pail" in {
    val output = Files.createTempDir()
    val users = (1 to 100).map { index => User(s"foo$index", Random.nextInt(40))}
    spark.sparkContext.parallelize(users)
      .saveAsPail(output.getAbsolutePath, userPailSpec)

    val input = output.getAbsolutePath
    val total = spark.sparkContext.pailFile[User](input)
      .map(u => u.name)
      .count()

    total should be(100)
    FileUtils.deleteDirectory(output)
  }
} 
Example 62
Source File: ParquetAvroDataSourceSpec.scala    From utils   with Apache License 2.0 5 votes vote down vote up
package com.indix.utils.spark.parquet

import java.io.File

import com.google.common.io.Files
import com.indix.utils.spark.parquet.avro.ParquetAvroDataSource
import org.apache.commons.io.FileUtils
import org.apache.parquet.hadoop.metadata.CompressionCodecName
import org.apache.spark.sql.SparkSession
import org.scalactic.Equality
import org.scalatest.Matchers.{be, convertToAnyShouldWrapper, equal}
import org.scalatest.{BeforeAndAfterAll, FlatSpec}
import java.util.{Arrays => JArrays}

case class SampleAvroRecord(a: Int, b: String, c: Seq[String], d: Boolean, e: Double, f: collection.Map[String, String], g: Array[Byte])

class ParquetAvroDataSourceSpec extends FlatSpec with BeforeAndAfterAll with ParquetAvroDataSource {
  private var spark: SparkSession = _
  implicit val sampleAvroRecordEq = new Equality[SampleAvroRecord] {
    override def areEqual(left: SampleAvroRecord, b: Any): Boolean = b match {
      case right: SampleAvroRecord =>
        left.a == right.a &&
          left.b == right.b &&
          Equality.default[Seq[String]].areEqual(left.c, right.c) &&
          left.d == right.d &&
          left.e == right.e &&
          Equality.default[collection.Map[String, String]].areEqual(left.f, right.f) &&
          JArrays.equals(left.g, right.g)
      case _ => false
    }
  }

  override protected def beforeAll(): Unit = {
    super.beforeAll()
    spark = SparkSession.builder().master("local[2]").appName("ParquetAvroDataSource").getOrCreate()
  }

  override protected def afterAll(): Unit = {
    try {
      spark.sparkContext.stop()
    } finally {
      super.afterAll()
    }
  }

  "AvroBasedParquetDataSource" should "read/write avro records as ParquetData" in {

    val outputLocation = Files.createTempDir().getAbsolutePath + "/output"

    val sampleRecords: Seq[SampleAvroRecord] = Seq(
      SampleAvroRecord(1, "1", List("a1"), true, 1.0d, Map("a1" -> "b1"), "1".getBytes),
      SampleAvroRecord(2, "2", List("a2"), false, 2.0d, Map("a2" -> "b2"), "2".getBytes),
      SampleAvroRecord(3, "3", List("a3"), true, 3.0d, Map("a3" -> "b3"), "3".getBytes),
      SampleAvroRecord(4, "4", List("a4"), true, 4.0d, Map("a4" -> "b4"), "4".getBytes),
      SampleAvroRecord(5, "5", List("a5"), false, 5.0d, Map("a5" -> "b5"), "5".getBytes)
    )

    val sampleDf = spark.createDataFrame(sampleRecords)

    sampleDf.rdd.saveAvroInParquet(outputLocation, sampleDf.schema, CompressionCodecName.GZIP)

    val sparkVal = spark

    import sparkVal.implicits._

    val records: Array[SampleAvroRecord] = spark.read.parquet(outputLocation).as[SampleAvroRecord].collect()

    records.length should be(5)
    // We use === to use the custom Equality defined above for comparing Array[Byte]
    // Ref - https://github.com/scalatest/scalatest/issues/491
    records.sortBy(_.a) === sampleRecords.sortBy(_.a)

    FileUtils.deleteDirectory(new File(outputLocation))
  }

} 
Example 63
Source File: RocksMapTest.scala    From utils   with Apache License 2.0 5 votes vote down vote up
package com.indix.utils.store

import java.io.Serializable
import java.nio.file.{Paths, Files}

import org.apache.commons.io.FileUtils
import org.scalatest.{Matchers, FlatSpec}


case class TestObject(a: Int, b: String, c: Array[Int], d: Array[String]) extends Serializable {

  def equals(other: TestObject): Boolean = {
    this.a.equals(other.a) && this.b.equals(other.b) && this.c.sameElements(other.c) && this.d.sameElements(other.d)
  }

}

case class ComplexTestObject(a: Int, b: TestObject) extends Serializable {
  def equals(other: ComplexTestObject): Boolean = {
    this.a.equals(other.a) && this.b.equals(other.b)
  }
}

class RocksMapTest extends FlatSpec with Matchers {

  "RocksMap" should "serialize and deserialize the keys and values" in {
    val db = new RocksMap("test")

    val a: Int = 1
    val b: String = "hello"
    val c: Array[Int] = Array(1, 2, 3)

    val d: Array[String] = Array("a", "b", "c")

    val serialized_a = db.serialize(a)
    val serialized_b = db.serialize(b)
    val serialized_c = db.serialize(c)
    val serialized_d = db.serialize(d)
    val serialized_TestObject = db.serialize(TestObject(a, b, c, d))
    val serialized_ComplexObject = db.serialize(ComplexTestObject(a, TestObject(a, b, c, d)))

    db.deserialize[Int](serialized_a) should be(a)
    db.deserialize[String](serialized_b) should be(b)
    db.deserialize[Array[Int]](serialized_c) should be(c)
    db.deserialize[Array[String]](serialized_d) should be(d)
    db.deserialize[TestObject](serialized_TestObject).equals(TestObject(a, b, c, d)) should be(true)
    db.deserialize[ComplexTestObject](serialized_ComplexObject).equals(ComplexTestObject(a, TestObject(a, b, c, d))) should be(true)
    db.drop()
    db.close()
  }

  it should "put and get values" in {
    val db = new RocksMap("test")

    db.put(1, 1.0)
    db.get[Int, Double](1).getOrElse(0) should be(1.0)
    db.clear()
    db.drop()
    db.close()
  }

  it should "remove values" in {
    val db = new RocksMap("test")

    db.put(1, 1L)
    db.get[Int, Long](1).getOrElse(0) should be(1L)
    db.remove(1)
    db.get[Int, Long](1) should be(None)
    db.drop()
    db.close()
  }

  it should "clear all the values" in {
    val db = new RocksMap(name = "test")
    db.put(1, "hello")
    db.put(2, "yello")
    db.get(1) should not be (None)
    db.get(2) should not be (None)
    db.clear()
    db.get(1) should be(None)
    db.get(2) should be(None)
    db.drop()
    db.close()
  }

  it should "clear the data files when drop is called" in {
    val db = new RocksMap(name = "test")
    Files.exists(Paths.get(db.pathString)) should be (true)
    db.drop()
    Files.exists(Paths.get(db.pathString)) should be (false)
    db.close()
  }


} 
Example 64
Source File: TestSolrStreamWriter.scala    From spark-solr   with Apache License 2.0 5 votes vote down vote up
package com.lucidworks.spark

import java.io.File
import java.util.UUID

import com.lucidworks.spark.util.{ConfigurationConstants, SolrCloudUtil, SolrQuerySupport, SolrSupport}
import org.apache.commons.io.FileUtils
import org.apache.spark.solr.SparkInternalObjects

class TestSolrStreamWriter extends TestSuiteBuilder {

  test("Stream data into Solr") {
    val collectionName = "testStreaming-" + UUID.randomUUID().toString
    SolrCloudUtil.buildCollection(zkHost, collectionName, null, 1, cloudClient, sc)
    sparkSession.conf.set("spark.sql.streaming.schemaInference", "true")
    sparkSession.sparkContext.setLogLevel("DEBUG")
    val offsetsDir = FileUtils.getTempDirectory + "/spark-stream-offsets-" + UUID.randomUUID().toString
    try {
      val datasetPath = "src/test/resources/test-data/oneusagov"
      val streamingJsonDF = sparkSession.readStream.json(datasetPath)
      val accName = "acc-" + UUID.randomUUID().toString
      assert(streamingJsonDF.isStreaming)
      val writeOptions = Map(
        "collection" -> collectionName,
        "zkhost" -> zkHost,
        "checkpointLocation" -> offsetsDir,
        ConfigurationConstants.GENERATE_UNIQUE_KEY -> "true",
        ConfigurationConstants.ACCUMULATOR_NAME -> accName)
      val streamingQuery = streamingJsonDF
        .drop("_id")
        .writeStream
        .outputMode("append")
        .format("solr")
        .options(writeOptions)
        .start()
      try {
        logger.info(s"Explain ${streamingQuery.explain()}")
        streamingQuery.processAllAvailable()
        logger.info(s"Status ${streamingQuery.status}")
        SolrSupport.getCachedCloudClient(zkHost).commit(collectionName)
        assert(SolrQuerySupport.getNumDocsFromSolr(collectionName, zkHost, None) === 13)
        val acc = SparkInternalObjects.getAccumulatorById(SparkSolrAccumulatorContext.getId(accName).get)
        assert(acc.isDefined)
        assert(acc.get.value == 13)
      } finally {
        streamingQuery.stop()
      }
    } finally {
      SolrCloudUtil.deleteCollection(collectionName, cluster)
      FileUtils.deleteDirectory(new File(offsetsDir))
    }
  }
} 
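
The checkpoint location above is derived from `FileUtils.getTempDirectory`, which simply exposes the `java.io.tmpdir` location as a `File`. A tiny sketch of building and removing such a scratch directory (the directory name is illustrative):

import java.io.File
import java.util.UUID

import org.apache.commons.io.FileUtils

object TempCheckpointSketch extends App {
  // getTempDirectory returns the java.io.tmpdir location as a File.
  val offsetsDir = new File(FileUtils.getTempDirectory, "spark-stream-offsets-" + UUID.randomUUID())
  FileUtils.forceMkdir(offsetsDir)
  println(s"checkpointing to $offsetsDir")
  FileUtils.deleteDirectory(offsetsDir)
}
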
Example 65
Source File: ZookeeperLocalServer.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package it.teamdigitale.miniclusters

import java.io.File
import java.net.InetSocketAddress

import org.apache.commons.io.FileUtils
import org.apache.zookeeper.server.{ServerCnxnFactory, ZooKeeperServer}

class ZookeeperLocalServer(port: Int) {

  var zkServer: Option[ServerCnxnFactory] = None

  def start(): Unit = {
    if (zkServer.isEmpty) {

      val dataDirectory = System.getProperty("java.io.tmpdir")
      val dir = new File(dataDirectory, "zookeeper")
      println(dir.toString)
      if (dir.exists())
        FileUtils.deleteDirectory(dir)

      try {
        val tickTime = 5000
        val server = new ZooKeeperServer(dir.getAbsoluteFile, dir.getAbsoluteFile, tickTime)
        val factory = ServerCnxnFactory.createFactory
        factory.configure(new InetSocketAddress("0.0.0.0", port), 1024)
        factory.startup(server)
        println("ZOOKEEPER server up!!")
        zkServer = Some(factory)

      } catch {
        case ex: Exception =>
          // printStackTrace() returns Unit, so interpolating it only prints "()"; log the message and trace separately
          System.err.println(s"Error in zookeeper server: ${ex.getMessage}")
          ex.printStackTrace()
      } finally { dir.deleteOnExit() }
    } else println("ZOOKEEPER is already up")
  }

  def stop() = {
    if (zkServer.isDefined) {
      zkServer.get.shutdown()
    }
    println("ZOOKEEPER server stopped")
  }
} 
Example 66
Source File: CodeGeneratorEngineHook.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.execute.hook

import java.io.File

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineHook}
import com.webank.wedatasphere.linkis.scheduler.executer.{ExecuteRequest, RunTypeExecuteRequest}
import com.webank.wedatasphere.linkis.server.JMap
import org.apache.commons.io.FileUtils
import org.apache.commons.lang.StringUtils

import scala.collection.mutable.ArrayBuffer


@Deprecated
//changed to UdfLoadEngineHook
abstract class CodeGeneratorEngineHook extends EngineHook with Logging{ self =>
  val udfPathProp = "udf.paths"
  protected var creator: String = _
  protected var user: String = _
  protected var initSpecialCode: String = _
  protected val runType: String

  protected def acceptCodeType(line: String): Boolean

  protected def generateCode(): Array[String] = {
    val codeBuffer = new ArrayBuffer[String]
    val statementBuffer = new ArrayBuffer[String]
    var accept = true
    initSpecialCode.split("\n").foreach{
      case "" =>
      case l if l.startsWith("%") =>
        if(acceptCodeType(l)){
          accept = true
          codeBuffer.append(statementBuffer.mkString("\n"))
          statementBuffer.clear()
        }else{
          accept = false
        }
      case l if accept => statementBuffer.append(l)
      case _ =>
    }
    if(statementBuffer.nonEmpty) codeBuffer.append(statementBuffer.mkString("\n"))
    codeBuffer.toArray
  }

  override def beforeCreateEngine(params: JMap[String, String]): JMap[String, String] = {
    creator = params.get("creator")
    user = params.get("user")
    initSpecialCode = StringUtils.split(params.get(udfPathProp), ",").map(readFile).mkString("\n")
    params
  }

  override def afterCreatedEngine(executor: EngineExecutor): Unit = {
    generateCode().foreach {
      case "" =>
      case c: String =>
        info("Submit udf registration to engine, code: " + c)
        executor.execute(new ExecuteRequest with RunTypeExecuteRequest{
          override val code: String = c
          override val runType: String = self.runType
        })
        info("executed code: " + c)
    }
  }

  protected def readFile(path: String): String = {
    info("read file: " + path)
    val file = new File(path)
    if(file.exists()){
      FileUtils.readFileToString(file)
    } else {
      info("udf file: [" + path + "] doesn't exist, ignore it.")
      ""
    }
  }
}
@Deprecated
class SqlCodeGeneratorEngineHook extends CodeGeneratorEngineHook{
  override val runType = "sql"
  override protected def acceptCodeType(line: String): Boolean = {
    line.startsWith("%sql")
  }
}
@Deprecated
class PythonCodeGeneratorEngineHook extends CodeGeneratorEngineHook{
  override val runType = "python"
  override protected def acceptCodeType(line: String): Boolean = {
    line.startsWith("%python")
  }
}
@Deprecated
class ScalaCodeGeneratorEngineHook extends CodeGeneratorEngineHook{
  override val runType = "scala"
  override protected def acceptCodeType(line: String): Boolean = {
    line.startsWith("%scala")
  }
} 
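
`readFile` above calls the charset-less `FileUtils.readFileToString`, which falls back to the platform default encoding and is deprecated in newer commons-io releases. A small sketch of the same read with an explicit charset (the path and UDF snippet are illustrative, and the `Charset` overloads are assumed to be available in the commons-io version on the classpath):

import java.io.File
import java.nio.charset.StandardCharsets

import org.apache.commons.io.FileUtils

object ReadUdfFileSketch extends App {
  val udfFile = new File(FileUtils.getTempDirectory, "udf-demo.scala") // illustrative path
  FileUtils.writeStringToFile(udfFile, "%scala\nval one = 1", StandardCharsets.UTF_8)

  // An explicit charset avoids surprises when the JVM default encoding differs across hosts.
  val code = FileUtils.readFileToString(udfFile, StandardCharsets.UTF_8)
  println(code)

  FileUtils.deleteQuietly(udfFile)
}
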
Example 67
Source File: PythonCodeParserTest.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.execute

import java.io.File

import com.google.common.io.Resources
import org.apache.commons.io.FileUtils

object PythonCodeParserTest {
  def main(args: Array[String]): Unit = {
    val parser = new PythonCodeParser
    var code = FileUtils.readFileToString(new File(Resources.getResource("stack.py").getPath))
    parser.parse(code, null).foreach { statement =>
      println("---------------------------statement begin-----------------")
      println(statement)
      println("---------------------------statement end-----------------")
    }
  }
} 
Example 68
Source File: SQLCodeParserTest.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.execute

import java.io.File

import com.google.common.io.Resources
import org.apache.commons.io.FileUtils

object SQLCodeParserTest {
  def main(args: Array[String]): Unit = {
    val parser = new SQLCodeParser
    var code = FileUtils.readFileToString(new File(Resources.getResource("very_complex.sql").getPath))
    parser.parse(code, null).foreach { statement =>
      println("---------------------------statement begin-----------------")
      println(statement)
      println("---------------------------statement end-----------------")
    }
  }
} 
Example 69
Source File: CodeGeneratorEngineHookTest.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.execute.hook

import java.io.File
import java.util

import com.google.common.io.Resources
import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineExecutorContext}
import com.webank.wedatasphere.linkis.protocol.engine.RequestEngine
import com.webank.wedatasphere.linkis.resourcemanager.Resource
import com.webank.wedatasphere.linkis.scheduler.executer.{ExecuteRequest, ExecuteResponse}
import org.apache.commons.io.FileUtils

object CodeGeneratorEngineHookTest {
  def main(args: Array[String]): Unit = {
    val requestEngine = new TestRequestEngine
    requestEngine.properties.put(RequestEngine.ENGINE_INIT_SPECIAL_CODE,
      FileUtils.readFileToString(new File(Resources.getResource("engine_special_code").getPath)))
    val engineExecutor = new TestEngineExecutor(1, true)

    var engineHook: CodeGeneratorEngineHook = new SqlCodeGeneratorEngineHook
    engineHook.beforeCreateEngine(new util.HashMap(requestEngine.properties))
    engineHook.afterCreatedEngine(engineExecutor)
    engineHook = new PythonCodeGeneratorEngineHook
    engineHook.beforeCreateEngine(new util.HashMap(requestEngine.properties))
    engineHook.afterCreatedEngine(engineExecutor)
    engineHook = new ScalaCodeGeneratorEngineHook
    engineHook.beforeCreateEngine(new util.HashMap(requestEngine.properties))
    engineHook.afterCreatedEngine(engineExecutor)

  }
}
class TestRequestEngine extends RequestEngine {
  override val user: String = ""
  override val properties: util.Map[String, String] = new util.HashMap[String, String](){

  }
  override val creator: String = ""
}
class TestEngineExecutor(outputPrintLimit: Int, isSupportParallelism: Boolean) extends EngineExecutor(outputPrintLimit, isSupportParallelism){

  override def execute(executeRequest: ExecuteRequest): ExecuteResponse = {
    null
  }

  override def getName: String = ""

  override def getActualUsedResources: Resource = null

  override protected def executeLine(engineExecutorContext: EngineExecutorContext, code: String): ExecuteResponse = null

  override protected def executeCompletely(engineExecutorContext: EngineExecutorContext, code: String, completedLine: String): ExecuteResponse = null

  override def close(): Unit = null
} 
Example 70
Source File: ScalaCodeParserTest.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.execute

import java.io.File

import com.google.common.io.Resources
import org.apache.commons.io.FileUtils

object ScalaCodeParserTest {
  def main(args: Array[String]): Unit = {
    val parser = new ScalaCodeParser
    var code = FileUtils.readFileToString(new File(Resources.getResource("test.scala.txt").getPath))
    parser.parse(code, null).foreach { statement =>
      println("---------------------------statement begin-----------------")
      println(statement)
      println("---------------------------statement end-----------------")
    }
  }

} 
Example 71
Source File: JarLoaderEngineHook.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.enginemanager.hook

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.enginemanager.{Engine, EngineHook}
import com.webank.wedatasphere.linkis.enginemanager.conf.EngineManagerConfiguration.ENGINE_UDF_APP_NAME
import com.webank.wedatasphere.linkis.protocol.engine.RequestEngine
import com.webank.wedatasphere.linkis.rpc.Sender
import com.webank.wedatasphere.linkis.udf.api.rpc.{RequestUdfTree, ResponseUdfTree}
import com.webank.wedatasphere.linkis.udf.entity.{UDFInfo, UDFTree}
import org.apache.commons.collections.CollectionUtils
import org.apache.commons.io.FileUtils
import org.apache.commons.lang.StringUtils
import org.codehaus.jackson.map.ObjectMapper

import scala.collection.JavaConversions._
import scala.collection.mutable

class JarLoaderEngineHook extends EngineHook with Logging{

  override def beforeCreateSession(requestEngine: RequestEngine): RequestEngine = {
    info("start loading UDFs")
    val udfInfos = extractUdfInfos(requestEngine).filter{info => info.getUdfType == 0 && info.getExpire == false && StringUtils.isNotBlank(info.getPath) && isJarExists(info) && info.getLoad == true }
    // add to class path
    val jars = new mutable.HashSet[String]()
    udfInfos.foreach{udfInfo => jars.add("file://" + udfInfo.getPath)}
    val jarPaths = jars.mkString(",")
    if(StringUtils.isBlank(requestEngine.properties.get("jars"))){
      requestEngine.properties.put("jars", jarPaths)
    } else {
      requestEngine.properties.put("jars", requestEngine.properties.get("jars") + "," + jarPaths)
    }
    info("added jars: " + jarPaths)
    //jars.foreach(fetchRemoteFile)
    //info("copied jars.")
    info("end loading UDFs")
    requestEngine
  }

  override def afterCreatedSession(engine: Engine, requestEngine: RequestEngine): Unit = {
  }

  protected def isJarExists(udfInfo: UDFInfo) : Boolean = {
    true
//    if(FileUtils.getFile(udfInfo.getPath).exists()){
//      true
//    } else {
//      info(s"The jar file [${udfInfo.getPath}] of UDF [${udfInfo.getUdfName}] doesn't exist, ignore it.")
//      false
//    }
  }

  protected def extractUdfInfos(requestEngine: RequestEngine): mutable.ArrayBuffer[UDFInfo] = {
    val udfInfoBuilder = new mutable.ArrayBuffer[UDFInfo]
    val userName = requestEngine.user
    val udfTree = queryUdfRpc(userName)
    extractUdfInfos(udfInfoBuilder, udfTree, userName)
    udfInfoBuilder
  }

  protected def extractUdfInfos(udfInfoBuilder: mutable.ArrayBuffer[UDFInfo], udfTree: UDFTree, userName: String) : Unit = {
    if(CollectionUtils.isNotEmpty(udfTree.getUdfInfos)){
      for(udfInfo <- udfTree.getUdfInfos){
        udfInfoBuilder.append(udfInfo)
      }
    }
    if(CollectionUtils.isNotEmpty(udfTree.getChildrens)){
      for(child <- udfTree.getChildrens){
        var childInfo = child
        if(TreeType.specialTypes.contains(child.getUserName)){
          childInfo = queryUdfRpc(userName, child.getId, child.getUserName)
        } else {
          childInfo = queryUdfRpc(userName, child.getId, TreeType.SELF)
        }
        extractUdfInfos(udfInfoBuilder, childInfo, userName)
      }
    }
  }

  private def queryUdfRpc(userName: String, treeId: Long = -1, treeType: String = "self"): UDFTree = {
    val udfTree = Sender.getSender(ENGINE_UDF_APP_NAME.getValue)
      .ask(RequestUdfTree(userName, treeType, treeId, "udf"))
      .asInstanceOf[ResponseUdfTree]
      .udfTree
    //info("got udf tree:" + new ObjectMapper().writer().withDefaultPrettyPrinter().writeValueAsString(udfTree))
    udfTree
  }
} 
Example 72
Source File: TokenAuthentication.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.gateway.security.token

import java.io.File
import java.util.Properties
import java.util.concurrent.TimeUnit

import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils}
import com.webank.wedatasphere.linkis.gateway.config.GatewayConfiguration._
import com.webank.wedatasphere.linkis.gateway.http.GatewayContext
import com.webank.wedatasphere.linkis.gateway.security.{GatewaySSOUtils, SecurityFilter}
import com.webank.wedatasphere.linkis.server.Message
import org.apache.commons.io.{FileUtils, IOUtils}
import org.apache.commons.lang.StringUtils


object TokenAuthentication extends Logging {

  private val (props, file) = if(ENABLE_TOKEN_AUTHENTICATION.getValue)
    (new Properties, new File(this.getClass.getClassLoader.getResource(TOKEN_AUTHENTICATION_CONFIG.getValue).toURI.getPath))
  else (null, null)
  private var lastModified = 0L

  if(ENABLE_TOKEN_AUTHENTICATION.getValue) {
    Utils.defaultScheduler.scheduleAtFixedRate(new Runnable {
      override def run(): Unit = Utils.tryAndError(init())
    }, TOKEN_AUTHENTICATION_SCAN_INTERVAL.getValue, TOKEN_AUTHENTICATION_SCAN_INTERVAL.getValue, TimeUnit.MILLISECONDS)
    init()
  }

  private def init(): Unit = if(file.lastModified() > lastModified) {
    lastModified = file.lastModified()
    info(s"loading token authentication file $file.")
    val newProps = new Properties
    val input = FileUtils.openInputStream(file)
    Utils.tryFinally(newProps.load(input))(IOUtils.closeQuietly(input))
    props.putAll(newProps)
  }

  private def validateTokenUser(token: String, tokenUser: String): Boolean = {
    val tokenUsers = props.getProperty(token)
    if(tokenUsers == "*" || (StringUtils.isNotBlank(tokenUsers) && tokenUsers.contains(tokenUser))) true
    else false
  }

  def isTokenRequest(gatewayContext: GatewayContext) : Boolean = {
    (gatewayContext.getRequest.getHeaders.containsKey(TOKEN_KEY) &&
      gatewayContext.getRequest.getHeaders.containsKey(TOKEN_USER_KEY)) || (
      gatewayContext.getRequest.getCookies.containsKey(TOKEN_KEY) &&
        gatewayContext.getRequest.getCookies.containsKey(TOKEN_USER_KEY))
  }

  def tokenAuth(gatewayContext: GatewayContext): Boolean = {
    if(!ENABLE_TOKEN_AUTHENTICATION.getValue) {
      val message = Message.noLogin(s"Gateway未启用token认证,请采用其他认证方式!") << gatewayContext.getRequest.getRequestURI
      SecurityFilter.filterResponse(gatewayContext, message)
      return false
    }
    var token = gatewayContext.getRequest.getHeaders.get(TOKEN_KEY)(0)
    var tokenUser = gatewayContext.getRequest.getHeaders.get(TOKEN_USER_KEY)(0)
    if(StringUtils.isBlank(token) || StringUtils.isBlank(tokenUser)) {
      token = gatewayContext.getRequest.getCookies.get(TOKEN_KEY)(0).getValue
      tokenUser = gatewayContext.getRequest.getCookies.get(TOKEN_USER_KEY)(0).getValue
      if(StringUtils.isBlank(token) || StringUtils.isBlank(tokenUser)) {
        val message = Message.noLogin(s"请在Header或Cookie中同时指定$TOKEN_KEY 和 $TOKEN_USER_KEY,以便完成token认证!") << gatewayContext.getRequest.getRequestURI
        SecurityFilter.filterResponse(gatewayContext, message)
        return false
      }
    }
    if(validateTokenUser(token, tokenUser)){
      info(s"Token authentication succeed, uri: ${gatewayContext.getRequest.getRequestURI}, token: $token, tokenUser: $tokenUser.")
      GatewaySSOUtils.setLoginUser(gatewayContext.getRequest, tokenUser)
      true
    } else {
      val message = Message.noLogin(s"未授权的token$token,无法将请求绑定给tokenUser$tokenUser!") << gatewayContext.getRequest.getRequestURI
      SecurityFilter.filterResponse(gatewayContext, message)
      false
    }
  }

} 
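
`init()` above reloads the token file into a `Properties` instance via `FileUtils.openInputStream`, closing the stream with `IOUtils.closeQuietly`. A stripped-down sketch of that load pattern without the Linkis helpers (the file name and token entry are illustrative):

import java.io.File
import java.util.Properties

import org.apache.commons.io.{FileUtils, IOUtils}

object TokenFileSketch extends App {
  val tokenFile = new File(FileUtils.getTempDirectory, "token.properties") // illustrative path
  FileUtils.writeStringToFile(tokenFile, "some-token=userA,userB", "UTF-8")

  val props = new Properties
  // openInputStream throws a descriptive IOException if the file is missing or is a directory.
  val in = FileUtils.openInputStream(tokenFile)
  try props.load(in) finally IOUtils.closeQuietly(in)

  println(props.getProperty("some-token")) // userA,userB
  FileUtils.deleteQuietly(tokenFile)
}
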
Example 73
Source File: RefreshUtils.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.common.utils

import java.io.File
import java.util.concurrent.TimeUnit

import com.webank.wedatasphere.linkis.common.conf.Configuration
import org.apache.commons.io.FileUtils


object RefreshUtils {

  def registerFileRefresh(period: Long, file: String, deal: java.util.List[String] => Unit): Unit = {
    Utils.defaultScheduler.scheduleAtFixedRate(new Runnable {
      val f = new File(file)
      var fileModifiedTime = if(f.exists()) f.lastModified() else 0
      override def run(): Unit = {
        if(!f.exists()) return
        if(f.lastModified() > fileModifiedTime) {
          deal(FileUtils.readLines(f, Configuration.BDP_ENCODING.getValue))
          fileModifiedTime = f.lastModified()
        }
      }
    }, period, period, TimeUnit.MILLISECONDS)
  }

}
abstract class Deal {
  def deal(line: String): Unit
} 
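
A condensed, scheduler-free sketch of the reload-on-change idea above, combining `File.lastModified` with `FileUtils.readLines` (the path and encoding are illustrative):

import java.io.File

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils

object FileRefreshSketch extends App {
  val f = new File(FileUtils.getTempDirectory, "refresh-demo.txt") // illustrative path
  FileUtils.writeStringToFile(f, "a\nb\nc", "UTF-8")

  var lastSeen = 0L
  def refreshIfChanged(): Unit =
    if (f.exists() && f.lastModified() > lastSeen) {
      lastSeen = f.lastModified()
      val lines = FileUtils.readLines(f, "UTF-8").asScala
      println(s"reloaded ${lines.size} line(s)")
    }

  refreshIfChanged() // reloads on the first call
  refreshIfChanged() // no-op, the file has not changed since
  FileUtils.deleteQuietly(f)
}
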
Example 74
Source File: TestUtil.scala    From pulse   with Apache License 2.0 5 votes vote down vote up
package io.phdata.pulse.solr

import java.io.File
import java.nio.file.Paths
import java.util.UUID

import org.apache.commons.io.FileUtils
import org.apache.solr.client.solrj.embedded.JettyConfig
import org.apache.solr.cloud.MiniSolrCloudCluster

object TestUtil {

  def miniSolrCloudCluster(): MiniSolrCloudCluster = {
    // clean up the solr files so we don't try to read collections from old runs
    FileUtils.deleteDirectory(new File("target/solr7"))

    // Set up a MiniSolrCloudCluster
    val clusterHome =
      s"${System.getProperty("user.dir")}/target/solr7/solrHome/${UUID.randomUUID()}"
    val jettyConfig =
      JettyConfig.builder().setContext("/solr").setPort(8983).stopAtShutdown(true).build()

    new MiniSolrCloudCluster(1,
                             null,
                             Paths.get(clusterHome),
                             MiniSolrCloudCluster.DEFAULT_CLOUD_SOLR_XML,
                             null,
                             null)
  }

  def randomIdentifier() = UUID.randomUUID().toString.substring(0, 5)
} 
Example 75
Source File: TestUtil.scala    From pulse   with Apache License 2.0 5 votes vote down vote up
package io.phdata.pulse.solr

import java.io.File
import java.nio.file.Paths
import java.util.UUID

import org.apache.commons.io.FileUtils
import org.apache.solr.client.solrj.embedded.JettyConfig
import org.apache.solr.cloud.MiniSolrCloudCluster

object TestUtil {

  def miniSolrCloudCluster(): MiniSolrCloudCluster = {

    val DEFAULT_SOLR_CLOUD_XML =
      """<solr>
        |
        |  <str name="shareSchema">${shareSchema:false}</str>
        |  <str name="configSetBaseDir">${configSetBaseDir:configsets}</str>
        |  <str name="coreRootDirectory">${coreRootDirectory:target/solr4/cores}</str>
        |
        |  <shardHandlerFactory name="shardHandlerFactory" class="HttpShardHandlerFactory">
        |    <str name="urlScheme">${urlScheme:}</str>
        |    <int name="socketTimeout">${socketTimeout:90000}</int>
        |    <int name="connTimeout">${connTimeout:15000}</int>
        |  </shardHandlerFactory>
        |
        |  <solrcloud>
        |    <str name="host">127.0.0.1</str>
        |    <int name="hostPort">${hostPort:8983}</int>
        |    <str name="hostContext">${hostContext:solr}</str>
        |    <int name="zkClientTimeout">${solr.zkclienttimeout:30000}</int>
        |    <bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool>
        |    <int name="leaderVoteWait">10000</int>
        |    <int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:45000}</int>
        |    <int name="distribUpdateSoTimeout">${distribUpdateSoTimeout:340000}</int>
        |  </solrcloud>
        |
        |</solr>""".stripMargin

    System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory")
    // clean up the solr files so we don't try to read collections from old runs
    FileUtils.deleteDirectory(new File("target/solr4"))

    // Set up a MiniSolrCloudCluster
    val clusterHome =
      s"${System.getProperty("user.dir")}/target/solr4/solrHome/${UUID.randomUUID()}"
    val jettyConfig =
      JettyConfig.builder().setContext("/solr").setPort(8983).stopAtShutdown(true).build()

    new MiniSolrCloudCluster(1, Paths.get(clusterHome), DEFAULT_SOLR_CLOUD_XML, jettyConfig)
  }

  def randomIdentifier() = UUID.randomUUID().toString.substring(0, 5)

} 
Example 76
Source File: S3PointCloudInputFormat.scala    From geotrellis-pointcloud   with Apache License 2.0 5 votes vote down vote up
package geotrellis.pointcloud.spark.store.s3

import geotrellis.spark.store.s3._
import geotrellis.pointcloud.spark.store.hadoop.formats._
import geotrellis.pointcloud.util.Filesystem

import io.pdal._
import io.circe.Json
import io.circe.syntax._
import cats.syntax.either._
import org.apache.hadoop.mapreduce.{InputSplit, TaskAttemptContext}
import org.apache.commons.io.FileUtils

import java.io.{File, InputStream}
import java.net.URI

import scala.collection.JavaConverters._


    mode match {
      case "s3" =>
        new S3URIRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
          def read(key: String, uri: URI): (S3PointCloudHeader, List[PointCloud]) = {
            val s3Pipeline =
              pipeline
                .hcursor
                .downField("pipeline").downArray
                .downField("filename").withFocus(_ => uri.toString.asJson)
                .top.fold(pipeline)(identity)

            executePipeline(context)(key, s3Pipeline)
          }
        }

      case _ =>
        val tmpDir = {
          val dir = PointCloudInputFormat.getTmpDir(context)
          if (dir == null) Filesystem.createDirectory()
          else Filesystem.createDirectory(dir)
        }

        new S3StreamRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
          def read(key: String, is: InputStream): (S3PointCloudHeader, List[PointCloud]) = {
            // copy remote file into local tmp dir
            tmpDir.mkdirs() // make sure the directory tree exists
            val localPath = new File(tmpDir, key.replace("/", "_"))
            FileUtils.copyInputStreamToFile(is, localPath)
            is.close()

            // use local filename path if it's present in json
            val localPipeline =
              pipeline
                .hcursor
                .downField("pipeline").downArray
                .downField("filename").withFocus(_ => localPath.getAbsolutePath.asJson)
                .top.fold(pipeline)(identity)

            try executePipeline(context)(key, localPipeline) finally {
              localPath.delete()
              tmpDir.delete()
            }
          }
        }
    }
  }
} 
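
The non-S3 branch above spools the remote object to a local file with `FileUtils.copyInputStreamToFile`, which creates any missing parent directories of the destination. A minimal sketch with an in-memory stream standing in for the S3 object (key and contents are illustrative):

import java.io.{ByteArrayInputStream, File}
import java.nio.file.Files

import org.apache.commons.io.FileUtils

object SpoolToLocalSketch extends App {
  val tmpDir = Files.createTempDirectory("pointcloud").toFile
  val is = new ByteArrayInputStream("fake point cloud bytes".getBytes("UTF-8"))

  // Flatten the key's separators so the object key maps onto a single local file name.
  val localPath = new File(tmpDir, "bucket/prefix/tile.laz".replace("/", "_"))
  FileUtils.copyInputStreamToFile(is, localPath)
  is.close()

  println(s"spooled ${localPath.length()} bytes to $localPath")
  FileUtils.deleteDirectory(tmpDir)
}
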
Example 77
Source File: KafkaServer.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package alpakka.env

import java.io.File
import java.net.InetSocketAddress
import java.nio.file.{Files, Paths}
import java.util.Properties

import kafka.server.{KafkaConfig, KafkaServerStartable}
import org.apache.commons.io.FileUtils
import org.apache.zookeeper.server.quorum.QuorumPeerConfig
import org.apache.zookeeper.server.{ServerConfig, ZooKeeperServerMain}


object KafkaServer extends App {

  val zookeeperPort = 2181

  val kafkaLogs = "/tmp/kafka-logs"
  val kafkaLogsPath = Paths.get(kafkaLogs)

  // See: https://stackoverflow.com/questions/59592518/kafka-broker-doesnt-find-cluster-id-and-creates-new-one-after-docker-restart/60864763#comment108382967_60864763
  def fix25Behaviour() = {
    val fileWithConflictingContent = kafkaLogsPath.resolve("meta.properties").toFile
    if (fileWithConflictingContent.exists())  FileUtils.forceDelete(fileWithConflictingContent)
  }

  def removeKafkaLogs(): Unit = {
    if (kafkaLogsPath.toFile.exists()) FileUtils.forceDelete(kafkaLogsPath.toFile)
  }

  // Keeps the persistent data
  fix25Behaviour()
  // If everything fails
  //removeKafkaLogs()

  val quorumConfiguration = new QuorumPeerConfig {
    // Since we do not run a cluster, we are not interested in zookeeper data
    override def getDataDir: File = Files.createTempDirectory("zookeeper").toFile
    override def getDataLogDir: File = Files.createTempDirectory("zookeeper-logs").toFile
    override def getClientPortAddress: InetSocketAddress = new InetSocketAddress(zookeeperPort)
  }

  class StoppableZooKeeperServerMain extends ZooKeeperServerMain {
    def stop(): Unit = shutdown()
  }

  val zooKeeperServer = new StoppableZooKeeperServerMain()

  val zooKeeperConfig = new ServerConfig()
  zooKeeperConfig.readFrom(quorumConfiguration)

  val zooKeeperThread = new Thread {
    override def run(): Unit = zooKeeperServer.runFromConfig(zooKeeperConfig)
  }

  zooKeeperThread.start()

  val kafkaProperties = new Properties()
  kafkaProperties.put("zookeeper.connect", s"localhost:$zookeeperPort")
  kafkaProperties.put("broker.id", "0")
  kafkaProperties.put("offsets.topic.replication.factor", "1")
  kafkaProperties.put("log.dirs", kafkaLogs)
  kafkaProperties.put("delete.topic.enable", "true")
  kafkaProperties.put("group.initial.rebalance.delay.ms", "0")
  kafkaProperties.put("transaction.state.log.min.isr", "1")
  kafkaProperties.put("transaction.state.log.replication.factor", "1")
  kafkaProperties.put("zookeeper.connection.timeout.ms", "6000")
  kafkaProperties.put("num.partitions", "10")

  val kafkaConfig = KafkaConfig.fromProps(kafkaProperties)

  val kafka = new KafkaServerStartable(kafkaConfig)

  println("About to start...")
  kafka.startup()

  scala.sys.addShutdownHook{
    println("About to shutdown...")
    kafka.shutdown()
    kafka.awaitShutdown()
    zooKeeperServer.stop()
  }

  zooKeeperThread.join()
} 
Example 78
Source File: TestSetup.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.io.File
import java.nio.file.Paths

import org.apache.commons.io.FileUtils
import org.scalatest.Tag

object TestSetup {

  private var runSetup = true

  val configTest = getClass.getResource("/test-fey-configuration.conf")

  def setup(): Unit = {
    if(runSetup){
      println("SETTING UP ...")
      createFeyTmpDirectoriesForTest()
      copyTestActorToTmp()
      copyJSONstoTmp()
      runSetup = false
    }
  }

  private def copyTestActorToTmp(): Unit = {
    copyResourceFileToLocal("/fey-test-actor.jar",s"${CONFIG.JAR_REPOSITORY}/fey-test-actor.jar")
  }

  private def copyJSONstoTmp(): Unit = {
    copyResourceFileToLocal("/json/valid-json.json",s"${CONFIG.JSON_REPOSITORY}/valid-json.json.not")
    copyResourceFileToLocal("/json/invalid-json.json",s"${CONFIG.JSON_REPOSITORY}/invalid-json.json.not")
  }

  private def copyResourceFileToLocal(resourcePath: String, destination: String): Unit = {
    val resourceFile = getClass.getResource(resourcePath)
    val dest = new File(destination)
    FileUtils.copyURLToFile(resourceFile, dest)
  }

  private def createFeyTmpDirectoriesForTest(): Unit = {
    var file = new File(s"/tmp/fey/test/checkpoint")
    file.mkdirs()
    file = new File(s"/tmp/fey/test/json")
    file.mkdirs()
    file = new File(s"/tmp/fey/test/json/watchtest")
    file.mkdirs()
    file = new File(s"/tmp/fey/test/jars")
    file.mkdirs()
    file = new File(s"/tmp/fey/test/jars/dynamic")
    file.mkdirs()
  }

}

object SlowTest extends Tag("org.apache.iota.fey.SlowTest") 
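
`copyResourceFileToLocal` above relies on `FileUtils.copyURLToFile`, which also creates the destination's missing parent directories. A tiny self-contained sketch using a file URL in place of a classpath resource (all paths are illustrative):

import java.io.File
import java.nio.file.Files

import org.apache.commons.io.FileUtils

object CopyUrlSketch extends App {
  // A plain file URL stands in for getClass.getResource(...) so the sketch is self-contained.
  val src = Files.createTempFile("fey-test-actor", ".jar").toFile
  FileUtils.writeStringToFile(src, "jar bytes", "UTF-8")

  val destRoot = new File(System.getProperty("java.io.tmpdir"), "fey-demo")
  val dest = new File(destRoot, "jars/fey-test-actor.jar")
  // copyURLToFile creates the missing parent directories of the destination.
  FileUtils.copyURLToFile(src.toURI.toURL, dest)
  println(dest.exists()) // true

  FileUtils.deleteQuietly(src)
  FileUtils.deleteDirectory(destRoot)
}
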
Example 79
Source File: Tryout.scala    From spark-es   with Apache License 2.0 5 votes vote down vote up
import java.nio.file.Files

import org.apache.commons.io.FileUtils
import org.apache.spark.SparkContext
import org.elasticsearch.common.settings.Settings
import org.elasticsearch.node.NodeBuilder
import org.apache.spark.elasticsearch._

object Tryout {
  def main(args: Array[String]): Unit = {
    val sparkContext = new SparkContext("local[2]", "SparkES")

    val dataDir = Files.createTempDirectory("elasticsearch").toFile

    dataDir.deleteOnExit()

    val settings = Settings.settingsBuilder()
      .put("path.home", dataDir.getAbsolutePath)
      .put("path.logs", s"${dataDir.getAbsolutePath}/logs")
      .put("path.data", s"${dataDir.getAbsolutePath}/data")
      .put("index.store.fs.memory.enabled", true)
      .put("index.number_of_shards", 1)
      .put("index.number_of_replicas", 0)
      .put("cluster.name", "SparkES")
      .build()

    val node = NodeBuilder.nodeBuilder().settings(settings).node()

    val client = node.client()

    sparkContext
      .parallelize(Seq(
      ESDocument(ESMetadata("2", "type1", "index1"), """{"name": "John Smith"}"""),
      ESDocument(ESMetadata("1", "type1", "index1"), """{"name": "Sergey Shumov"}""")
    ), 2)
      .saveToES(Seq("localhost"), "SparkES")
    
    client.admin().cluster().prepareHealth("index1").setWaitForGreenStatus().get()

    val documents = sparkContext.esRDD(
      Seq("localhost"), "SparkES", Seq("index1"), Seq("type1"), "name:sergey")

    println(documents.count())

    documents.foreach(println)

    sparkContext.stop()

    client.close()

    node.close()

    FileUtils.deleteQuietly(dataDir)
  }
} 
Example 80
Source File: LocalElasticSearch.scala    From spark-es   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.elasticsearch

import java.nio.file.Files
import java.util.UUID

import org.apache.commons.io.FileUtils
import org.elasticsearch.common.settings.Settings
import org.elasticsearch.node.{NodeBuilder, Node}

class LocalElasticSearch(val clusterName: String = UUID.randomUUID().toString) {
  lazy val node = buildNode()
  lazy val client = node.client()
  val dataDir = Files.createTempDirectory("elasticsearch").toFile

  private var started = false

  def buildNode(): Node = {
    val settings = Settings.settingsBuilder()
      .put("path.home", dataDir.getAbsolutePath)
      .put("path.logs", s"${dataDir.getAbsolutePath}/logs")
      .put("path.data", s"${dataDir.getAbsolutePath}/data")
      .put("index.store.fs.memory.enabled", true)
      .put("index.number_of_shards", 1)
      .put("index.number_of_replicas", 0)
      .put("cluster.name", clusterName)
      .build()

    val instance = NodeBuilder.nodeBuilder().settings(settings).node()

    started = true

    instance
  }

  def close(): Unit = {
    if (started) {
      client.close()
      node.close()
    }

    try {
      FileUtils.forceDelete(dataDir)
    } catch {
      case e: Exception =>
    }
  }
} 
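
The two embedded-ElasticSearch helpers above clean up their data directories with `FileUtils.deleteQuietly` and `FileUtils.forceDelete`; the difference is purely in error handling. A short sketch contrasting the two (the directory is illustrative):

import java.nio.file.Files

import org.apache.commons.io.FileUtils

object DeleteVariantsSketch extends App {
  val dataDir = Files.createTempDirectory("elasticsearch-demo").toFile

  // forceDelete throws an IOException if the target cannot be removed (or does not exist) ...
  FileUtils.forceDelete(dataDir)

  // ... while deleteQuietly never throws and just reports the outcome as a Boolean.
  val removedAgain = FileUtils.deleteQuietly(dataDir)
  println(removedAgain) // false, it is already gone
}
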
Example 81
Source File: MultiNodeSupportCassandra.scala    From eventuate   with Apache License 2.0 5 votes vote down vote up
package com.rbmhtechnology.eventuate

import java.io.File

import akka.actor.Props
import akka.remote.testconductor.RoleName
import akka.remote.testkit.MultiNodeSpec

import com.rbmhtechnology.eventuate.log.cassandra._

import org.apache.commons.io.FileUtils
import org.scalatest.BeforeAndAfterAll

trait MultiNodeSupportCassandra extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec =>
  val coordinator = RoleName("nodeA")

  def cassandraDir: String =
    MultiNodeEmbeddedCassandra.DefaultCassandraDir

  def logProps(logId: String): Props =
    CassandraEventLog.props(logId)

  override def atStartup(): Unit = {
    if (isNode(coordinator)) {
      MultiNodeEmbeddedCassandra.start(cassandraDir)
      Cassandra(system)
    }
    enterBarrier("startup")
  }

  override def afterAll(): Unit = {
    // get all config data before shutting down node
    val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir"))

    // shut down node
    super.afterAll()

    // clean database and delete snapshot files
    if (isNode(coordinator)) {
      FileUtils.deleteDirectory(snapshotRootDir)
      MultiNodeEmbeddedCassandra.clean()
    }
  }
} 
Example 82
Source File: MultiNodeSupportLeveldb.scala    From eventuate   with Apache License 2.0 5 votes vote down vote up
package com.rbmhtechnology.eventuate

import java.io.File

import akka.actor.Props
import akka.remote.testconductor.RoleName
import akka.remote.testkit.MultiNodeSpec

import com.rbmhtechnology.eventuate.log.leveldb.LeveldbEventLog

import org.apache.commons.io.FileUtils
import org.scalatest.BeforeAndAfterAll

trait MultiNodeSupportLeveldb extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec =>
  val coordinator = RoleName("nodeA")

  def logProps(logId: String): Props =
    LeveldbEventLog.props(logId)

  override def afterAll(): Unit = {
    // get all config data before shutting down node
    val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir"))
    val logRootDir = new File(system.settings.config.getString("eventuate.log.leveldb.dir"))

    // shut down node
    super.afterAll()

    // delete log and snapshot files
    if (isNode(coordinator)) {
      FileUtils.deleteDirectory(snapshotRootDir)
      FileUtils.deleteDirectory(logRootDir)
    }
  }
} 
Example 83
Source File: PersistOnEventWithRecoverySpecLeveldb.scala    From eventuate   with Apache License 2.0 5 votes vote down vote up
package com.rbmhtechnology.eventuate

import java.util.UUID

import akka.actor.Actor
import akka.actor.ActorRef
import akka.actor.Props
import akka.testkit.TestProbe
import com.rbmhtechnology.eventuate.ReplicationIntegrationSpec.replicationConnection
import com.rbmhtechnology.eventuate.utilities._
import org.apache.commons.io.FileUtils
import org.scalatest.Matchers
import org.scalatest.WordSpec

import scala.concurrent.duration.DurationInt

object PersistOnEventWithRecoverySpecLeveldb {
  class OnBEmitRandomActor(val eventLog: ActorRef, probe: TestProbe) extends EventsourcedActor with PersistOnEvent {

    override def id = getClass.getName

    override def onCommand = Actor.emptyBehavior

    override def onEvent = {
      case "A"          =>
      case "B"          => persistOnEvent(UUID.randomUUID().toString)
      case uuid: String => probe.ref ! uuid
    }
  }

  def persistOnEventProbe(locationA1: Location, log: ActorRef) = {
    val probe = locationA1.probe
    locationA1.system.actorOf(Props(new OnBEmitRandomActor(log, probe)))
    probe
  }

  val noMsgTimeout = 100.millis
}

class PersistOnEventWithRecoverySpecLeveldb extends WordSpec with Matchers with MultiLocationSpecLeveldb {
  import RecoverySpecLeveldb._
  import PersistOnEventWithRecoverySpecLeveldb._

  override val logFactory: String => Props =
    id => SingleLocationSpecLeveldb.TestEventLog.props(id, batching = true)

  "An EventsourcedActor with PersistOnEvent" must {
    "not re-attempt persistence on successful write after reordering of events through disaster recovery" in {
      val locationB = location("B", customConfig = RecoverySpecLeveldb.config)
      def newLocationA = location("A", customConfig = RecoverySpecLeveldb.config)
      val locationA1 = newLocationA

      val endpointB = locationB.endpoint(Set("L1"), Set(replicationConnection(locationA1.port)))
      def newEndpointA(l: Location, activate: Boolean) = l.endpoint(Set("L1"), Set(replicationConnection(locationB.port)), activate = activate)
      val endpointA1 = newEndpointA(locationA1, activate = true)

      val targetA = endpointA1.target("L1")
      val logDirA = logDirectory(targetA)
      val targetB = endpointB.target("L1")
      val a1Probe = persistOnEventProbe(locationA1, targetA.log)

      write(targetA, List("A"))
      write(targetB, List("B"))
      val event = a1Probe.expectMsgClass(classOf[String])
      assertConvergence(Set("A", "B", event), endpointA1, endpointB)

      locationA1.terminate().await
      FileUtils.deleteDirectory(logDirA)

      val locationA2 = newLocationA
      val endpointA2 = newEndpointA(locationA2, activate = false)
      endpointA2.recover().await

      val a2Probe = persistOnEventProbe(locationA2, endpointA2.logs("L1"))
      a2Probe.expectMsg(event)
      a2Probe.expectNoMsg(noMsgTimeout)
      assertConvergence(Set("A", "B", event), endpointA2, endpointB)
    }
  }
} 
Example 84
Source File: NeuralNetwork.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package Yelp.Trainer

import org.deeplearning4j.nn.conf.MultiLayerConfiguration
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.nd4j.linalg.factory.Nd4j
import java.io.File
import org.apache.commons.io.FileUtils
import java.io.{DataInputStream, DataOutputStream, FileInputStream}
import java.nio.file.{Files, Paths}

object NeuralNetwork {  
  def loadNN(NNconfig: String, NNparams: String) = {
    // get neural network config
    val confFromJson: MultiLayerConfiguration = MultiLayerConfiguration.fromJson(FileUtils.readFileToString(new File(NNconfig)))    
     // get neural network parameters 
    val dis: DataInputStream = new DataInputStream(new FileInputStream(NNparams))
    val newParams = Nd4j.read(dis)    
     // creating network object
    val savedNetwork: MultiLayerNetwork = new MultiLayerNetwork(confFromJson)
    savedNetwork.init()
    savedNetwork.setParameters(newParams)    
    savedNetwork
  }
  
  def saveNN(model: MultiLayerNetwork, NNconfig: String, NNparams: String) = {
    // save neural network config
    FileUtils.write(new File(NNconfig), model.getLayerWiseConfigurations().toJson())     
    // save neural network parms
    val dos: DataOutputStream = new DataOutputStream(Files.newOutputStream(Paths.get(NNparams)))
    Nd4j.write(model.params(), dos)
  }  
} 
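
`saveNN` and `loadNN` above round-trip the network configuration as JSON through `FileUtils.write` and `FileUtils.readFileToString`, shown there in their charset-less forms. A minimal round-trip sketch with an explicit charset (the file name and JSON payload are illustrative):

import java.io.File
import java.nio.charset.StandardCharsets

import org.apache.commons.io.FileUtils

object JsonRoundTripSketch extends App {
  val confFile = new File(FileUtils.getTempDirectory, "nn-conf-demo.json") // illustrative path
  val json = """{"layers": 3, "activation": "relu"}"""

  FileUtils.write(confFile, json, StandardCharsets.UTF_8)
  val readBack = FileUtils.readFileToString(confFile, StandardCharsets.UTF_8)
  println(readBack == json) // true

  FileUtils.deleteQuietly(confFile)
}
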
Example 85
Source File: ExampleMahaService.scala    From maha   with Apache License 2.0 5 votes vote down vote up
// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.api.jersey.example

import java.io.File
import java.util.UUID

import com.yahoo.maha.core.ddl.OracleDDLGenerator
import com.yahoo.maha.jdbc.{JdbcConnection, List, Seq}
import com.yahoo.maha.service.{DefaultMahaService, MahaService, MahaServiceConfig}
import com.zaxxer.hikari.{HikariConfig, HikariDataSource}
import grizzled.slf4j.Logging
import org.apache.commons.io.FileUtils
import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat

object ExampleMahaService extends Logging {

  val REGISTRY_NAME = "academic"

  private var dataSource: Option[HikariDataSource] = None
  private var jdbcConnection: Option[JdbcConnection] = None
  val h2dbId = UUID.randomUUID().toString.replace("-","")
  val today: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now())
  val yesterday: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now().minusDays(1))

  def initJdbcToH2(): Unit = {
    val config = new HikariConfig()
    config.setJdbcUrl(s"jdbc:h2:mem:$h2dbId;MODE=Oracle;DB_CLOSE_DELAY=-1")
    config.setUsername("sa")
    config.setPassword("h2.test.database.password")
    config.setMaximumPoolSize(2)
    dataSource = Option(new HikariDataSource(config))
    jdbcConnection = dataSource.map(new JdbcConnection(_))
    assert(jdbcConnection.isDefined, "Failed to connect to h2 local server")
  }

  def getMahaService(scope: String = "main"): MahaService = {
    val jsonString = FileUtils.readFileToString(new File(s"src/$scope/resources/maha-service-config.json"))
      .replaceAll("h2dbId", s"$h2dbId")

    initJdbcToH2()

    val mahaServiceResult = MahaServiceConfig.fromJson(jsonString.getBytes("utf-8"))
    if (mahaServiceResult.isFailure) {
      mahaServiceResult.leftMap {
        res =>
          error(s"Failed to launch Example MahaService, MahaService Error list is: ${res.list.toList}")
      }
    }
    val mahaServiceConfig = mahaServiceResult.toOption.get
    val mahaService: MahaService = new DefaultMahaService(mahaServiceConfig)
    stageStudentData(mahaServiceConfig)
    mahaService
  }

  def stageStudentData(mahaServiceConfig: MahaServiceConfig) : Unit = {

    val ddlGenerator = new OracleDDLGenerator
    val erRegistryConfig = mahaServiceConfig.registry.get(ExampleMahaService.REGISTRY_NAME).get
    val erRegistry = erRegistryConfig.registry
    erRegistry.factMap.values.foreach {
      publicFact =>
        publicFact.factList.foreach {
          fact =>
            val ddl = ddlGenerator.toDDL(fact)
            assert(jdbcConnection.get.executeUpdate(ddl).isSuccess)
        }
    }

    val insertSql =
      """
        INSERT INTO student_grade_sheet (year, section_id, student_id, class_id, total_marks, date, comment)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      """

    val rows: List[Seq[Any]] = List(
      Seq(1, 100, 213, 200, 125, ExampleMahaService.today, "some comment")
    )

    rows.foreach {
      row =>
        val result = jdbcConnection.get.executeUpdate(insertSql, row)
        assert(result.isSuccess)
    }
    var count = 0
    jdbcConnection.get.queryForObject("select * from student_grade_sheet") {
      rs =>
        while (rs.next()) {
          count += 1
        }
    }
    assert(rows.size == count)
  }
} 
Example 86
Source File: StandaloneKCFTests.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.standalone

import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.Files

import common.WskProps
import org.apache.commons.io.FileUtils
import org.apache.openwhisk.core.containerpool.kubernetes.test.KubeClientSupport
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import system.basic.WskRestBasicTests

@RunWith(classOf[JUnitRunner])
class StandaloneKCFTests
    extends WskRestBasicTests
    with StandaloneServerFixture
    with StandaloneSanityTestSupport
    with KubeClientSupport {
  override implicit val wskprops = WskProps().copy(apihost = serverUrl)

  // Turn on to make local debugging easier
  override protected val dumpLogsAlways = false

  override protected val dumpStartupLogs = false

  override protected def useMockServer = false

  override protected def supportedTests = Set("Wsk Action REST should invoke a blocking action and get only the result")

  override protected def extraArgs: Seq[String] = Seq("--dev-mode", "--dev-kcf")

  private val podTemplate = """---
                              |apiVersion: "v1"
                              |kind: "Pod"
                              |metadata:
                              |  annotations:
                              |    allow-outbound : "true"
                              |  labels:
                              |     launcher: standalone""".stripMargin

  private val podTemplateFile = Files.createTempFile("whisk", null).toFile

  override val customConfig = {
    FileUtils.write(podTemplateFile, podTemplate, UTF_8)
    Some(s"""include classpath("standalone-kcf.conf")
         |
         |whisk {
         |  kubernetes {
         |    pod-template = "${podTemplateFile.toURI}"
         |  }
         |}""".stripMargin)
  }

  override def afterAll(): Unit = {
    checkPodState()
    super.afterAll()
    podTemplateFile.delete()
  }

  def checkPodState(): Unit = {
    val podList = kubeClient.pods().withLabel("launcher").list()
    podList.getItems.isEmpty shouldBe false
  }
} 
Example 87
Source File: ConfigMapValueTests.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.common

import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.Files

import com.typesafe.config.ConfigFactory
import org.apache.commons.io.FileUtils
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{FlatSpec, Matchers}
import pureconfig._
import pureconfig.generic.auto._

@RunWith(classOf[JUnitRunner])
class ConfigMapValueTests extends FlatSpec with Matchers {
  behavior of "ConfigMapValue"

  case class ValueTest(template: ConfigMapValue, count: Int)

  it should "read from string" in {
    val config = ConfigFactory.parseString("""
       |whisk {
       |  value-test {
       |    template = "test string"
       |    count = 42
       |  }
       |}""".stripMargin)

    val valueTest = readValueTest(config)
    valueTest.template.value shouldBe "test string"
  }

  it should "read from file reference" in {
    val file = Files.createTempFile("whisk", null).toFile
    FileUtils.write(file, "test string", UTF_8)

    val config = ConfigFactory.parseString(s"""
       |whisk {
       |  value-test {
       |    template = "${file.toURI}"
       |    count = 42
       |  }
       |}""".stripMargin)

    val valueTest = readValueTest(config)
    valueTest.template.value shouldBe "test string"

    file.delete()
  }

  private def readValueTest(config: com.typesafe.config.Config) = {
    loadConfigOrThrow[ValueTest](config.getConfig("whisk.value-test"))
  }
} 
Example 88
Source File: ConfigMapValue.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.common

import java.io.File
import java.net.URI
import java.nio.charset.StandardCharsets.UTF_8

import org.apache.commons.io.FileUtils
import pureconfig.ConfigReader
import pureconfig.ConvertHelpers.catchReadError

class ConfigMapValue private (val value: String)

object ConfigMapValue {

  
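  // If the config string is a "file:" URI, read the referenced file's contents; otherwise use the string itself.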
  def apply(config: String): ConfigMapValue = {
    val value = if (config.startsWith("file:")) {
      val uri = new URI(config)
      val file = new File(uri)
      FileUtils.readFileToString(file, UTF_8)
    } else config
    new ConfigMapValue(value)
  }

  implicit val reader: ConfigReader[ConfigMapValue] = ConfigReader.fromString[ConfigMapValue](catchReadError(apply))
} 
Example 89
Source File: CollectionResourceUsage.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.core.database.cosmosdb

import org.apache.commons.io.FileUtils
import org.apache.openwhisk.core.entity.ByteSize
import org.apache.openwhisk.core.entity.SizeUnits.KB

case class CollectionResourceUsage(documentsSize: Option[ByteSize],
                                   collectionSize: Option[ByteSize],
                                   documentsCount: Option[Long],
                                   indexingProgress: Option[Int],
                                   documentsSizeQuota: Option[ByteSize]) {
  def indexSize: Option[ByteSize] = {
    for {
      ds <- documentsSize
      cs <- collectionSize
    } yield cs - ds
  }

  def asString: String = {
    List(
      documentsSize.map(ds => s"documentSize: ${displaySize(ds)}"),
      indexSize.map(is => s"indexSize: ${displaySize(is)}"),
      documentsCount.map(dc => s"documentsCount: $dc"),
      documentsSizeQuota.map(dq => s"collectionSizeQuota: ${displaySize(dq)}")).flatten.mkString(",")
  }

  private def displaySize(b: ByteSize) = FileUtils.byteCountToDisplaySize(b.toBytes)
}

object CollectionResourceUsage {
  val quotaHeader = "x-ms-resource-quota"
  val usageHeader = "x-ms-resource-usage"
  val indexHeader = "x-ms-documentdb-collection-index-transformation-progress"

  def apply(responseHeaders: Map[String, String]): Option[CollectionResourceUsage] = {
    for {
      quota <- responseHeaders.get(quotaHeader).map(headerValueToMap)
      usage <- responseHeaders.get(usageHeader).map(headerValueToMap)
    } yield {
      CollectionResourceUsage(
        usage.get("documentsSize").map(_.toLong).map(ByteSize(_, KB)),
        usage.get("collectionSize").map(_.toLong).map(ByteSize(_, KB)),
        usage.get("documentsCount").map(_.toLong),
        responseHeaders.get(indexHeader).map(_.toInt),
        quota.get("collectionSize").map(_.toLong).map(ByteSize(_, KB)))
    }
  }

  private def headerValueToMap(value: String): Map[String, String] = {
    //storedProcedures=100;triggers=25;functions=25;documentsCount=-1;documentsSize=xxx;collectionSize=xxx
    val pairs = value.split("=|;").grouped(2)
    pairs.map { case Array(k, v) => k -> v }.toMap
  }
} 
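A small usage sketch of the companion object's apply method; the header values below are invented examples in the "key=value;key=value" format parsed by headerValueToMap:

package org.apache.openwhisk.core.database.cosmosdb

object CollectionResourceUsageSketch {
  def main(args: Array[String]): Unit = {
    // Invented CosmosDB response headers, for illustration only.
    val responseHeaders = Map(
      "x-ms-resource-quota" -> "documentsSize=10485760;collectionSize=10485760;documentsCount=-1",
      "x-ms-resource-usage" -> "documentsSize=2048;collectionSize=4096;documentsCount=1200",
      "x-ms-documentdb-collection-index-transformation-progress" -> "100")
    CollectionResourceUsage(responseHeaders).foreach(usage => println(usage.asString))
  }
}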
Example 90
Source File: InstallRouteMgmt.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.standalone

import java.io.File

import akka.http.scaladsl.model.Uri
import org.apache.commons.io.{FileUtils, IOUtils}
import org.apache.openwhisk.common.TransactionId.systemPrefix
import org.apache.openwhisk.common.{Logging, TransactionId}

import scala.sys.process.ProcessLogger
import scala.util.Try
import scala.sys.process._

case class InstallRouteMgmt(workDir: File,
                            authKey: String,
                            apiHost: Uri,
                            namespace: String,
                            gatewayUrl: Uri,
                            wsk: String)(implicit log: Logging) {
  case class Action(name: String, desc: String)
  private val noopLogger = ProcessLogger(_ => ())
  private implicit val tid: TransactionId = TransactionId(systemPrefix + "apiMgmt")
  val actionNames = Array(
    Action("createApi", "Create an API"),
    Action("deleteApi", "Delete the API"),
    Action("getApi", "Retrieve the specified API configuration (in JSON format)"))

  def run(): Unit = {
    require(wskExists, s"wsk command not found at $wsk. Route management actions cannot be installed")
    log.info(this, packageUpdateCmd.!!.trim)
    //TODO Optimize to ignore this if package already installed
    actionNames.foreach { action =>
      val name = action.name
      val actionZip = new File(workDir, s"$name.zip")
      FileUtils.copyURLToFile(IOUtils.resourceToURL(s"/$name.zip"), actionZip)
      val cmd = createActionUpdateCmd(action, name, actionZip)
      val result = cmd.!!.trim
      log.info(this, s"Installed $name - $result")
      FileUtils.deleteQuietly(actionZip)
    }
    //This log message is used by tests to confirm that actions are installed
    log.info(this, "Installed Route Management Actions")
  }

  private def createActionUpdateCmd(action: Action, name: String, actionZip: File) = {
    Seq(
      wsk,
      "--apihost",
      apiHost.toString(),
      "--auth",
      authKey,
      "action",
      "update",
      s"$namespace/apimgmt/$name",
      actionZip.getAbsolutePath,
      "-a",
      "description",
      action.desc,
      "--kind",
      "nodejs:default",
      "-a",
      "web-export",
      "true",
      "-a",
      "final",
      "true")
  }

  private def packageUpdateCmd = {
    Seq(
      wsk,
      "--apihost",
      apiHost.toString(),
      "--auth",
      authKey,
      "package",
      "update",
      s"$namespace/apimgmt",
      "--shared",
      "no",
      "-a",
      "description",
      "This package manages the gateway API configuration.",
      "-p",
      "gwUrlV2",
      gatewayUrl.toString())
  }

  def wskExists: Boolean = Try(s"$wsk property get --cliversion".!(noopLogger)).getOrElse(-1) == 0
} 
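A hedged sketch of how the case class above might be invoked; the endpoint URLs, auth key and working directory are placeholders, and the Logging instance is assumed to be supplied by the surrounding runtime:

package org.apache.openwhisk.standalone

import java.io.File

import akka.http.scaladsl.model.Uri
import org.apache.openwhisk.common.Logging

object InstallRouteMgmtSketch {
  // The implicit Logging would come from the standalone server's actor system in practice.
  def installAll(workDir: File)(implicit log: Logging): Unit =
    InstallRouteMgmt(
      workDir = workDir,
      authKey = "placeholder-auth-key",
      apiHost = Uri("http://localhost:3233"),
      namespace = "whisk.system",
      gatewayUrl = Uri("http://localhost:9090"),
      wsk = "wsk").run()
}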
Example 91
Source File: TestSpec.scala    From spark-distcp   with Apache License 2.0 5 votes vote down vote up
package com.coxautodata

import java.io.ByteArrayInputStream
import java.nio.file.Files

import com.coxautodata.objects.SerializableFileStatus
import com.coxautodata.utils.FileListing
import org.apache.commons.io.{FileUtils, IOUtils}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
import org.scalatest.{BeforeAndAfterEach, FunSpec, Matchers}

trait TestSpec extends FunSpec with Matchers with BeforeAndAfterEach {

  var testingBaseDir: java.nio.file.Path = _
  var testingBaseDirName: String = _
  var testingBaseDirPath: Path = _
  var localFileSystem: LocalFileSystem = _

  override def beforeEach(): Unit = {
    super.beforeEach()
    testingBaseDir = Files.createTempDirectory("test_output")
    testingBaseDirName = testingBaseDir.toString
    localFileSystem = FileSystem.getLocal(new Configuration())
    testingBaseDirPath = localFileSystem.makeQualified(new Path(testingBaseDirName))
  }

  override def afterEach(): Unit = {
    super.afterEach()
    FileUtils.deleteDirectory(testingBaseDir.toFile)
  }

  def createFile(relativePath: Path, content: Array[Byte]): SerializableFileStatus = {
    val path = new Path(testingBaseDirPath, relativePath)
    localFileSystem.mkdirs(path.getParent)
    val in = new ByteArrayInputStream(content)
    val out = localFileSystem.create(path)
    IOUtils.copy(in, out)
    in.close()
    out.close()
    SerializableFileStatus(localFileSystem.getFileStatus(path))
  }

  def fileStatusToResult(f: SerializableFileStatus): FileListing = {
    FileListing(f.getPath.toString, if (f.isFile) Some(f.getLen) else None)
  }

} 
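A hypothetical spec built on the trait above, showing the per-test temporary directory in use:

package com.coxautodata

import org.apache.hadoop.fs.Path

class ExampleTestSpec extends TestSpec {
  it("creates files under a fresh temporary base directory") {
    // The relative path is resolved against testingBaseDirPath by createFile.
    val status = createFile(new Path("sub/data.bin"), Array[Byte](1, 2, 3))
    status.getLen should be(3L)
    localFileSystem.exists(new Path(testingBaseDirPath, "sub/data.bin")) should be(true)
  }
}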
Example 92
Source File: TestFolder.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package org.schedoscope.scheduler.driver

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.Suite

trait TestFolder extends Suite {
  self: Suite =>
  var testFolder: File = _
  var inputFolder: File = _
  var outputFolder: File = _

  def in = inputFolder.getAbsolutePath()

  def out = outputFolder.getAbsolutePath()

  private def deleteFile(file: File) {
    if (!file.exists) return
    if (file.isFile) {
      file.delete()
    } else {
      file.listFiles().foreach(deleteFile)
      file.delete()
    }
  }

  def /() = File.separator

  def createInputFile(path: String) {
    FileUtils.touch(new File(s"${inputFolder}${File.separator}${path}"))
  }

  def outputFile(path: String) = new File(outputPath(path))

  def inputFile(path: String) = new File(inputPath(path))

  def inputPath(path: String) = s"${in}${File.separator}${path}"

  def outputPath(path: String) = s"${out}${File.separator}${path}"

  abstract override def withFixture(test: NoArgTest) = {
    val tempFolder = System.getProperty("java.io.tmpdir")
    var folder: File = null

    do {
      folder = new File(tempFolder, "scalatest-" + System.nanoTime)
    } while (!folder.mkdir())

    testFolder = folder

    inputFolder = new File(testFolder, "in")
    inputFolder.mkdir()
    outputFolder = new File(testFolder, "out")
    outputFolder.mkdir()

    try {
      super.withFixture(test)
    } finally {
      deleteFile(testFolder)
    }
  }
} 
Example 93
Source File: JavaMetricsScreen.scala    From Pi-Akka-Cluster   with Apache License 2.0 5 votes vote down vote up
package akka_oled

import java.lang.management.ManagementFactory
import java.text.DecimalFormat

import com.sun.management.OperatingSystemMXBean
import org.apache.commons.io.FileUtils

import scala.collection.mutable

trait JavaMetricsScreen {
   def getJavaMetrics(): Array[Array[String]] = {
      val bean = ManagementFactory.getPlatformMXBean(classOf[OperatingSystemMXBean])
      val formatter = new DecimalFormat("#0.00")
      val map = mutable.LinkedHashMap[String, String](
         "Max mem:" -> FileUtils.byteCountToDisplaySize(ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getMax),
         "Curr mem:" -> FileUtils.byteCountToDisplaySize(ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getUsed),
         "CPU:" -> (formatter.format(bean.getSystemCpuLoad * 100) + "%"), // getSystemCpuLoad returns a fraction in [0, 1]
         "Threads:" -> ManagementFactory.getThreadMXBean.getThreadCount.toString,
         "Classes:" -> ManagementFactory.getClassLoadingMXBean.getLoadedClassCount.toString)
      map.toArray.map(x => Array(x._1, x._2))
   }
} 
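A minimal sketch that prints the collected metrics as two columns; the object name is hypothetical:

package akka_oled

object JavaMetricsDemo extends JavaMetricsScreen {
  def main(args: Array[String]): Unit =
    // Each entry is an Array(label, value) pair produced by getJavaMetrics().
    getJavaMetrics().foreach { case Array(label, value) => println(f"$label%-10s $value") }
}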
Example 94
Source File: GraphFrameTestSparkContext.scala    From graphframes   with Apache License 2.0 5 votes vote down vote up
package org.graphframes

import java.io.File
import java.nio.file.Files

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterAll, Suite}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{SparkSession, SQLContext, SQLImplicits}

trait GraphFrameTestSparkContext extends BeforeAndAfterAll { self: Suite =>
  @transient var spark: SparkSession = _
  @transient var sc: SparkContext = _
  @transient var sqlContext: SQLContext = _
  @transient var sparkMajorVersion: Int = _
  @transient var sparkMinorVersion: Int = _

  
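  // True when the running Spark version is at least minVersion (compared by major, then minor version).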
  def isLaterVersion(minVersion: String): Boolean = {
    val (minMajorVersion, minMinorVersion) = TestUtils.majorMinorVersion(minVersion)
    if (sparkMajorVersion != minMajorVersion) {
      return sparkMajorVersion > minMajorVersion
    } else {
      return sparkMinorVersion >= minMinorVersion
    }
  }

  override def beforeAll() {
    super.beforeAll()

    spark = SparkSession.builder()
      .master("local[2]")
      .appName("GraphFramesUnitTest")
      .config("spark.sql.shuffle.partitions", 4)
      .getOrCreate()

    val checkpointDir = Files.createTempDirectory(this.getClass.getName).toString
    spark.sparkContext.setCheckpointDir(checkpointDir)
    sc = spark.sparkContext
    sqlContext = spark.sqlContext

    val (verMajor, verMinor) = TestUtils.majorMinorVersion(sc.version)
    sparkMajorVersion = verMajor
    sparkMinorVersion = verMinor
  }

  override def afterAll() {
    val checkpointDir = sc.getCheckpointDir
    if (spark != null) {
      spark.stop()
    }
    spark = null
    sqlContext = null
    sc = null

    checkpointDir.foreach { dir =>
      FileUtils.deleteQuietly(new File(dir))
    }
    super.afterAll()
  }
} 
Example 95
Source File: PluginsFilesUtils.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.serving.core.utils

import java.io.File
import java.net.URL
import java.util.{Calendar, UUID}

import akka.event.slf4j.SLF4JLogging
import com.stratio.sparta.serving.core.helpers.JarsHelper
import org.apache.commons.io.FileUtils

trait PluginsFilesUtils extends SLF4JLogging {

  def addPluginsToClassPath(pluginsFiles: Array[String]): Unit = {
    log.info(pluginsFiles.mkString(","))
    pluginsFiles.foreach(filePath => {
      log.info(s"Adding to classpath plugin file: $filePath")
      if (filePath.startsWith("/") || filePath.startsWith("file://")) addFromLocal(filePath)
      if (filePath.startsWith("hdfs")) addFromHdfs(filePath)
      if (filePath.startsWith("http")) addFromHttp(filePath)
    })
  }

  private def addFromLocal(filePath: String): Unit = {
    log.info(s"Getting file from local: $filePath")
    val file = new File(filePath.replace("file://", ""))
    JarsHelper.addToClasspath(file)
  }

  private def addFromHdfs(fileHdfsPath: String): Unit = {
    log.info(s"Getting file from HDFS: $fileHdfsPath")
    val inputStream = HdfsUtils().getFile(fileHdfsPath)
    val fileName = fileHdfsPath.split("/").last
    log.info(s"HDFS file name is $fileName")
    val file = new File(s"/tmp/sparta/userjars/${UUID.randomUUID().toString}/$fileName")
    log.info(s"Downloading HDFS file to local file system: ${file.getAbsoluteFile}")
    FileUtils.copyInputStreamToFile(inputStream, file)
    JarsHelper.addToClasspath(file)
  }

  private def addFromHttp(fileURI: String): Unit = {
    log.info(s"Getting file from HTTP: $fileURI")
    val tempFile = File.createTempFile(s"sparta-plugin-${Calendar.getInstance().getTimeInMillis}", ".jar")
    val url = new URL(fileURI)
    FileUtils.copyURLToFile(url, tempFile)
    JarsHelper.addToClasspath(tempFile)
  }
} 
Example 96
Source File: FileSystemOutputIT.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.output.filesystem

import java.io.File

import com.stratio.sparta.plugin.TemporalSparkContext
import com.stratio.sparta.plugin.output.fileSystem.FileSystemOutput
import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum}
import org.apache.commons.io.FileUtils
import org.apache.spark.sql._
import org.apache.spark.sql.types._
import org.junit.runner.RunWith
import org.scalatest.Matchers
import org.scalatest.junit.JUnitRunner


@RunWith(classOf[JUnitRunner])
class FileSystemOutputIT extends TemporalSparkContext with Matchers {

  val directory = getClass().getResource("/origin.txt")
  val parentFile = new File(directory.getPath).getParent
  val properties = Map(("path", parentFile + "/testRow"), ("outputFormat", "row"))
  val fields = StructType(StructField("name", StringType, false) ::
    StructField("age", IntegerType, false) ::
    StructField("year", IntegerType, true) :: Nil)
  val fsm = new FileSystemOutput("key", properties)


  "An object of type FileSystemOutput " should "have the same values as the properties Map" in {
    fsm.outputFormat should be(OutputFormatEnum.ROW)
  }

  
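  // Builds a small DataFrame of (name, age, year) rows used by the write tests below.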
  private def dfGen(): DataFrame = {
    val sqlCtx = SparkSession.builder().config(sc.getConf).getOrCreate()
    val dataRDD = sc.parallelize(List(("user1", 23, 1993), ("user2", 26, 1990), ("user3", 21, 1995)))
      .map { case (name, age, year) => Row(name, age, year) }

    sqlCtx.createDataFrame(dataRDD, fields)
  }

  def fileExists(path: String): Boolean = new File(path).exists()

  "Given a DataFrame, a directory" should "be created with the data written inside" in {
    fsm.save(dfGen(), SaveModeEnum.Append, Map(Output.TableNameKey -> "test"))
    fileExists(fsm.path.get) should equal(true)
  }

  it should "exist with the given path and be deleted" in {
    if (fileExists(fsm.path.get))
      FileUtils.deleteDirectory(new File(fsm.path.get))
    fileExists(fsm.path.get) should equal(false)
  }

  val fsm2 = new FileSystemOutput("key", properties.updated("outputFormat", "json")
    .updated("path", parentFile + "/testJson"))

  "Given another DataFrame, a directory" should "be created with the data inside in JSON format" in {
    fsm2.outputFormat should be(OutputFormatEnum.JSON)
    fsm2.save(dfGen(), SaveModeEnum.Append, Map(Output.TableNameKey -> "test"))
    fileExists(fsm2.path.get) should equal(true)
  }

  it should "exist with the given path and be deleted" in {
    if (fileExists(s"${fsm2.path.get}/test"))
      FileUtils.deleteDirectory(new File(s"${fsm2.path.get}/test"))
    fileExists(s"${fsm2.path.get}/test") should equal(false)
  }
} 
Example 97
Source File: MLAtlasEntityUtilsSuite.scala    From spark-atlas-connector   with Apache License 2.0 5 votes vote down vote up
package com.hortonworks.spark.atlas.types

import java.io.File

import org.apache.atlas.{AtlasClient, AtlasConstants}
import org.apache.atlas.model.instance.AtlasEntity
import org.apache.commons.io.FileUtils
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.MinMaxScaler
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
import org.scalatest.{FunSuite, Matchers}
import com.hortonworks.spark.atlas.TestUtils._
import com.hortonworks.spark.atlas.{AtlasUtils, WithHiveSupport}

class MLAtlasEntityUtilsSuite extends FunSuite with Matchers with WithHiveSupport {

  def getTableEntity(tableName: String): AtlasEntity = {
    val dbDefinition = createDB("db1", "hdfs:///test/db/db1")
    val sd = createStorageFormat()
    val schema = new StructType()
      .add("user", StringType, false)
      .add("age", IntegerType, true)
    val tableDefinition = createTable("db1", s"$tableName", schema, sd)

    val tableEntities = internal.sparkTableToEntity(
      tableDefinition, AtlasConstants.DEFAULT_CLUSTER_NAME, Some(dbDefinition))
    val tableEntity = tableEntities.entity

    tableEntity
  }

  test("pipeline, pipeline model, fit and transform") {
    val uri = "/"
    val pipelineDir = "tmp/pipeline"
    val modelDir = "tmp/model"

    val pipelineDirEntity = internal.mlDirectoryToEntity(uri, pipelineDir)
    pipelineDirEntity.entity.getAttribute("uri") should be (uri)
    pipelineDirEntity.entity.getAttribute("directory") should be (pipelineDir)
    pipelineDirEntity.dependencies.length should be (0)

    val modelDirEntity = internal.mlDirectoryToEntity(uri, modelDir)
    modelDirEntity.entity.getAttribute("uri") should be (uri)
    modelDirEntity.entity.getAttribute("directory") should be (modelDir)
    modelDirEntity.dependencies.length should be (0)

    val df = sparkSession.createDataFrame(Seq(
      (1, Vectors.dense(0.0, 1.0, 4.0), 1.0),
      (2, Vectors.dense(1.0, 0.0, 4.0), 2.0),
      (3, Vectors.dense(1.0, 0.0, 5.0), 3.0),
      (4, Vectors.dense(0.0, 0.0, 5.0), 4.0)
    )).toDF("id", "features", "label")

    val scaler = new MinMaxScaler()
      .setInputCol("features")
      .setOutputCol("features_scaled")
      .setMin(0.0)
      .setMax(3.0)
    val pipeline = new Pipeline().setStages(Array(scaler))

    val model = pipeline.fit(df)

    pipeline.write.overwrite().save(pipelineDir)

    val pipelineEntity = internal.mlPipelineToEntity(pipeline.uid, pipelineDirEntity)
    pipelineEntity.entity.getTypeName should be (metadata.ML_PIPELINE_TYPE_STRING)
    pipelineEntity.entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) should be (
      pipeline.uid)
    pipelineEntity.entity.getAttribute("name") should be (pipeline.uid)
    pipelineEntity.entity.getRelationshipAttribute("directory") should be (
      AtlasUtils.entityToReference(pipelineDirEntity.entity, useGuid = false))
    pipelineEntity.dependencies should be (Seq(pipelineDirEntity))

    val modelEntity = internal.mlModelToEntity(model.uid, modelDirEntity)
    val modelUid = model.uid.replaceAll("pipeline", "model")
    modelEntity.entity.getTypeName should be (metadata.ML_MODEL_TYPE_STRING)
    modelEntity.entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) should be (modelUid)
    modelEntity.entity.getAttribute("name") should be (modelUid)
    modelEntity.entity.getRelationshipAttribute("directory") should be (
      AtlasUtils.entityToReference(modelDirEntity.entity, useGuid = false))

    modelEntity.dependencies should be (Seq(modelDirEntity))

    FileUtils.deleteDirectory(new File("tmp"))
  }
} 
Example 98
Source File: WithRemoteHiveMetastoreServiceSupport.scala    From spark-atlas-connector   with Apache License 2.0 5 votes vote down vote up
package com.hortonworks.spark.atlas

import java.io.File
import java.nio.file.Files

import com.hortonworks.spark.atlas.utils.SparkUtils
import com.hotels.beeju.ThriftHiveMetaStoreTestUtil
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, Suite}

trait WithRemoteHiveMetastoreServiceSupport extends BeforeAndAfterAll { self: Suite =>
  protected val dbName = "sac_hive_metastore"

  protected var sparkSession: SparkSession = _

  private var warehouseDir: String = _

  private val hive = new ThriftHiveMetaStoreTestUtil(dbName)

  private def cleanupAnyExistingSession(): Unit = {
    val session = SparkSession.getActiveSession.orElse(SparkSession.getDefaultSession)
    if (session.isDefined) {
      session.get.sessionState.catalog.reset()
      session.get.stop()
      SparkSession.clearActiveSession()
      SparkSession.clearDefaultSession()
    }
  }

  override protected def beforeAll(): Unit = {
    super.beforeAll()

    cleanupAnyExistingSession()

    hive.before()

    warehouseDir = Files.createTempDirectory("sac-warehouse-").toString
    sparkSession = SparkSession.builder()
      .master("local")
      .appName(this.getClass.getCanonicalName)
      .enableHiveSupport()
      .config("spark.ui.enabled", "false")
      .config("spark.sql.warehouse.dir", warehouseDir)
      .config("spark.hadoop.hive.metastore.uris", hive.getThriftConnectionUri)
      .getOrCreate()

    // reset hiveConf to make sure the configuration change takes effect
    SparkUtils.resetHiveConf
  }

  override protected def afterAll(): Unit = {
    try {
      hive.after()
      sparkSession.sessionState.catalog.reset()
      sparkSession.stop()
      SparkSession.clearActiveSession()
      SparkSession.clearDefaultSession()
    } finally {
      // reset hiveConf again to prevent affecting other tests
      SparkUtils.resetHiveConf

      sparkSession = null
      FileUtils.deleteDirectory(new File(warehouseDir))
    }
    System.clearProperty("spark.driver.port")

    super.afterAll()
  }
} 
Example 99
Source File: WithHiveSupport.scala    From spark-atlas-connector   with Apache License 2.0 5 votes vote down vote up
package com.hortonworks.spark.atlas

import java.io.File
import java.nio.file.Files

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, Suite}

trait WithHiveSupport extends BeforeAndAfterAll { self: Suite =>

  protected var sparkSession: SparkSession = _

  private var metastoreDir: String = _
  private var warehouseDir: String = _

  private def cleanupAnyExistingSession(): Unit = {
    val session = SparkSession.getActiveSession.orElse(SparkSession.getDefaultSession)
    if (session.isDefined) {
      session.get.sessionState.catalog.reset()
      session.get.stop()
      SparkSession.clearActiveSession()
      SparkSession.clearDefaultSession()
    }
  }

  override protected def beforeAll(): Unit = {
    super.beforeAll()

    cleanupAnyExistingSession()

    metastoreDir = Files.createTempDirectory("sac-metastore-").toString
    warehouseDir = Files.createTempDirectory("sac-warehouse-").toString
    System.setProperty("derby.system.home", metastoreDir)
    sparkSession = SparkSession.builder()
      .master("local")
      .appName(this.getClass.getCanonicalName)
      .enableHiveSupport()
      .config("spark.ui.enabled", "false")
      .config("spark.sql.warehouse.dir", warehouseDir)
      .getOrCreate()
  }

  override protected def afterAll(): Unit = {
    try {
      sparkSession.sessionState.catalog.reset()
      sparkSession.stop()
      SparkSession.clearActiveSession()
      SparkSession.clearDefaultSession()
    } finally {
      sparkSession = null
      FileUtils.deleteDirectory(new File(warehouseDir))
    }
    System.clearProperty("spark.driver.port")

    super.afterAll()
  }
} 
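A hypothetical suite mixing in the trait above; the SparkSession is created in beforeAll and the temporary warehouse is removed in afterAll:

package com.hortonworks.spark.atlas

import org.scalatest.FunSuite

class ExampleWithHiveSuite extends FunSuite with WithHiveSupport {
  test("can run a simple query against the temporary warehouse") {
    sparkSession.sql("CREATE TABLE IF NOT EXISTS sketch_t(id INT)")
    assert(sparkSession.sql("SHOW TABLES").count() >= 1)
  }
}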
Example 100
Source File: DistServiceExecutor.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.distributeservice

import java.io.{File, FileWriter}
import java.net.InetAddress
import scala.collection.JavaConverters._
import scala.io.Source
import scala.sys.process._
import scala.util.{Failure, Success, Try}

import akka.actor.Actor
import org.apache.commons.io.FileUtils
import org.apache.commons.lang.text.StrSubstitutor
import org.slf4j.Logger

import org.apache.gearpump.cluster.{ExecutorContext, UserConfig}
import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.InstallService
import org.apache.gearpump.util.{ActorUtil, LogUtil}

class DistServiceExecutor(executorContext: ExecutorContext, userConf: UserConfig) extends Actor {
  import executorContext._
  private val LOG: Logger = LogUtil.getLogger(getClass, executor = executorId, app = appId)

  override def receive: Receive = {
    case InstallService(url, zipFileName, targetPath, scriptData, serviceName, serviceSettings) =>
      LOG.info(s"Executor $executorId receive command to install " +
        s"service $serviceName to $targetPath")
      unzipFile(url, zipFileName, targetPath)
      installService(scriptData, serviceName, serviceSettings)
  }

  private def unzipFile(url: String, zipFileName: String, targetPath: String) = {
    val zipFile = File.createTempFile(System.currentTimeMillis().toString, zipFileName)
    val dir = new File(targetPath)
    if (dir.exists()) {
      FileUtils.forceDelete(dir)
    }
    val bytes = FileServer.newClient.get(url).get
    FileUtils.writeByteArrayToFile(zipFile, bytes)
    val result = Try(s"unzip ${zipFile.getAbsolutePath} -d $targetPath".!!)
    result match {
      case Success(msg) => LOG.info(s"Executor $executorId unzip file to $targetPath")
      case Failure(ex) => throw ex
    }
  }

  private def installService(
      scriptData: Array[Byte], serviceName: String, serviceSettings: Map[String, Any]) = {
    val tempFile = File.createTempFile("gearpump", serviceName)
    FileUtils.writeByteArrayToFile(tempFile, scriptData)
    val script = new File("/etc/init.d", serviceName)
    writeFileWithEnvVariables(tempFile, script, serviceSettings ++ getEnvSettings)
    val result = Try(s"chkconfig --add $serviceName".!!)
    result match {
      case Success(msg) => LOG.info(s"Executor install service $serviceName successfully!")
      case Failure(ex) => throw ex
    }
  }

  private def getEnvSettings: Map[String, Any] = {
    Map("workerId" -> worker,
      "localhost" -> ActorUtil.getSystemAddress(context.system).host.get,
      "hostname" -> InetAddress.getLocalHost.getHostName)
  }

  private def writeFileWithEnvVariables(source: File, target: File, envs: Map[String, Any]) = {
    val writer = new FileWriter(target)
    val sub = new StrSubstitutor(envs.asJava)
    sub.setEnableSubstitutionInVariables(true)
    Source.fromFile(source).getLines().foreach(line => writer.write(sub.replace(line) + "\r\n"))
    writer.close()
  }
} 
Example 101
Source File: DistributeServiceClient.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.distributeservice

import java.io.File
import scala.concurrent.Future
import scala.util.{Failure, Success}

import akka.pattern.ask
import org.apache.commons.io.FileUtils

import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption}
import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.{FileContainer, GetFileContainer, InstallService}
import org.apache.gearpump.util.{AkkaApp, Constants}


object DistributeServiceClient extends AkkaApp with ArgumentsParser {
  implicit val timeout = Constants.FUTURE_TIMEOUT

  override val options: Array[(String, CLIOption[Any])] = Array(
    "appid" -> CLIOption[Int]("<the distributed shell appid>", required = true),
    "file" -> CLIOption[String]("<service zip file path>", required = true),
    "script" -> CLIOption[String](
      "<file path of service script that will be installed to /etc/init.d>", required = true),
    "serviceName" -> CLIOption[String]("<service name>", required = true),
    "target" -> CLIOption[String]("<target path on each machine>", required = true)
  )

  override def help(): Unit = {
    super.help()
    // scalastyle:off println
    Console.err.println(s"-D<name>=<value> sets a property for the service")
    // scalastyle:on println
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(filterCustomOptions(args))
    val context = ClientContext(akkaConf)
    implicit val system = context.system
    implicit val dispatcher = system.dispatcher
    val appid = config.getInt("appid")
    val zipFile = new File(config.getString("file"))
    val script = new File(config.getString("script"))
    val serviceName = config.getString("serviceName")
    val appMaster = context.resolveAppID(appid)
    (appMaster ? GetFileContainer).asInstanceOf[Future[FileContainer]].map { container =>
      val bytes = FileUtils.readFileToByteArray(zipFile)
      val result = FileServer.newClient.save(container.url, bytes)
      result match {
        case Success(_) =>
          appMaster ! InstallService(container.url, zipFile.getName, config.getString("target"),
            FileUtils.readFileToByteArray(script), serviceName, parseServiceConfig(args))
          context.close()
        case Failure(ex) => throw ex
      }
    }
  }

  private def filterCustomOptions(args: Array[String]): Array[String] = {
    args.filter(!_.startsWith("-D"))
  }

  private def parseServiceConfig(args: Array[String]): Map[String, Any] = {
    val result = Map.empty[String, Any]
    args.foldLeft(result) { (result, argument) =>
      if (argument.startsWith("-D") && argument.contains("=")) {
        val fixedKV = argument.substring(2).split("=")
        result + (fixedKV(0) -> fixedKV(1))
      } else {
        result
      }
    }
  }
} 
Example 102
Source File: PersistenceSpec.scala    From 006877   with MIT License 5 votes vote down vote up
package akka.testkit


import java.io.File
import com.typesafe.config._

import scala.util._

import akka.actor._
import org.scalatest._

import org.apache.commons.io.FileUtils

abstract class PersistenceSpec(system: ActorSystem) extends TestKit(system)
  with ImplicitSender
  with WordSpecLike
  with Matchers
  with BeforeAndAfterAll
  with PersistenceCleanup {

  def this(name: String, config: Config) = this(ActorSystem(name, config))
  override protected def beforeAll() = deleteStorageLocations()

  override protected def afterAll() = {
    deleteStorageLocations()
    TestKit.shutdownActorSystem(system)
  }

  def killActors(actors: ActorRef*) = {
    actors.foreach { actor =>
      watch(actor)
      system.stop(actor)
      expectTerminated(actor)
      Thread.sleep(1000) // on Travis the actor name is intermittently still registered when the actor is recreated right after killActors; this sleep is duct tape.
    }
  }
}

trait PersistenceCleanup {
  def system: ActorSystem

  val storageLocations = List(
    "akka.persistence.journal.leveldb.dir",
    "akka.persistence.journal.leveldb-shared.store.dir",
    "akka.persistence.snapshot-store.local.dir").map { s =>
    new File(system.settings.config.getString(s))
  }

  def deleteStorageLocations(): Unit = {
    storageLocations.foreach(dir => Try(FileUtils.deleteDirectory(dir)))
  }
} 
Example 103
Source File: PersistenceSpec.scala    From 006877   with MIT License 5 votes vote down vote up
package akka.testkit


import java.io.File
import com.typesafe.config._

import scala.util._

import akka.actor._
import org.scalatest._

import org.apache.commons.io.FileUtils

abstract class PersistenceSpec(system: ActorSystem) extends TestKit(system)
  with ImplicitSender
  with WordSpecLike
  with Matchers
  with BeforeAndAfterAll
  with PersistenceCleanup {

  def this(name: String, config: Config) = this(ActorSystem(name, config))
  override protected def beforeAll() = deleteStorageLocations()

  override protected def afterAll() = {
    deleteStorageLocations()
    TestKit.shutdownActorSystem(system)
  }

  def killActors(actors: ActorRef*) = {
    actors.foreach { actor =>
      watch(actor)
      system.stop(actor)
      expectTerminated(actor)
      Thread.sleep(1000) // on Travis the actor name is intermittently still registered when the actor is recreated right after killActors; this sleep is duct tape.
    }
  }
}

trait PersistenceCleanup {
  def system: ActorSystem

  val storageLocations = List(
    "akka.persistence.journal.leveldb.dir",
    "akka.persistence.journal.leveldb-shared.store.dir",
    "akka.persistence.snapshot-store.local.dir").map(s => new File(system.settings.config.getString(s)))

  def deleteStorageLocations(): Unit = {
    storageLocations.foreach(dir => Try(FileUtils.deleteDirectory(dir)))
  }
} 
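A hypothetical concrete spec built on the abstract class above, assuming akka-persistence's reference configuration (which defines the three directory settings) is on the classpath:

package akka.testkit

import com.typesafe.config.ConfigFactory

class ExamplePersistenceSpec extends PersistenceSpec("example", ConfigFactory.load()) {
  "The configured storage locations" must {
    "be resolved from the actor system settings" in {
      storageLocations should not be empty
    }
  }
}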
Example 104
Source File: ExternalCluster.scala    From incubator-livy   with Apache License 2.0 5 votes vote down vote up
package org.apache.livy.test.framework

import java.io._

import org.apache.commons.io.FileUtils
import org.apache.hadoop.fs.Path

import org.apache.livy.{LivyConf, Logging}
import org.apache.livy.client.common.TestUtils
import org.apache.livy.server.LivyServer

/**
 * Used to run tests on a real external cluster.
 * In order to run tests against an external cluster, you need to create
 * a configuration called cluster.spec and run the test suite with the option
 * -Dcluster.spec=<yourcluster.spec>
 *
 * Also, make sure to disable the following line in InteractiveIT.scala:
 * s.run("""sc.getConf.get("spark.executor.instances")""").verifyResult("res1: String = 1\n")
 *
 * This is because your external cluster may not have the same configuration as the MiniCluster
 *
 * See the cluster.spec.template file for an example cluster.spec
 */
class ExternalCluster(config: Map[String, String]) extends Cluster with Logging {
  private var _configDir: File = _

  private var _livyEndpoint: String = _
  private var _livyThriftJdbcUrl: Option[String] = _
  private var _hdfsScrathDir: Path = _

  private var _authScheme: String = _
  private var _user: String = _
  private var _password: String = _
  private var _sslCertPath: String = _

  private var _principal: String = _
  private var _keytabPath: String = _

  // Livy rest url endpoint
  override def livyEndpoint: String = _livyEndpoint

  // Livy jdbc url endpoint
  override def jdbcEndpoint: Option[String] = _livyThriftJdbcUrl

  // Temp directory in hdfs
  override def hdfsScratchDir(): Path = _hdfsScrathDir

  // Working directory that store core-site.xml, yarn-site.xml
  override def configDir(): File = _configDir

  // Security details
  override def authScheme: String = _authScheme
  override def user: String = _user
  override def password: String = _password
  override def sslCertPath: String = _sslCertPath

  override def principal: String = _principal
  override def keytabPath: String = _keytabPath

  override def doAsClusterUser[T](task: => T): T = task

  override def deploy(): Unit = {
    _configDir = new File(config.getOrElse("configDir", "hadoop-conf"))
    _livyEndpoint = config.getOrElse("livyEndpoint", "")

    _authScheme = config.getOrElse("authScheme", "")
    _user = config.getOrElse("user", "")
    _password = config.getOrElse("password", "")
    _sslCertPath = config.getOrElse("sslCertPath", "")
    _principal = config.getOrElse("principal", "")
    _keytabPath = config.getOrElse("keytabPath", "")

    // Needs to be set after all the other fields are filled in properly
    _hdfsScrathDir = fs.makeQualified(new Path(config.getOrElse("hdfsScratchDir", "/")))
  }

  override def cleanUp(): Unit = {
  }

  def runLivy(): Unit = {
  }

  def stopLivy(): Unit = {
  }
} 
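The keys read in deploy() indicate the expected configuration; a hedged in-code equivalent with placeholder values (the real values normally come from the cluster.spec file) might look like:

package org.apache.livy.test.framework

object ExternalClusterSketch {
  def main(args: Array[String]): Unit = {
    // Placeholder values; deploy() also needs a valid Hadoop config dir to qualify the scratch path.
    val clusterConfig = Map(
      "configDir" -> "hadoop-conf",
      "livyEndpoint" -> "http://livy-host:8998",
      "hdfsScratchDir" -> "/tmp/livy-it")
    val cluster = new ExternalCluster(clusterConfig)
    cluster.deploy()
    println(s"Running integration tests against ${cluster.livyEndpoint}")
  }
}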
Example 105
Source File: BaseInteractiveServletSpec.scala    From incubator-livy   with Apache License 2.0 5 votes vote down vote up
package org.apache.livy.server.interactive

import java.io.File
import java.nio.file.Files

import org.apache.commons.io.FileUtils
import org.apache.spark.launcher.SparkLauncher

import org.apache.livy.LivyConf
import org.apache.livy.rsc.RSCConf
import org.apache.livy.server.BaseSessionServletSpec
import org.apache.livy.sessions.{Kind, SessionKindModule, Spark}

abstract class BaseInteractiveServletSpec
  extends BaseSessionServletSpec[InteractiveSession, InteractiveRecoveryMetadata] {

  mapper.registerModule(new SessionKindModule())

  protected var tempDir: File = _

  override def afterAll(): Unit = {
    super.afterAll()
    if (tempDir != null) {
      scala.util.Try(FileUtils.deleteDirectory(tempDir))
      tempDir = null
    }
  }

  override protected def createConf(): LivyConf = synchronized {
    if (tempDir == null) {
      tempDir = Files.createTempDirectory("client-test").toFile()
    }
    super.createConf()
      .set(LivyConf.SESSION_STAGING_DIR, tempDir.toURI().toString())
      .set(LivyConf.REPL_JARS, "dummy.jar")
      .set(LivyConf.LIVY_SPARK_VERSION, sys.env("LIVY_SPARK_VERSION"))
      .set(LivyConf.LIVY_SPARK_SCALA_VERSION, sys.env("LIVY_SCALA_VERSION"))
  }

  protected def createRequest(
      inProcess: Boolean = true,
      extraConf: Map[String, String] = Map(),
      kind: Kind = Spark): CreateInteractiveRequest = {
    val classpath = sys.props("java.class.path")
    val request = new CreateInteractiveRequest()
    request.kind = kind
    request.name = None
    request.conf = extraConf ++ Map(
      RSCConf.Entry.LIVY_JARS.key() -> "",
      RSCConf.Entry.CLIENT_IN_PROCESS.key() -> inProcess.toString,
      SparkLauncher.SPARK_MASTER -> "local",
      SparkLauncher.DRIVER_EXTRA_CLASSPATH -> classpath,
      SparkLauncher.EXECUTOR_EXTRA_CLASSPATH -> classpath
    )
    request
  }

} 
Example 106
Source File: YarnShuffleIntegrationSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.yarn

import java.io.File
import java.nio.charset.StandardCharsets

import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers

import org.apache.spark._
import org.apache.spark.internal.Logging
import org.apache.spark.network.shuffle.ShuffleTestAccessor
import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
import org.apache.spark.tags.ExtendedYarnTest


@ExtendedYarnTest
class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {

  override def newYarnConfig(): YarnConfiguration = {
    val yarnConfig = new YarnConfiguration()
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
      classOf[YarnShuffleService].getCanonicalName)
    yarnConfig.set("spark.shuffle.service.port", "0")
    yarnConfig
  }

  test("external shuffle service") {
    val shuffleServicePort = YarnTestAccessor.getShuffleServicePort
    val shuffleService = YarnTestAccessor.getShuffleServiceInstance

    val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService)

    logInfo("Shuffle service port = " + shuffleServicePort)
    val result = File.createTempFile("result", null, tempDir)
    val finalState = runSpark(
      false,
      mainClassName(YarnExternalShuffleDriver.getClass),
      appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath),
      extraConf = Map(
        "spark.shuffle.service.enabled" -> "true",
        "spark.shuffle.service.port" -> shuffleServicePort.toString
      )
    )
    checkResult(finalState, result)
    assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists())
  }
}

private object YarnExternalShuffleDriver extends Logging with Matchers {

  val WAIT_TIMEOUT_MILLIS = 10000

  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // scalastyle:off println
      System.err.println(
        s"""
        |Invalid command line: ${args.mkString(" ")}
        |
        |Usage: ExternalShuffleDriver [result file] [registered exec file]
        """.stripMargin)
      // scalastyle:on println
      System.exit(1)
    }

    val sc = new SparkContext(new SparkConf()
      .setAppName("External Shuffle Test"))
    val conf = sc.getConf
    val status = new File(args(0))
    val registeredExecFile = new File(args(1))
    logInfo("shuffle service executor file = " + registeredExecFile)
    var result = "failure"
    val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup")
    try {
      val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }.
        collect().toSet
      sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
      data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet)
      result = "success"
      // only one process can open a leveldb file at a time, so we copy the files
      FileUtils.copyDirectory(registeredExecFile, execStateCopy)
      assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty)
    } finally {
      sc.stop()
      FileUtils.deleteDirectory(execStateCopy)
      Files.write(result, status, StandardCharsets.UTF_8)
    }
  }

} 
Example 107
Source File: SortShuffleSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark

import java.io.File

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.rdd.ShuffledRDD
import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
import org.apache.spark.shuffle.sort.SortShuffleManager
import org.apache.spark.util.Utils

class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll {

  // This test suite should run all tests in ShuffleSuite with sort-based shuffle.

  private var tempDir: File = _

  override def beforeAll() {
    super.beforeAll()
    conf.set("spark.shuffle.manager", "sort")
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    tempDir = Utils.createTempDir()
    conf.set("spark.local.dir", tempDir.getAbsolutePath)
  }

  override def afterEach(): Unit = {
    try {
      Utils.deleteRecursively(tempDir)
    } finally {
      super.afterEach()
    }
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the new serialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new KryoSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the old deserialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new JavaSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = {
    def getAllFiles: Set[File] =
      FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet
    val filesBeforeShuffle = getAllFiles
    // Force the shuffle to be performed
    shuffledRdd.count()
    // Ensure that the shuffle actually created files that will need to be cleaned up
    val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle
    filesCreatedByShuffle.map(_.getName) should be(Set("shuffle_0_0_0.data", "shuffle_0_0_0.index"))
    // Check that the cleanup actually removes the files
    sc.env.blockManager.master.removeShuffle(0, blocking = true)
    for (file <- filesCreatedByShuffle) {
      assert (!file.exists(), s"Shuffle file $file was not cleaned up")
    }
  }
} 
Example 108
Source File: EmbeddedIO.scala    From embedded-kafka   with Apache License 2.0 5 votes vote down vote up
package com.tuplejump.embedded.kafka

import java.io.{ File => JFile }

import scala.util.Try
import org.apache.commons.io.FileUtils

object EmbeddedIO extends Logging {

  private val shutdownDeletePaths = new scala.collection.mutable.HashSet[String]()

  val logsDir = new JFile(".", "logs")
  dirSetup(new JFile(logsDir.getAbsolutePath))

  
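  // Creates a named directory under ./logs and registers it for deletion on JVM shutdown.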
  def createTempDir(tmpName: String): JFile =
    dirSetup(new JFile(logsDir, tmpName))

  private def dirSetup(dir: JFile): JFile = {
    if (logsDir.exists()) deleteRecursively(logsDir)
    dir.mkdirs() // use mkdirs so the parent logs dir is recreated if the delete above removed it

    logger.info(s"Created dir ${dir.getAbsolutePath.replace("./", "")}")

    registerShutdownDeleteDir(dir)

    sys.runtime.addShutdownHook(new Thread("delete temp dir " + dir) {
      override def run(): Unit = {
        if (!hasRootAsShutdownDeleteDir(dir)) deleteRecursively(dir)
      }
    })
    dir
  }

  protected def registerShutdownDeleteDir(file: JFile) {
    shutdownDeletePaths.synchronized {
      shutdownDeletePaths += file.getAbsolutePath
    }
  }

  private def hasRootAsShutdownDeleteDir(file: JFile): Boolean = {
    val absolutePath = file.getAbsolutePath
    shutdownDeletePaths.synchronized {
      shutdownDeletePaths.exists { path =>
        !absolutePath.equals(path) && absolutePath.startsWith(path)
      }
    }
  }

  protected def deleteRecursively(delete: JFile): Unit =
    for {
      file <- Option(delete)
    } Try(FileUtils.deleteDirectory(file))
} 
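A hypothetical caller of the helper above, creating an isolated directory for broker data:

package com.tuplejump.embedded.kafka

object EmbeddedIOSketch {
  def main(args: Array[String]): Unit = {
    val kafkaLogDir = EmbeddedIO.createTempDir("kafka-logs")
    println(s"Broker log dir: ${kafkaLogDir.getAbsolutePath}")
  }
}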
Example 109
Source File: SharedSparkSessionSuite.scala    From spark-tensorflow-connector   with Apache License 2.0 5 votes vote down vote up
package org.trustedanalytics.spark.datasources.tensorflow

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.spark.SharedSparkSession
import org.junit.{After, Before}
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike}


trait BaseSuite extends WordSpecLike with Matchers with BeforeAndAfterAll

class SharedSparkSessionSuite extends SharedSparkSession with BaseSuite {
  val TF_SANDBOX_DIR = "tf-sandbox"
  val file = new File(TF_SANDBOX_DIR)

  @Before
  override def beforeAll() = {
    super.setUp()
    FileUtils.deleteQuietly(file)
    file.mkdirs()
  }

  @After
  override def afterAll() = {
    FileUtils.deleteQuietly(file)
    super.tearDown()
  }
} 
Example 110
Source File: TestCreateTableWithBlockletSize.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.testsuite.createTable

import scala.util.Random

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.CarbonEnv
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.reader.CarbonFooterReaderV3
import org.apache.carbondata.core.util.path.CarbonTablePath


class TestCreateTableWithBlockletSize extends QueryTest with BeforeAndAfterAll {

  override def beforeAll {
    sql("use default")
    sql("drop table if exists source")
  }

  test("test create table with blocklet size") {
    val rdd = sqlContext.sparkContext.parallelize(1 to 1000000)
        .map(x => (Random.nextInt(), Random.nextInt().toString))
    sqlContext.createDataFrame(rdd)
        .write
        .format("carbondata")
        .option("table_blocksize", "8")
        .option("table_blocklet_size", "3")
        .option("tableName", "source")
        .save()

    // read footer and verify number of blocklets
    val table = CarbonEnv.getCarbonTable(None, "source")(sqlContext.sparkSession)
    val folder = FileFactory.getCarbonFile(table.getTablePath)
    val files = folder.listFiles(true)
    import scala.collection.JavaConverters._
    val dataFiles = files.asScala.filter(_.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT))
    dataFiles.foreach { dataFile =>
      val fileReader = FileFactory
        .getFileHolder(FileFactory.getFileType(dataFile.getPath))
      val buffer = fileReader
        .readByteBuffer(FileFactory.getUpdatedFilePath(dataFile.getPath), dataFile.getSize - 8, 8)
      val footerReader = new CarbonFooterReaderV3(dataFile.getAbsolutePath, buffer.getLong)
      val footer = footerReader.readFooterVersion3
      assertResult(2)(footer.blocklet_index_list.size)
      assertResult(2)(footer.blocklet_info_list3.size)
    }
    sql("drop table source")
  }

  test("test create table with invalid blocklet size") {
    val ex = intercept[MalformedCarbonCommandException] {
      sql("CREATE TABLE T1(name String) STORED AS CARBONDATA TBLPROPERTIES('TABLE_BLOCKLET_SIZE'='3X')")
    }
    assert(ex.getMessage.toLowerCase.contains("invalid table_blocklet_size"))
  }

  override def afterAll {
    sql("use default")
    sql("drop table if exists source")
  }

} 
Example 111
Source File: DirectSQLExample.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.examples

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.SparkSession

import org.apache.carbondata.core.metadata.datatype.{DataTypes, Field}
import org.apache.carbondata.examples.util.ExampleUtils
import org.apache.carbondata.sdk.file.{CarbonWriter, Schema}


// scalastyle:off println
object DirectSQLExample {

  def main(args: Array[String]) {
    val carbonSession = ExampleUtils.createSparkSession("DirectSQLExample")
    exampleBody(carbonSession)
    carbonSession.close()
  }

  def exampleBody(carbonSession : SparkSession): Unit = {

    val rootPath = new File(this.getClass.getResource("/").getPath
      + "../../../..").getCanonicalPath
    val path = s"$rootPath/examples/spark/target/carbonFile/"

    import carbonSession._
    // 1. generate data file
    cleanTestData(path)

    val rows = 20
    buildTestData(path, rows)
    val readPath = path

    println("Running SQL on carbon files directly")
    try {
      // 2. run queries directly, no need to create table first
      sql(s"""select * FROM carbon.`$readPath` limit 10""".stripMargin).show()

      // 3. check rows count
      val counts = sql(s"""select * FROM carbon.`$readPath`""".stripMargin).count()
      assert(rows == counts)

    } catch {
      case e: Exception => throw e
    } finally {
      // 4. delete data files
      cleanTestData(path)
    }

  }

  // prepare SDK writer output
  def buildTestData(
      path: String,
      num: Int = 3): Unit = {

    // getCanonicalPath gives path with \, but the code expects /.
    val writerPath = path.replace("\\", "/")

    val fields = new Array[Field](3)
    fields(0) = new Field("name", DataTypes.STRING)
    fields(1) = new Field("age", DataTypes.INT)
    fields(2) = new Field("height", DataTypes.DOUBLE)

    try {
      val builder = CarbonWriter
        .builder()
        .outputPath(writerPath)
        .uniqueIdentifier(System.currentTimeMillis)
        .withBlockSize(2)
        .withCsvInput(new Schema(fields))
        .writtenBy("DirectSQLExample")
      val writer = builder.build()
      var i = 0
      while (i < num) {
        writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2)))
        i += 1
      }
      writer.close()
    } catch {
      case e: Exception => throw e
    }
  }

  def cleanTestData(path: String): Unit = {
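    // Delete the SDK writer output directory from any previous run.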
    FileUtils.deleteDirectory(new File(path))
  }

}
// scalastyle:on println 
Example 112
Source File: TestRegisterIndexCarbonTable.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.testsuite.secondaryindex

import java.io.{File, IOException}

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.Row
import org.apache.spark.sql.test.TestQueryExecutor
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

import org.apache.carbondata.core.constants.CarbonCommonConstants


class TestRegisterIndexCarbonTable extends QueryTest with BeforeAndAfterAll {

  override def beforeAll {
    sql("drop database if exists carbon cascade")
  }

  def restoreData(dblocation: String, tableName: String) = {
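    // Copy the backed-up table directory back into place, then delete the backup copy.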
    val destination = dblocation + CarbonCommonConstants.FILE_SEPARATOR + tableName
    val source = dblocation + "_back" + CarbonCommonConstants.FILE_SEPARATOR + tableName
    try {
      FileUtils.copyDirectory(new File(source), new File(destination))
      FileUtils.deleteDirectory(new File(source))
    } catch {
      case e : Exception =>
        throw new IOException("carbon table data restore failed.")
    } finally {

    }
  }
  def backUpData(dblocation: String, tableName: String) = {
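    // Copy the table directory to a sibling "<dblocation>_back" location.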
    val source = dblocation + CarbonCommonConstants.FILE_SEPARATOR + tableName
    val destination = dblocation + "_back" + CarbonCommonConstants.FILE_SEPARATOR + tableName
    try {
      FileUtils.copyDirectory(new File(source), new File(destination))
    } catch {
      case e : Exception =>
        throw new IOException("carbon table data backup failed.")
    }
  }
  test("register tables test") {
    val location = TestQueryExecutor.warehouse +
                           CarbonCommonConstants.FILE_SEPARATOR + "dbName"
    sql("drop database if exists carbon cascade")
    sql(s"create database carbon location '${location}'")
    sql("use carbon")
    sql("""create table carbon.carbontable (c1 string,c2 int,c3 string,c5 string) STORED AS carbondata""")
    sql("insert into carbontable select 'a',1,'aa','aaa'")
    sql("create index index_on_c3 on table carbontable (c3, c5) AS 'carbondata'")
    backUpData(location, "carbontable")
    backUpData(location, "index_on_c3")
    sql("drop table carbontable")
    restoreData(location, "carbontable")
    restoreData(location, "index_on_c3")
    sql("refresh table carbontable")
    sql("refresh table index_on_c3")
    checkAnswer(sql("select count(*) from carbontable"), Row(1))
    checkAnswer(sql("select c1 from carbontable"), Seq(Row("a")))
    sql("REGISTER INDEX TABLE index_on_c3 ON carbontable")
    assert(sql("show indexes on carbontable").collect().nonEmpty)
  }
  override def afterAll {
    sql("drop database if exists carbon cascade")
    sql("use default")
  }
} 
Example 113
Source File: services.scala    From InteractiveGraph-neo4j   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package org.grapheco.server.pidb

import java.io.{File, FileInputStream}

import org.apache.commons.io.{FileUtils, IOUtils}
import org.grapheco.server.util.{JsonUtils, Logging, ServletContextUtils}
import org.neo4j.driver.v1._
import org.neo4j.graphdb.factory.{GraphDatabaseFactory, GraphDatabaseSettings}
import org.neo4j.graphdb.{GraphDatabaseService, Label, RelationshipType}
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.beans.factory.{DisposableBean, InitializingBean}
import cn.pidb.engine.{BoltService, CypherService, PidbConnector}

import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.reflect.ClassTag



class PidbService(boltUrl:String, boltUser:String, boltPassword:String) extends BoltService(boltUrl, boltUser, boltPassword){


  def getRelativeOrAbsoluteFile(path: String) = {
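    // Absolute paths are used as-is; relative paths are resolved against the servlet context root.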
    Some(new File(path)).map { file =>
      if (file.isAbsolute) {
        file
      }
      else {
        new File(ServletContextUtils.getServletContext.getRealPath(s"/${path}"))
      }
    }.get
  }
} 
Example 114
Source File: IntegrationTests.scala    From scala-typed-holes   with Apache License 2.0 5 votes vote down vote up
package holes

import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path, Paths}

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import scala.sys.process._

class IntegrationTests extends FunSpec with BeforeAndAfterAll {

  private val pluginJar = sys.props("plugin.jar")
  private val scalacClasspath = sys.props("scalac.classpath")
  private val targetDir = Paths.get("target/integration-tests")

  private def runScalac(args: String*): String = {
    val buf = new StringBuffer
    val logger = new ProcessLogger {
      override def out(s: => String): Unit = { buf.append(s); buf.append('\n') }
      override def err(s: => String): Unit = { buf.append(s); buf.append('\n') }
      override def buffer[T](f: => T): T = f
    }

    Process(
      "java"
        :: "-Dscala.usejavacp=true"
        :: "-cp" :: scalacClasspath
        :: "scala.tools.nsc.Main"
        :: args.toList
    ).!(logger)

    buf.toString
  }

  private def compileFile(path: Path): String =
    runScalac(
      s"-Xplugin:$pluginJar",
      "-P:typed-holes:log-level:info",
      "-d", targetDir.toString,
      path.toString
    )

  override def beforeAll(): Unit = {
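    // Print the scalac version, then start from an empty target directory.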
    println(runScalac("-version"))

    FileUtils.deleteQuietly(targetDir.toFile)
    Files.createDirectories(targetDir)
  }

  describe("produces the expected output") {
    for (scenario <- Paths.get("src/test/resources").toFile.listFiles().toList.map(_.toPath)) {
      it(scenario.getFileName.toString) {
        val expected =
          new String(Files.readAllBytes(scenario.resolve("expected.txt")), StandardCharsets.UTF_8).trim
        val actual =
          compileFile(scenario.resolve("input.scala")).trim

        if (actual != expected) {
          println("Compiler output:")
          println("=====")
          println(actual)
          println("=====")
        }
        assert(actual === expected)
      }
    }
  }

} 
Example 115
Source File: Template.scala    From AppCrawler   with Apache License 2.0 5 votes vote down vote up
package com.testerhome.appcrawler

import java.io.File

import org.apache.commons.io.FileUtils
import org.fusesource.scalate.TemplateEngine

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.Source


class Template {

  val elements = mutable.HashMap[String, ListBuffer[Map[String, Any]]]()


  def getPageSource(url:String): Unit ={
    val page=Source.fromURL(s"${url}/source/xml").mkString
    val xml=DataObject.fromJson[Map[String, String]](page).getOrElse("value", "")
      .asInstanceOf[Map[String, String]].getOrElse("tree", "")
    val doc=XPathUtil.toDocument(xml)
    elements("Demo")=ListBuffer[Map[String, Any]]()
    elements("Demo")++=XPathUtil.getListFromXPath("//*[]", doc)

  }
  def read(path:String): Unit = {

    //val path = "/Users/seveniruby/projects/AppCrawlerSuite/AppCrawler/android_20170109145102/elements.yml"
    val store = (DataObject.fromYaml[URIElementStore](Source.fromFile(path).mkString)).elementStore

    store.foreach(s => {
      val reqDom = s._2.reqDom
      val url = s._2.element.url
      if (reqDom.size != 0) {
        val doc = XPathUtil.toDocument(reqDom)

        if (elements.contains(url) == false) {
          elements.put(url, ListBuffer[Map[String, Any]]())
        }
        elements(url) ++= XPathUtil.getListFromXPath("//*", doc)
        val tagsLimit=List("Image", "Button", "Text")
        elements(url) = elements(url)
          .filter(_.getOrElse("visible", "true")=="true")
          .filter(_.getOrElse("tag", "").toString.contains("StatusBar")==false)
          .filter(e=>tagsLimit.exists(t=>e.getOrElse("tag", "").toString.contains(t)))
          .distinct
      }

    })
  }

  def write(template:String, dir:String) {
    val engine = new TemplateEngine
    elements.foreach(e => {
      val file:String = e._1
      println(s"file=${file}")
      e._2.foreach(m => {
        val name = m("name")
        val value = m("value")
        val label = m("label")
        val xpath = m("xpath")
        println(s"name=${name} label=${label} value=${value} xpath=${xpath}")
      })

      val output = engine.layout(template, Map(
        "file" -> s"Template_${file.split('-').takeRight(1).head.toString}",
        "elements" -> elements(file))
      )
      println(output)

      val directory=new File(dir)
      if(directory.exists()==false){
        FileUtils.forceMkdir(directory)
      }
      println(s"template source directory = ${dir}")
      val appdex=template.split('.').takeRight(2).head
      scala.reflect.io.File(s"${dir}/${file}.${appdex}").writeAll(output)

    })

  }

} 
Example 116
Source File: Report.scala    From AppCrawler   with Apache License 2.0 5 votes vote down vote up
package com.testerhome.appcrawler

import org.apache.commons.io.FileUtils
import org.scalatest.tools.Runner

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.{Source, Codec}
import scala.reflect.io.File
import collection.JavaConversions._
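// NOTE: this excerpt omits the Report class header and its earlier members;
// it resumes inside the method that runs the ScalaTest Runner.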


    log.info(s"run ${cmdArgs.mkString(" ")}")
    Runner.run(cmdArgs)
    changeTitle()
  }

  def changeTitle(title:String=Report.title): Unit ={
    val originTitle="ScalaTest Results"
    val indexFile=reportPath+"/index.html"
    val newContent=Source.fromFile(indexFile).mkString.replace(originTitle, title)
    scala.reflect.io.File(indexFile).writeAll(newContent)
  }

}

object Report extends Report{
  var showCancel=false
  var title="AppCrawler"
  var master=""
  var candidate=""
  var reportDir=""
  var store=new URIElementStore


  def loadResult(elementsFile: String): URIElementStore ={
    DataObject.fromYaml[URIElementStore](Source.fromFile(elementsFile).mkString)
  }
} 
Example 117
Source File: TestGetClassFile.scala    From AppCrawler   with Apache License 2.0 5 votes vote down vote up
package com.testerhome.appcrawler.ut

import com.testerhome.appcrawler.plugin.FlowDiff
import com.testerhome.appcrawler.{DiffSuite, Report}
import org.apache.commons.io.FileUtils
import org.scalatest.Checkpoints.Checkpoint
import org.scalatest.{FunSuite, Matchers}


class TestGetClassFile extends FunSuite with Matchers{



  test("test checkpoints"){
    markup {
      """
        |dddddddd
      """.stripMargin
    }
    markup("xxxx")
    val cp = new Checkpoint()
    val (x, y) = (1, 2)
    cp { x should be < 0 }
    cp { y should be > 9 }
    cp.reportAll()
  }

  test("test markup"){
    markup {
      """
        |dddddddd
      """.stripMargin
    }
    markup("xxxx")

  }

  test("get class file"){
    val location=classOf[DiffSuite].getProtectionDomain.getCodeSource.getLocation
    println(location)
    val f=getClass.getResource("/com/xueqiu/qa/appcrawler/ut/TestDiffReport.class").getFile
    println(f)
    FileUtils.copyFile(new java.io.File(f), new java.io.File("/tmp/1.class"))



    println(getClass.getClassLoader.getResources("com/xueqiu/qa/appcrawler/ut/TestDiffReport.class"))
  }
} 
Example 118
Source File: Generator.scala    From play-soap   with Apache License 2.0 5 votes vote down vote up
package play.soap.docs

import java.io.File
import java.util.Collections

import org.apache.commons.io.FileUtils
import org.pegdown.ast.WikiLinkNode
import org.pegdown.VerbatimSerializer
import org.pegdown.LinkRenderer
import org.pegdown.Extensions
import org.pegdown.PegDownProcessor
import play.doc.PrettifyVerbatimSerializer
import play.twirl.api.Html

object Generator extends App {
  val outDir  = new File(args(0))
  val inDir   = new File(args(1))
  val inPages = args.drop(2)

  val parser = new PegDownProcessor(Extensions.ALL)
  val linkRenderer = new LinkRenderer {
    import LinkRenderer.Rendering
    override def render(node: WikiLinkNode) = {
      node.getText.split("\\|", 2) match {
        case Array(name)        => new Rendering(name + ".html", name)
        case Array(title, name) => new Rendering(name + ".html", title)
        case _                  => new Rendering(node.getText + ".html", node.getText)
      }
    }
  }
  val verbatimSerializer =
    Collections.singletonMap[String, VerbatimSerializer](VerbatimSerializer.DEFAULT, PrettifyVerbatimSerializer)

  val nav = Seq(
    "Home"                       -> "Home",
    "Using sbt WSDL"             -> "SbtWsdl",
    "Using the Play SOAP client" -> "PlaySoapClient",
    "Using JAX WS Handlers"      -> "Handlers",
    "Security"                   -> "Security"
  )
  val titleMap = nav.map(t => t._2 -> t._1).toMap

  // Ensure target directory exists
  outDir.mkdirs()

  inPages.foreach { name =>
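    // Convert each markdown page to HTML, wrap it in the site template and write it to outDir.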
    val inFile      = new File(inDir, name + ".md")
    val markdown    = FileUtils.readFileToString(inFile)
    val htmlSnippet = parser.markdownToHtml(markdown, linkRenderer, verbatimSerializer)
    val title       = titleMap.get(name)
    val htmlPage    = html.template(title, nav)(Html(htmlSnippet))
    FileUtils.writeStringToFile(new File(outDir, name + ".html"), htmlPage.body)
  }
} 
Example 119
Source File: UsesMasterSlaveServers.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package redis

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterAll, Suite}

import scala.concurrent.Await
import scala.concurrent.duration._


trait UsesMasterSlaveServers extends BeforeAndAfterAll with RedisProcessUtils { this: Suite =>

  val masterSlavePath: String = "masterSlave/" + System.currentTimeMillis()
  val masterSlaveDir: File = new File(masterSlavePath.replaceAllLiterally("/", File.separator))

  def masterName: String
  def ports: Seq[Int]
  def sentinelPorts: Seq[Int]

  lazy val addresses: Seq[NodeAddress] = ports.map(port => NodeAddress(port = port))
  lazy val sentinelAddresses: Seq[NodeAddress] = sentinelPorts.map(port => NodeAddress(port = port))

  var redisProcesses: Seq[RedisProcess] = _
  var sentinelProcesses: Seq[RedisProcess] = _

  protected def prepareDirectory(): Unit

  override protected def beforeAll(): Unit = {
    super.beforeAll()
    prepareDirectory()
    val processesFut = Future.traverse(ports)(port => launchRedis(
      "--port", port.toString,
      "--daemonize", "no",
      "--pidfile", "redis.pid",
      "--dbfilename", "dump.rdb",
      "--dir", s"$masterSlavePath/$port"
    ))
    val sentinelsFut = Future.traverse(sentinelPorts)(port => launchSentinel(
      s"$masterSlavePath/$port/sentinel.conf",
      "--port", port.toString,
      "--daemonize", "no",
      "--pidfile", "redis.pid",
      "--dir", s"$masterSlavePath/$port"
    ))
    redisProcesses = Await.result(processesFut, 10.seconds)
    sentinelProcesses = Await.result(sentinelsFut, 10.seconds)
  }

  override protected def afterAll(): Unit = {
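    // Shut down every Redis and Sentinel process, then remove the working directory.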
    Await.result(Future.traverse(redisProcesses ++ sentinelProcesses)(shutdownRedis), 10.seconds)
    FileUtils.deleteDirectory(masterSlaveDir)
    super.afterAll()
  }
} 
Example 120
Source File: UsesClusterServers.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package redis

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterAll, Suite}

import scala.concurrent.Await
import scala.concurrent.duration._


trait UsesClusterServers extends BeforeAndAfterAll with RedisProcessUtils { this: Suite =>

  val clusterPath: String = "cluster/" + System.currentTimeMillis()
  val clusterDir: File = new File(clusterPath.replaceAllLiterally("/", File.separator))

  def ports: Seq[Int]

  lazy val addresses: Seq[NodeAddress] = ports.map(port => NodeAddress(port = port))
  var redisProcesses: Seq[RedisProcess] = _

  protected def prepareDirectory(): Unit

  protected def slotKey(slot: Int): String = ClusterUtils.SlotKeys(slot)

  override protected def beforeAll(): Unit = {
    super.beforeAll()
    prepareDirectory()
    redisProcesses = Await.result(Future.traverse(ports)(port => launchRedis(
      "--port", port.toString,
      "--daemonize", "no",
      "--pidfile", "redis.pid",
      "--dbfilename", "dump.rdb",
      "--dir", s"$clusterPath/$port",
      "--appendonly", "yes",
      "--appendfilename", "appendonly.aof",
      "--cluster-enabled", "yes",
      "--cluster-config-file", "nodes.conf"
    )), 10.seconds)
  }

  override protected def afterAll(): Unit = {
    Await.result(Future.traverse(redisProcesses)(shutdownRedis), 10.seconds)
    FileUtils.deleteDirectory(clusterDir)
    super.afterAll()
  }
} 
Example 121
Source File: MQTTSinkWordCount.scala    From bahir   with Apache License 2.0 5 votes vote down vote up
package org.apache.bahir.examples.sql.streaming.mqtt

import java.io.File

import org.apache.commons.io.FileUtils

import org.apache.spark.sql.SparkSession


object MQTTSinkWordCount  {
  def main(args: Array[String]) {
    if (args.length < 3) {
      // scalastyle:off
      System.err.println("Usage: MQTTSinkWordCount <port> <brokerUrl> <topic>")
      // scalastyle:on
      System.exit(1)
    }

    val checkpointDir = System.getProperty("java.io.tmpdir") + "/mqtt-example/"
    // Remove checkpoint directory.
    FileUtils.deleteDirectory(new File(checkpointDir))

    val port = args(0)
    val brokerUrl = args(1)
    val topic = args(2)

    val spark = SparkSession.builder
      .appName("MQTTSinkWordCount").master("local[4]")
      .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from local network socket.
    val lines = spark.readStream
      .format("socket")
      .option("host", "localhost").option("port", port)
      .load().select("value").as[String]

    // Split the lines into words.
    val words = lines.flatMap(_.split(" "))

    // Generate running word count.
    val wordCounts = words.groupBy("value").count()

    // Start publishing the counts to MQTT server.
    val query = wordCounts.writeStream
      .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSinkProvider")
      .option("checkpointLocation", checkpointDir)
      .outputMode("complete")
      .option("topic", topic)
      .option("localStorage", checkpointDir)
      .start(brokerUrl)

    query.awaitTermination()
  }
} 
Example 122
Source File: ElasticServer.scala    From nexus   with Apache License 2.0 5 votes vote down vote up
package ch.epfl.bluebrain.nexus.commons.es.server.embed

import java.nio.file.Files
import java.util.Arrays._

import akka.http.scaladsl.model.Uri
import ch.epfl.bluebrain.nexus.commons.es.server.embed.ElasticServer.MyNode
import ch.epfl.bluebrain.nexus.util.{ActorSystemFixture, Randomness}
import org.apache.commons.io.FileUtils
import org.elasticsearch.common.settings.Settings
import org.elasticsearch.index.reindex.ReindexPlugin
import org.elasticsearch.node.Node
import org.elasticsearch.painless.PainlessPlugin
import org.elasticsearch.plugins.Plugin
import org.elasticsearch.transport.Netty4Plugin
import org.scalatest.wordspec.AnyWordSpecLike
import org.scalatest.BeforeAndAfterAll

import scala.jdk.CollectionConverters._
import scala.util.Try

// $COVERAGE-OFF$
abstract class ElasticServer
    extends ActorSystemFixture("ElasticServer")
    with AnyWordSpecLike
    with BeforeAndAfterAll
    with Randomness {

  override protected def beforeAll(): Unit = {
    super.beforeAll()
    startElastic()
  }

  override protected def afterAll(): Unit = {
    stopElastic()
    super.afterAll()
  }

  val startPort = freePort()
  val endPort   = startPort + 100

  val esUri       = Uri(s"http://localhost:$startPort")
  implicit val ec = system.dispatcher

  private val clusterName = "elasticsearch"

  private val dataDir  = Files.createTempDirectory("elasticsearch_data_").toFile
  private val settings = Settings
    .builder()
    .put("path.home", dataDir.toString)
    .put("http.port", s"$startPort-$endPort")
    .put("http.cors.enabled", true)
    .put("cluster.name", clusterName)
    .put("http.type", "netty4")
    .build

  private lazy val node =
    new MyNode(settings, asList(classOf[Netty4Plugin], classOf[PainlessPlugin], classOf[ReindexPlugin]))

  def startElastic(): Unit = {
    node.start()
    ()
  }

  def stopElastic(): Unit = {
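    // Close the embedded node and make a best-effort attempt to delete its temporary data directory.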
    node.close()
    Try(FileUtils.forceDelete(dataDir))
    ()
  }
}

object ElasticServer extends Randomness {

  import java.util

  import org.elasticsearch.node.InternalSettingsPreparer

  private class MyNode(preparedSettings: Settings, classpathPlugins: util.Collection[Class[_ <: Plugin]])
      extends Node(
        InternalSettingsPreparer
          .prepareEnvironment(preparedSettings, Map.empty[String, String].asJava, null, () => "elasticsearch"),
        classpathPlugins,
        true
      ) {}
}
// $COVERAGE-ON$ 
Example 123
Source File: TarFlowSpec.scala    From nexus   with Apache License 2.0 5 votes vote down vote up
package ch.epfl.bluebrain.nexus.storage

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Path, Paths}

import akka.actor.ActorSystem
import akka.stream.alpakka.file.scaladsl.Directory
import akka.stream.scaladsl.{FileIO, Source}
import akka.testkit.TestKit
import akka.util.ByteString
import ch.epfl.bluebrain.nexus.storage.utils.{EitherValues, IOEitherValues, Randomness}
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.io.FileUtils
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpecLike
import org.scalatest.{BeforeAndAfterAll, Inspectors, OptionValues}

import scala.annotation.tailrec

class TarFlowSpec
    extends TestKit(ActorSystem("TarFlowSpec"))
    with AnyWordSpecLike
    with Matchers
    with IOEitherValues
    with Randomness
    with EitherValues
    with OptionValues
    with Inspectors
    with BeforeAndAfterAll {

  val basePath = Files.createTempDirectory("tarflow")
  val dir1     = basePath.resolve("one")
  val dir2     = basePath.resolve("two")

  override def afterAll(): Unit = {
    super.afterAll()
    FileUtils.cleanDirectory(basePath.toFile)
    ()
  }

  type PathAndContent = (Path, String)

  "A TarFlow" should {

    Files.createDirectories(dir1)
    Files.createDirectories(dir2)

    def relativize(path: Path): String = basePath.getParent().relativize(path).toString

    "generate the byteString for a tar file correctly" in {
      val file1        = dir1.resolve("file1.txt")
      val file1Content = genString()
      val file2        = dir1.resolve("file3.txt")
      val file2Content = genString()
      val file3        = dir2.resolve("file3.txt")
      val file3Content = genString()
      val files        = List(file1 -> file1Content, file2 -> file2Content, file3 -> file3Content)
      forAll(files) {
        case (file, content) => Source.single(ByteString(content)).runWith(FileIO.toPath(file)).futureValue
      }
      val byteString   = Directory.walk(basePath).via(TarFlow.writer(basePath)).runReduce(_ ++ _).futureValue
      val bytes        = new ByteArrayInputStream(byteString.toArray)
      val tar          = new TarArchiveInputStream(bytes)

      @tailrec def readEntries(
          tar: TarArchiveInputStream,
          entries: List[PathAndContent] = Nil
      ): List[PathAndContent] = {
        val entry = tar.getNextTarEntry
        if (entry == null) entries
        else {
          val data = Array.ofDim[Byte](entry.getSize.toInt)
          tar.read(data)
          readEntries(tar, (Paths.get(entry.getName) -> ByteString(data).utf8String) :: entries)
        }
      }
      val directories = List(relativize(basePath) -> "", relativize(dir1) -> "", relativize(dir2) -> "")
      val untarred    = readEntries(tar).map { case (path, content) => path.toString -> content }
      val expected    = files.map { case (path, content) => relativize(path) -> content } ++ directories
      untarred should contain theSameElementsAs expected
    }
  }

} 
Example 124
Source File: format_flow.scala    From scalabpe   with Apache License 2.0 5 votes vote down vote up
package scalabpe

import java.io._
import scala.collection.mutable.HashMap
import scala.collection.mutable.ArrayBuffer
import scala.io.Source
import org.apache.commons.io.FileUtils
import scala.xml._
import scala.collection.mutable._
import scalabpe.core._
import org.apache.commons.lang.StringUtils
import Tools._



object FormatFlowTool {

    def help() {
        println(
"""
usage: scalabpe.FormatFlowTool [options] dirname
options:
    -h|--help               show help information
""")
    }

    def parseArgs(args:Array[String]):HashMapStringAny = {
        val map = HashMapStringAny()
        var i = 0
        val files = ArrayBufferString()
        while(i < args.size) {
            args(i) match {
                case "-h" | "--help" => 
                    return null
                case s if s.startsWith("-") => 
                    println("invalid option "+s)
                    return null
                case _ => 
                    files += args(i)
                    i += 1
            }
        }
        map.put("files",files)
        map
    }

    def main(args:Array[String]) {

        var params = parseArgs(args)
        if( params == null ) {
            help()
            return
        }
        var files = params.nls("files")
        if( files.size == 0 ) {
            help()
            return
        }

        var dir = files(0)
        if( !new File(dir).exists() ) {
            val p1 = "compose_conf"+File.separator+dir
            if( new File(p1).exists ) {
                dir = p1
            } else {
                println("not a valid dir, dir="+dir)
                return
            }
        }

        processDir(dir,params)
    }

    def processDir(dir:String,params:HashMapStringAny) {
        val files = new File(dir).listFiles.filter(_.getName.endsWith(".flow"))
        for(f <- files ) {
            processFile(dir,f.getName,params)
        }
    }

    def processFile(dir:String,f:String,params:HashMapStringAny) {
        val lines = readAllLines(dir+File.separator+f)
        // TODO
    }

} 
Example 125
Source File: NodeActor.scala    From ForestFlow   with Apache License 2.0 5 votes vote down vote up
package ai.forestflow.serving.cluster

import java.io.File

import akka.actor.{Actor, ActorLogging, ActorRef, Props, Timers}
import akka.cluster.Cluster
import akka.cluster.pubsub.DistributedPubSub
import akka.cluster.pubsub.DistributedPubSubMediator.Subscribe
import ai.forestflow.domain.CleanupLocalStorage
import org.apache.commons.io.FileUtils
import com.typesafe.scalalogging.LazyLogging
import ai.forestflow.utils.ThrowableImplicits._

import scala.util.{Failure, Success, Try}

/***
 * This actor is responsible for node-level (host-level) stuff that should be done on a per-node basis.
 * A good example of this is file system cleanup tasks.
 */
object NodeActor extends LazyLogging {
  
  def props(): Props =
    Props(new NodeActor)
      .withDispatcher("blocking-io-dispatcher")

  def cleanupLocalStorage(path: String): Unit = {
    val localDir = new File(path)
    val localDirExists = localDir.exists()
    logger.info(s"Cleaning up local storage: Local Directory: $localDir , exists? $localDirExists")
    if (localDirExists)
      Try(FileUtils.deleteDirectory(localDir)) match {
        case Success(_) => logger.info(s"Local Directory $localDir cleaned up successfully")
        case Failure(ex) => logger.error(s"Local Directory $localDir cleanup failed! Reason: ${ex.printableStackTrace}")
      }
  }
}

class NodeActor extends Actor
  with ActorLogging
  with Timers {

  
  implicit val cluster: Cluster = Cluster(context.system)
  val mediator: ActorRef = DistributedPubSub(context.system).mediator

  mediator ! Subscribe(classOf[CleanupLocalStorage].getSimpleName, self)

  override def receive: Receive = {
    case CleanupLocalStorage(path) =>
      NodeActor.cleanupLocalStorage(path)
  }
} 
Example 126
Source File: SentencePieceWrapper.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.ml.tensorflow.sentencepiece

import java.io.File
import java.nio.file.{Files, Paths}
import java.util.UUID

import org.apache.commons.io.FileUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession

class SentencePieceWrapper(
                            var sppModel: Array[Byte]
                          ) extends Serializable {

  @transient private var mspp: SentencePieceProcessor = _

  def getSppModel: SentencePieceProcessor = {

    if (mspp == null){
      val spp = new SentencePieceProcessor()
      spp.loadFromSerializedProto(sppModel)
      mspp = spp
    }
    mspp
  }

}

object SentencePieceWrapper {

  def read(
            path: String
          ): SentencePieceWrapper = {
    val byteArray = Files.readAllBytes(Paths.get(path))
    val sppWrapper = new SentencePieceWrapper(byteArray)
    val spp = new SentencePieceProcessor()
    spp.loadFromSerializedProto(byteArray)

    sppWrapper.mspp = spp
    sppWrapper
  }
}


trait WriteSentencePieceModel {
  def writeSentencePieceModel(
                               path: String,
                               spark: SparkSession,
                               spp: SentencePieceWrapper,
                               suffix: String, filename:String
                             ): Unit = {

    val uri = new java.net.URI(path.replaceAllLiterally("\\", "/"))
    val fs = FileSystem.get(uri, spark.sparkContext.hadoopConfiguration)

    // 1. Create tmp folder
    val tmpFolder = Files.createTempDirectory(UUID.randomUUID().toString.takeRight(12) + suffix)
      .toAbsolutePath.toString

    val sppFile = Paths.get(tmpFolder, filename).toString

    // 2. Save the SentencePiece model bytes to the tmp folder
    FileUtils.writeByteArrayToFile(new File(sppFile), spp.sppModel)
    // 3. Copy to dest folder
    fs.copyFromLocalFile(new Path(sppFile), new Path(path))

    // 4. Remove tmp folder
    FileUtils.deleteDirectory(new File(tmpFolder))
  }
}

trait ReadSentencePieceModel {
  val sppFile: String

  def readSentencePieceModel(
                              path: String,
                              spark: SparkSession,
                              suffix: String
                            ): SentencePieceWrapper = {

    val uri = new java.net.URI(path.replaceAllLiterally("\\", "/"))
    val fs = FileSystem.get(uri, spark.sparkContext.hadoopConfiguration)

    // 1. Create tmp directory
    val tmpFolder = Files.createTempDirectory(UUID.randomUUID().toString.takeRight(12)+ suffix)
      .toAbsolutePath.toString

    // 2. Copy to local dir
    fs.copyToLocalFile(new Path(path, sppFile), new Path(tmpFolder))

    val sppModelFilePath = new Path(tmpFolder, sppFile)

    val byteArray = Files.readAllBytes(Paths.get(sppModelFilePath.toString))
    val sppWrapper = new SentencePieceWrapper(byteArray)
    sppWrapper
  }
} 
Example 127
Source File: TrainingHelper.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.util

import java.io.File
import java.nio.file.{Files, Paths, StandardCopyOption}
import java.sql.Timestamp
import java.util.Date

import com.johnsnowlabs.nlp.pretrained.ResourceType.ResourceType
import com.johnsnowlabs.nlp.pretrained.{ResourceMetadata, ResourceType}
import org.apache.commons.io.FileUtils
import org.apache.spark.ml.util.MLWriter


object TrainingHelper {

  def saveModel(name: String,
                language: Option[String],
                libVersion: Option[Version],
                sparkVersion: Option[Version],
                modelWriter: MLWriter,
                folder: String,
                category: Option[ResourceType] = Some(ResourceType.NOT_DEFINED)
               ): Unit = {

    // 1. Get current timestamp
    val timestamp = new Timestamp(new Date().getTime)


    // 2. Save model to file
    val file = Paths.get(folder, timestamp.toString).toString.replaceAllLiterally("\\", "/")
    modelWriter.save(file)

    // 3. Zip file
    val tempzipFile = Paths.get(folder, timestamp + ".zip")
    ZipArchiveUtil.zip(file, tempzipFile.toString)

    // 4. Set checksum
    val checksum = FileHelper.generateChecksum(tempzipFile.toString)

    // 5. Create resource metadata
    val meta = new ResourceMetadata(name, language, libVersion, sparkVersion, true, timestamp, true, category = category, checksum)

    val zipfile = Paths.get(meta.fileName)

    // 6. Move the zip
    Files.move(tempzipFile, zipfile, StandardCopyOption.REPLACE_EXISTING)

    // 7. Remove original file
    try {
      FileUtils.deleteDirectory(new File(file))
    } catch {
      case _: java.io.IOException => //file lock may prevent deletion, ignore and continue
    }

    // 8. Add the resource info to metadata.json
    val metadataFile = Paths.get(folder, "metadata.json").toString
    ResourceMetadata.addMetadataToFile(metadataFile, meta)
  }
} 
Example 128
Source File: FileHelper.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.util

import java.io.{File, IOException}
import java.nio.charset.Charset
import java.nio.file.{Files, Paths}
import java.security.MessageDigest
import java.text.DecimalFormat

import org.apache.commons.io.FileUtils
object FileHelper {
  def writeLines(file: String, lines: Seq[String], encoding: String = "UTF-8"): Unit = {
    val writer = Files.newBufferedWriter(Paths.get(file), Charset.forName(encoding))
    try {
      var cnt = 0
      for (line <- lines) {
        // write the separator before every line except the first, so entries are newline-separated
        if (cnt > 0)
          writer.write(System.lineSeparator())
        writer.write(line)
        cnt += 1
      }
    }
    catch {
      case ex: IOException =>
        ex.printStackTrace()
    }
    finally if (writer != null) writer.close()
  }

  def delete(file: String, throwOnError: Boolean = false): Unit = {
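    // Delete a file or directory; on failure either rethrow or fall back to deleting on JVM exit.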
    val f = new File(file)
    if (f.exists()) {
      try {
        if (f.isDirectory)
          FileUtils.deleteDirectory(f)
        else
          FileUtils.deleteQuietly(f)
      }
      catch {
        case e: Exception =>
          if (throwOnError)
            throw e
          else
            FileUtils.forceDeleteOnExit(f)
      }
    }

  }

  def generateChecksum(path: String): String = {
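    // MD5 digest of the file contents, rendered as upper-case hex.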
    val arr = Files readAllBytes (Paths get path)
    val checksum = MessageDigest.getInstance("MD5") digest arr
    checksum.map("%02X" format _).mkString
  }

  def getHumanReadableFileSize(size: Long): String = {
    if (size <= 0) return "0"
    val units = Array[String]("B", "KB", "MB", "GB", "TB", "PB", "EB")
    val digitGroups = (Math.log10(size) / Math.log10(1024)).toInt
    new DecimalFormat("#,##0.#").format(size / Math.pow(1024, digitGroups)) + " " + units(digitGroups)
  }
} 
Example 129
Source File: Quickstart.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package example

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{SparkSession, SQLContext}
import io.delta.tables._

import org.apache.spark.sql.functions._
import org.apache.commons.io.FileUtils
import java.io.File

object Quickstart {
  def main(args: Array[String]): Unit = {

    val spark = SparkSession
      .builder()
      .appName("Quickstart")
      .master("local[*]")
      .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
      .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
      .getOrCreate()

    val file = new File("/tmp/delta-table")
    if (file.exists()) FileUtils.deleteDirectory(file)
    
    // Create a table
    println("Creating a table")
    val path = file.getCanonicalPath
    var data = spark.range(0, 5)
    data.write.format("delta").save(path)

    // Read table
    println("Reading the table")
    val df = spark.read.format("delta").load(path)
    df.show()

    // Upsert (merge) new data
    println("Upsert new data")
    val newData = spark.range(0, 20).toDF
    val deltaTable = DeltaTable.forPath(path)

    deltaTable.as("oldData")
      .merge(
        newData.as("newData"),
        "oldData.id = newData.id")
      .whenMatched
      .update(Map("id" -> col("newData.id")))
      .whenNotMatched
      .insert(Map("id" -> col("newData.id")))
      .execute()

    deltaTable.toDF.show()

    // Update table data
    println("Overwrite the table")
    data = spark.range(5, 10)
    data.write.format("delta").mode("overwrite").save(path)
    deltaTable.toDF.show()

    // Update every even value by adding 100 to it
    println("Update to the table (add 100 to every even value)")
    deltaTable.update(
      condition = expr("id % 2 == 0"),
      set = Map("id" -> expr("id + 100")))
    deltaTable.toDF.show()

    // Delete every even value
    deltaTable.delete(condition = expr("id % 2 == 0"))
    deltaTable.toDF.show()

    // Read old version of the data using time travel
    print("Read old data using time travel")
    val df2 = spark.read.format("delta").option("versionAsOf", 0).load(path)
    df2.show()

    // Cleanup
    FileUtils.deleteDirectory(file)
    spark.stop()
  }
} 
Example 130
Source File: QuickstartSQL.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package example

import org.apache.spark.sql.SparkSession
import io.delta.tables._

import org.apache.spark.sql.functions._
import org.apache.commons.io.FileUtils
import java.io.File

object QuickstartSQL {
  def main(args: Array[String]): Unit = {
    // Create Spark Conf
    val spark = SparkSession
      .builder()
      .appName("QuickstartSQL")
      .master("local[*]")
      .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
      .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
      .getOrCreate()
    
    val tableName = "tblname"

    // Clear up old session
    spark.sql(s"DROP TABLE IF EXISTS $tableName")
    spark.sql(s"DROP TABLE IF EXISTS newData")

    try {
      // Create a table
      println("Creating a table")
      spark.sql(s"CREATE TABLE $tableName(id LONG) USING delta")
      spark.sql(s"INSERT INTO $tableName VALUES 0, 1, 2, 3, 4")

      // Read table
      println("Reading the table")
      spark.sql(s"SELECT * FROM $tableName").show()

      // Upsert (merge) new data
      println("Upsert new data")
      spark.sql("CREATE TABLE newData(id LONG) USING parquet")
      spark.sql("INSERT INTO newData VALUES 3, 4, 5, 6")
      
      spark.sql(s"""MERGE INTO $tableName USING newData
          ON ${tableName}.id = newData.id
          WHEN MATCHED THEN
            UPDATE SET ${tableName}.id = newData.id
          WHEN NOT MATCHED THEN INSERT *
      """)

      spark.sql(s"SELECT * FROM $tableName").show()

      // Update table data
      println("Overwrite the table")
      spark.sql(s"INSERT OVERWRITE $tableName VALUES 5, 6, 7, 8, 9")
      spark.sql(s"SELECT * FROM $tableName").show()

      // Update every even value by adding 100 to it
      println("Update to the table (add 100 to every even value)")
      spark.sql(s"UPDATE $tableName SET id = (id + 100) WHERE (id % 2 == 0)")
      spark.sql(s"SELECT * FROM $tableName").show()

      // Delete every even value
      spark.sql(s"DELETE FROM $tableName WHERE (id % 2 == 0)")
      spark.sql(s"SELECT * FROM $tableName").show()

      // Read old version of the data using time travel
      print("Read old data using time travel")
      val df2 = spark.read.format("delta").option("versionAsOf", 0).table(tableName)
      df2.show()
    } finally {
      // Cleanup
      spark.sql(s"DROP TABLE IF EXISTS $tableName")
      spark.sql(s"DROP TABLE IF EXISTS newData")
      spark.stop()
    }
  }
} 
Example 131
Source File: QuickstartSQLOnPaths.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package example

import org.apache.spark.sql.SparkSession
import io.delta.tables._

import org.apache.spark.sql.functions._
import org.apache.commons.io.FileUtils
import java.io.File

object QuickstartSQLOnPaths {
  def main(args: Array[String]): Unit = {
    // Create Spark Conf
    val spark = SparkSession
      .builder()
      .appName("QuickstartSQLOnPaths")
      .master("local[*]")
      .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
      .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
      .getOrCreate()
    
    val tablePath = new File("/tmp/delta-table")
    if (tablePath.exists()) FileUtils.deleteDirectory(tablePath)

    // Clear up old session
    spark.sql(s"DROP TABLE IF EXISTS newData")

    try {
      // Create a table
      println("Creating a table")
      spark.sql(s"CREATE TABLE delta.`$tablePath`(id LONG) USING delta")
      spark.sql(s"INSERT INTO delta.`$tablePath` VALUES 0, 1, 2, 3, 4")

      // Read table
      println("Reading the table")
      spark.sql(s"SELECT * FROM delta.`$tablePath`").show()

      // Upsert (merge) new data
      println("Upsert new data")
      spark.sql("CREATE TABLE newData(id LONG) USING parquet")
      spark.sql("INSERT INTO newData VALUES 3, 4, 5, 6")
      
      spark.sql(s"""MERGE INTO delta.`$tablePath` data USING newData
          ON data.id = newData.id
          WHEN MATCHED THEN
            UPDATE SET data.id = newData.id
          WHEN NOT MATCHED THEN INSERT *
      """)

      spark.sql(s"SELECT * FROM delta.`$tablePath`").show()

      // Update table data
      println("Overwrite the table")
      spark.sql(s"INSERT OVERWRITE delta.`$tablePath` VALUES 5, 6, 7, 8, 9")
      spark.sql(s"SELECT * FROM delta.`$tablePath`").show()

      // Update every even value by adding 100 to it
      println("Update to the table (add 100 to every even value)")
      spark.sql(s"UPDATE delta.`$tablePath` SET id = (id + 100) WHERE (id % 2 == 0)")
      spark.sql(s"SELECT * FROM delta.`$tablePath`").show()

      // Delete every even value
      spark.sql(s"DELETE FROM delta.`$tablePath` WHERE (id % 2 == 0)")
      spark.sql(s"SELECT * FROM delta.`$tablePath`").show()
    } finally {
      // Cleanup
      spark.sql(s"DROP TABLE IF EXISTS newData")
      spark.stop()
    }
  }
} 
Example 132
Source File: Utilities.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package example

import java.io.File

import io.delta.tables.DeltaTable
import org.apache.commons.io.FileUtils

import org.apache.spark.sql.SparkSession

object Utilities {
  def main(args: Array[String]): Unit = {
    // Create a Spark Session with SQL enabled
    val spark = SparkSession
      .builder()
      .appName("Utilities")
      .master("local[*]")
      .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
      .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
      // control the parallelism for vacuum
      .config("spark.sql.sources.parallelPartitionDiscovery.parallelism", "4")
      .getOrCreate()

    // Create a table
    println("Create a parquet table")
    val data = spark.range(0, 5)
    val file = new File("/tmp/parquet-table")
    val path = file.getAbsolutePath
    data.write.format("parquet").save(path)

    // Convert to delta
    println("Convert to Delta")
    DeltaTable.convertToDelta(spark, s"parquet.`$path`")

    // Read table as delta
    var df = spark.read.format("delta").load(path)

    // Read old version of data using time travel
    df = spark.read.format("delta").option("versionAsOf", 0).load(path)
    df.show()

    val deltaTable = DeltaTable.forPath(path)

    // Utility commands
    println("Vacuum the table")
    deltaTable.vacuum()

    println("Describe History for the table")
    deltaTable.history().show()

    // Generate manifest
    println("Generate Manifest files")
    deltaTable.generate("SYMLINK_FORMAT_MANIFEST")

    // SQL utility commands
    println("SQL Vacuum")
    spark.sql(s"VACUUM '$path' RETAIN 169 HOURS")

    println("SQL Describe History")
    println(spark.sql(s"DESCRIBE HISTORY '$path'").collect())

    // Cleanup
    FileUtils.deleteDirectory(new File(path))
    spark.stop()
  }
} 
Example 133
Source File: CodeGen.scala    From mmlspark   with MIT License 5 votes vote down vote up
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.codegen

import java.io.File

import com.microsoft.ml.spark.codegen.Config._
import com.microsoft.ml.spark.core.env.FileUtilities._
import org.apache.commons.io.FileUtils
import org.apache.commons.io.FilenameUtils._

object CodeGen {

  def generateArtifacts(): Unit = {
    println(
      s"""|Running code generation with config:
          |  topDir:     $TopDir
          |  packageDir: $PackageDir
          |  pySrcDir:   $PySrcDir
          |  pyTestDir:  $PyTestDir
          |  rsrcDir:    $RSrcDir""".stripMargin)

    println("Creating temp folders")
    if (GeneratedDir.exists()) FileUtils.forceDelete(GeneratedDir)

    println("Generating python APIs")
    PySparkWrapperGenerator()
    println("Generating R APIs")
    SparklyRWrapperGenerator(Version)

    def toDir(f: File): File = new File(f, File.separator)

    //writeFile(new File(pySrcDir, "__init__.py"), packageHelp(""))
    FileUtils.copyDirectoryToDirectory(toDir(PySrcOverrideDir), toDir(PySrcDir))
    FileUtils.copyDirectoryToDirectory(toDir(PyTestOverrideDir), toDir(PyTestDir))
    makeInitFiles()

    // build init file
    // package python+r zip files
    // zipFolder(pyDir, pyZipFile)
    RPackageDir.mkdirs()
    zipFolder(RSrcDir, new File(RPackageDir, s"mmlspark-$Version.zip"))

    //FileUtils.forceDelete(rDir)
    // leave the python source files, so they will be included in the super-jar
    // FileUtils.forceDelete(pyDir)
  }

  private def makeInitFiles(packageFolder: String = ""): Unit = {
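    // Write an __init__.py that re-exports the generated modules, then recurse into sub-packages.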
    val dir = new File(new File(PySrcDir,"mmlspark"), packageFolder)
    val packageString = if (packageFolder != "") packageFolder.replace("/",".") else ""
    val importStrings =
      dir.listFiles.filter(_.isFile).sorted
        .map(_.getName)
        .filter(name => name.endsWith(".py") && !name.startsWith("_") && !name.startsWith("test"))
        .map(name => s"from mmlspark$packageString.${getBaseName(name)} import *\n").mkString("")
    writeFile(new File(dir, "__init__.py"), packageHelp(importStrings))
    dir.listFiles().filter(_.isDirectory).foreach(f =>
      makeInitFiles(packageFolder +"/" + f.getName)
    )
  }

  def main(args: Array[String]): Unit = {
    generateArtifacts()
  }

} 
Example 134
Source File: DownloaderSuite.scala    From mmlspark   with MIT License 5 votes vote down vote up
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.downloader

import java.io.File
import java.nio.file.Files

import com.microsoft.ml.spark.core.test.base.TestBase
import org.apache.commons.io.FileUtils

import scala.collection.JavaConversions._
import scala.concurrent.duration.Duration
import scala.util.Random

class DownloaderSuite extends TestBase {

  lazy val saveDir = Files.createTempDirectory("Models-").toFile
  lazy val d = new ModelDownloader(session, saveDir.toURI)

  test("retry utility should catch flakiness"){
    (1 to 20).foreach { i =>
      val result = FaultToleranceUtils.retryWithTimeout(20, Duration.apply(2, "seconds")) {
        val r = Random.nextDouble()
        if (r > .5) {
          println(s"$r failed")
          throw new IllegalArgumentException("Flakiness")
        } else if (r < .1){
          //Getting stuck
          val m = 3* 1e3.toLong
          println(s"$r Stuck for $m")
          Thread.sleep(m)
        }
        println(s"$r Success")
        5
      }
      assert(result === 5)
    }
  }

  test("A downloader should be able to download a model", TestBase.Extended) {
    val m = d.remoteModels.filter(_.name == "CNN").next()
    val schema = d.downloadModel(m)
    println(schema)
    assert(m.size == new File(schema.uri).length())
    assert(d.localModels.toList.length == 1)
  }

  ignore("A downloader should be able to get all Models " +
    "and maybeDownload should be fast if models are downloaded", TestBase.Extended) {
    val (modTimes, modTimes2) = FaultToleranceUtils.retryWithTimeout(10, Duration.apply(500, "seconds")) {
      d.downloadModels()
      val modTimes = d.localModels.map(s =>
        new File(s.uri).lastModified())

      d.downloadModels()
      val modTimes2 = d.localModels.map(s =>
        new File(s.uri).lastModified())
      (modTimes, modTimes2)
    }
    // No modification on second call because models are cached
    assert(modTimes.toList === modTimes2.toList)

    // the downloader's local models will reflect the change
    assert(d.localModels.toList.length == d.remoteModels.toList.length)

    // there will be a metadata file for every model
    assert(saveDir.list().count(_.endsWith(".meta")) == d.localModels.toList.length)
  }

  override def afterAll(): Unit = {
    if (saveDir.exists()) {
      FileUtils.forceDelete(saveDir)
    }
    super.afterAll()
  }

} 
Example 135
Source File: S3ObjectUploader.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.tools.neptune.export

import java.io._
import java.util
import java.util.concurrent.{Executors, TimeoutException}
import java.util.stream.Collectors
import java.util.{Collections, Vector}

import com.amazonaws.auth.profile.ProfileCredentialsProvider
import com.amazonaws.services.s3.AmazonS3ClientBuilder
import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest}
import com.amazonaws.{AmazonServiceException, ClientConfiguration, Protocol, SdkClientException}
import org.apache.commons.io.{FileUtils, IOUtils}
import org.slf4j.LoggerFactory

import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.{FiniteDuration, _}

object S3ObjectUploader{

  val executor = Executors.newFixedThreadPool(1)
  implicit val ec: ExecutionContext = scala.concurrent.ExecutionContext.fromExecutor(executor)
  protected lazy val logger = LoggerFactory.getLogger("s3_uploader")


  def init(proxyHost:Option[String], proxyPort:Option[Int]) = {
    val clientRegion = "us-east-1"
    val config = new ClientConfiguration
    config.setProtocol(Protocol.HTTPS)
    proxyHost.foreach(host => config.setProxyHost(host))
    proxyPort.foreach(port =>  config.setProxyPort(port))
    val s3Client = AmazonS3ClientBuilder.standard()
      .withRegion(clientRegion)
      .withClientConfiguration(config)
      .withCredentials(new ProfileCredentialsProvider())
      .build()
    s3Client
  }


  def persistChunkToS3Bucket(chunkData:String, fileName:String, proxyHost:Option[String], proxyPort:Option[Int], s3Directory:String) = {
        try{
          init(proxyHost, proxyPort).putObject(s3Directory, fileName, chunkData)
      }
      catch {
        case e: AmazonServiceException =>
          e.printStackTrace()
          throw e
        case e: SdkClientException =>
          e.printStackTrace()
          throw e
      }
  }

  def persistChunkToS3Bucket(tmpFile:File, proxyHost:Option[String], proxyPort:Option[Int], s3Directory:String, retryCount:Int = 3):Unit = {
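    // Upload the temp file with a 5-minute timeout, retrying on timeout up to retryCount times.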
    try{
      val s3UploadTask = Future{init(proxyHost, proxyPort).putObject(s3Directory, tmpFile.getName, tmpFile)}(ec)
      Await.result(s3UploadTask,  5.minutes)
      tmpFile.delete()
    }
    catch {
      case e:TimeoutException =>
        if(retryCount > 0) {
          logger.error("S3 upload task run more than 5 minutes..Going to retry")
          persistChunkToS3Bucket(tmpFile, proxyHost, proxyPort, s3Directory, retryCount-1)
        }
        else{
          throw new Exception( "S3 upload task duration was more than 5 minutes")
        }
      case e: AmazonServiceException =>
        e.printStackTrace()
        throw e
      case e: SdkClientException =>
        e.printStackTrace()
        throw e
    }
  }

} 
Example 136
Source File: AnalyzeInconsistenciesResult.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.analytics.main

import java.io.File
import java.nio.charset.StandardCharsets.UTF_8

import cmwell.analytics.data.InfotonAndIndexWithSystemFields
import cmwell.analytics.util.Connector
import org.apache.commons.io.FileUtils
import org.apache.log4j.LogManager
import org.apache.spark.sql.{Column, DataFrame, Row}
import org.rogach.scallop.{ScallopConf, ScallopOption}

import scala.collection.breakOut

object AnalyzeInconsistenciesResult {

  def main(args: Array[String]): Unit = {

    val logger = LogManager.getLogger(AnalyzeInconsistenciesResult.getClass)

    try {

      object Opts extends ScallopConf(args) {

        val in: ScallopOption[String] = opt[String]("in", short = 'i', descr = "The path to read the (parquet) inconsistencies dataset from", required = true)
        val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the (csv) output to", required = true)
        val shell: ScallopOption[Boolean] = opt[Boolean]("spark-shell", short = 's', descr = "Run a Spark shell", required = false, default = Some(false))

        verify()
      }

      Connector(
        appName = "Analyze InfotonAndIndexWithSystemFields Output",
        sparkShell = Opts.shell()
      ).withSparkSessionDo { spark =>

        val ds: DataFrame = spark.read.parquet(Opts.in())

        import org.apache.spark.sql.functions._

        // A column expression that counts the number of failures for each constraint.
        // This will also include null counts, needed to interpret the results.
        val constraints: Seq[(String, Column)] = InfotonAndIndexWithSystemFields.constraints(ds).map { case (name, predicate) =>
          name -> sum(when(predicate, 0L).otherwise(1L)).as(name)
        }(breakOut)

        // Compute the failure counts
        val failureCounts: Row = ds.agg(constraints.head._2, constraints.tail.map(_._2): _*).head

        val results = for {
          i <- constraints.indices
          constraintName = constraints(i)._1
          failureCount = if (failureCounts.isNullAt(i)) 0 else failureCounts.getAs[Long](i)
        } yield s"$constraintName,$failureCount"

        FileUtils.write(new File(Opts.out()), "constraint,failures\n" + results.mkString("\n"), UTF_8)
      }
    }
    catch {
      case ex: Throwable =>
        logger.error(ex.getMessage, ex)
        System.exit(1)
    }
  }
} 
Example 137
Source File: ExtractFromParquet.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.analytics.data

import java.io.File
import java.nio.charset.StandardCharsets.UTF_8

import cmwell.analytics.util.Connector
import cmwell.analytics.util.StringUtil._
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.rogach.scallop.{ScallopConf, ScallopOption}

object ExtractFromParquet {

  def main(args: Array[String]): Unit = {

    object Opts extends ScallopConf(args) {

      val pathsToFind: ScallopOption[String] = opt[String]("paths-to-find", short = 'f', descr = "A file containing the list of paths to look for", required = true)
      val parquetData: ScallopOption[String] = opt[String]("parquet-file", short = 'p', descr = "A Parquet file containing the data; single string column rdfStatement", required = true)
      val extractedData: ScallopOption[String] = opt[String]("extracted-data", short = 'd', descr = "The file that extracted data will be written to (in nquads format)", required = true)
      val pathsNotFound: ScallopOption[String] = opt[String]("paths-not-found", short = 'n', descr = "The output file that any paths that were not found are written to", required = true)
      val pathsFound: ScallopOption[String] = opt[String]("paths-found", short = 'a', descr = "The output file containing the paths that we found are written to", required = true)

      verify()
    }

    Connector(sparkShell = true, appName = "Extract from parquet").withSparkSessionDo {
      spark: SparkSession =>

        val pathsToFind = Set(splitLines(FileUtils.readFileToString(new File(Opts.pathsToFind()), UTF_8)): _*)

        val ds: DataFrame = spark.read.parquet(Opts.parquetData())

        // Cheesy parsing of path from an RDF nquad, but sufficient for this purpose
        def extractPath(rdfStatement: String): String = rdfStatement.substring(7, rdfStatement.indexOf(">"))

        val statementsFound = ds.rdd.filter { row: Row =>

          val statement = row.getAs[String]("rdfStatement")
          val path = extractPath(statement)

          pathsToFind.contains(path)
        }.collect() // expect the result to be small, so collect is OK

        // Save all the paths that were not found to file - look for them in other files.
        val pathsFound: Set[String] = Set(statementsFound.map(row => extractPath(row.getString(0))): _*)
        println(s"There were ${pathsFound.size} paths found (out of ${pathsToFind.size}).")
        FileUtils.writeStringToFile(new File(Opts.pathsFound()), pathsFound.mkString("\n"), UTF_8, false)

        val pathsNotFound = pathsToFind.diff(pathsFound)
        println(s"There were ${pathsNotFound.size} paths not found.")
        FileUtils.writeStringToFile(new File(Opts.pathsNotFound()), pathsNotFound.mkString("\n"), UTF_8, false)

        // Save the RDF statements for the paths that were found
        val x = statementsFound.map(row => row.getString(0)).mkString("\n")
        FileUtils.writeStringToFile(new File(Opts.extractedData()), x, UTF_8, false)
    }
  }
} 
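The same read/write round-trip, as a self-contained sketch. The splitLines helper from cmwell.analytics.util is replaced with a plain split on newlines, and the file names are illustrative.

import java.io.File
import java.nio.charset.StandardCharsets.UTF_8
import org.apache.commons.io.FileUtils

object PathListSketch {
  // Read a newline-separated list of paths and write back only the distinct, non-empty ones.
  def dedupePaths(in: File, out: File): Unit = {
    val paths = FileUtils.readFileToString(in, UTF_8).split("\n").map(_.trim).filter(_.nonEmpty)
    FileUtils.writeStringToFile(out, paths.distinct.mkString("\n"), UTF_8, false) // false = overwrite
  }
}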
Example 138
Source File: DumpCompleteDocumentFromEs.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.analytics.main

import java.nio.file.Paths

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import cmwell.analytics.data.{DataWriterFactory, IndexWithCompleteDocument}
import cmwell.analytics.downloader.PartitionedDownloader
import cmwell.analytics.util.TimestampConversion.timestampConverter
import cmwell.analytics.util.{DiscoverEsTopology, FindContactPoints}
import org.apache.commons.io.FileUtils
import org.apache.log4j.LogManager
import org.rogach.scallop.{ScallopConf, ScallopOption}

import scala.concurrent.ExecutionContextExecutor

object DumpCompleteDocumentFromEs {

  def main(args: Array[String]): Unit = {

    val logger = LogManager.getLogger(DumpCompleteDocumentFromEs.getClass)

    // Since we expect this to be run on a CM-Well node, the default parallelism is to use half the processors
    // so as to avoid starving the CM-Well node from processor resources. A higher level of parallelism might
    // be possible (without interfering with CM-Well) since most of the work will actually be on the ES side.
    val defaultParallelism = 1 max (Runtime.getRuntime.availableProcessors / 2)

    implicit val system: ActorSystem = ActorSystem("dump-complete-document-from-es")
    implicit val executionContext: ExecutionContextExecutor = system.dispatcher
    implicit val actorMaterializer: ActorMaterializer = ActorMaterializer()

    try {

      object Opts extends ScallopConf(args) {

        val readIndex: ScallopOption[String] = opt[String]("read-index", short = 'i', descr = "The name of the index to read from (default: cm_well_all)", required = false)
        val parallelism: ScallopOption[Int] = opt[Int]("parallelism", short = 'p', descr = "The parallelism level", default = Some(defaultParallelism))

        val currentOnly: ScallopOption[Boolean] = opt[Boolean]("current-filter", short = 'c', descr = "Filter on current status", default = None)
        val lastModifiedGteFilter: ScallopOption[java.sql.Timestamp] = opt[java.sql.Timestamp]("lastmodified-gte-filter", descr = "Filter on lastModified >= <value>, where value is an ISO8601 timestamp", default = None)(timestampConverter)
        val pathPrefixFilter: ScallopOption[String] = opt[String]("path-prefix-filter", descr = "Filter on the path prefix matching <value>", default = None)

        val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the output to", required = true)
        val format: ScallopOption[String] = opt[String]("format", short = 'f', descr = "The data format: either 'parquet' or 'csv'", default = Some("parquet"))
        val url: ScallopOption[String] = trailArg[String]("url", descr = "A CM-Well URL", required = true)

        verify()
      }

      val esContactPoint = FindContactPoints.es(Opts.url())
      val indexesOrAliasesToRead = Opts.readIndex.toOption.fold(Seq("cm_well_all"))(Seq(_))
      val esTopology = DiscoverEsTopology(esContactPoint = esContactPoint, aliases = indexesOrAliasesToRead)

      // Calling script should clear output directory as necessary.

      val objectExtractor = IndexWithCompleteDocument
      val dataWriterFactory = DataWriterFactory.file(format = Opts.format(), objectExtractor, outDirectory = Opts.out())

      PartitionedDownloader.runDownload(
        esTopology = esTopology,
        parallelism = Opts.parallelism(),

        currentOnly = Opts.currentOnly(),
        lastModifiedGteFilter = Opts.lastModifiedGteFilter.toOption,
        pathPrefixFilter = Opts.pathPrefixFilter.toOption,

        objectExtractor = objectExtractor,
        dataWriterFactory = dataWriterFactory,
        sourceFilter = false)

      // The Hadoop convention is to touch the (empty) _SUCCESS file to signal successful completion.
      FileUtils.touch(Paths.get(Opts.out(), "_SUCCESS").toFile)
    }
    catch {
      case ex: Throwable =>
        logger.error(ex.getMessage, ex)
        System.exit(1)
    }
    finally {
      system.terminate()
    }
  }
} 
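The _SUCCESS marker convention used by these tools, isolated into a short sketch (the output directory is whatever the caller supplies):

import java.nio.file.Paths
import org.apache.commons.io.FileUtils

object SuccessMarkerSketch {
  // Touch an empty _SUCCESS file in the output directory, per the Hadoop convention.
  // FileUtils.touch creates the file if it is missing, or updates its timestamp if it exists.
  def markSuccess(outDirectory: String): Unit =
    FileUtils.touch(Paths.get(outDirectory, "_SUCCESS").toFile)
}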
Example 139
Source File: DumpKeyFieldsFromEs.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.analytics.main

import java.nio.file.Paths

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import cmwell.analytics.data.{DataWriterFactory, IndexWithKeyFields}
import cmwell.analytics.downloader.PartitionedDownloader
import cmwell.analytics.util.TimestampConversion.timestampConverter
import cmwell.analytics.util.{DiscoverEsTopology, FindContactPoints}
import org.apache.commons.io.FileUtils
import org.apache.log4j.LogManager
import org.rogach.scallop.{ScallopConf, ScallopOption}

import scala.concurrent.ExecutionContextExecutor

object DumpKeyFieldsFromEs {

  def main(args: Array[String]): Unit = {

    val logger = LogManager.getLogger(DumpKeyFieldsFromEs.getClass)

    implicit val system: ActorSystem = ActorSystem("dump-key-fields-from-es")
    implicit val executionContext: ExecutionContextExecutor = system.dispatcher
    implicit val actorMaterializer: ActorMaterializer = ActorMaterializer()

    try {
      // Since we expect this to be run on a CM-Well node, the default parallelism is to use half the processors
      // so as to avoid starving the CM-Well node from processor resources. A higher level of parallelism might
      // be possible (without interfering with CM-Well) since most of the work will actually be on the ES side.
      val defaultParallelism = 1 max (Runtime.getRuntime.availableProcessors / 2)

      object Opts extends ScallopConf(args) {

        val readIndex: ScallopOption[String] = opt[String]("read-index", short = 'i', descr = "The name of the index to read from (default: cm_well_all)", required = false)
        val parallelism: ScallopOption[Int] = opt[Int]("parallelism", short = 'p', descr = "The parallelism level", default = Some(defaultParallelism))

        val currentOnly: ScallopOption[Boolean] = opt[Boolean]("current-only", short = 'c', descr = "Only download current uuids")
        val lastModifiedGteFilter: ScallopOption[java.sql.Timestamp] = opt[java.sql.Timestamp]("lastmodified-gte-filter", descr = "Filter on lastModified >= <value>, where value is an ISO8601 timestamp", default = None)(timestampConverter)
        val pathPrefixFilter: ScallopOption[String] = opt[String]("path-prefix-filter", descr = "Filter on the path prefix matching <value>", default = None)

        val format: ScallopOption[String] = opt[String]("format", short = 'f', descr = "The data format: either 'parquet' or 'csv'", default = Some("parquet"))
        val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the output to", required = true)

        val url: ScallopOption[String] = trailArg[String]("url", descr = "A CM-Well URL", required = true)

        val sourceFilter: ScallopOption[Boolean] = toggle("source-filter", noshort = true, default = Some(true), prefix = "no-",
          descrNo = "Do not filter _source fields (workaround for bad index)", descrYes = "Use source filtering to reduce network traffic")

        verify()
      }

      val esContactPoint = FindContactPoints.es(Opts.url())
      val indexesOrAliasesToRead = Opts.readIndex.toOption.fold(Seq("cm_well_all"))(Seq(_))
      val esTopology = DiscoverEsTopology(esContactPoint = esContactPoint, aliases = indexesOrAliasesToRead)

      // Calling script should clear output directory as necessary.

      val objectExtractor = IndexWithKeyFields
      val dataWriterFactory = DataWriterFactory.file(format = Opts.format(), objectExtractor, outDirectory = Opts.out())

      PartitionedDownloader.runDownload(
        esTopology = esTopology,
        parallelism = Opts.parallelism(),

        currentOnly = Opts.currentOnly(),
        lastModifiedGteFilter = Opts.lastModifiedGteFilter.toOption,
        pathPrefixFilter = Opts.pathPrefixFilter.toOption,

        objectExtractor = objectExtractor,
        dataWriterFactory = dataWriterFactory,
        sourceFilter = Opts.sourceFilter())

      // The Hadoop convention is to touch the (empty) _SUCCESS file to signal successful completion.
      FileUtils.touch(Paths.get(Opts.out(), "_SUCCESS").toFile)
    }
    catch {
      case ex: Throwable =>
        logger.error(ex.getMessage, ex)
        System.exit(1)
    }
    finally {
      system.terminate()
    }
  }
} 
Example 140
Source File: DumpUuidOnlyFromEs.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.analytics.main

import java.nio.file.Paths

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import cmwell.analytics.data.{DataWriterFactory, IndexWithUuidOnly}
import cmwell.analytics.downloader.PartitionedDownloader
import cmwell.analytics.util.TimestampConversion.timestampConverter
import cmwell.analytics.util.{DiscoverEsTopology, FindContactPoints}
import org.apache.commons.io.FileUtils
import org.apache.log4j.LogManager
import org.rogach.scallop.{ScallopConf, ScallopOption}

import scala.concurrent.ExecutionContextExecutor

object DumpUuidOnlyFromEs {

  def main(args: Array[String]): Unit = {

    val logger = LogManager.getLogger(DumpUuidOnlyFromEs.getClass)

    // Since we expect this to be run on a CM-Well node, the default parallelism is to use half the processors
    // so as to avoid starving the CM-Well node from processor resources. A higher level of parallelism might
    // be possible (without interfering with CM-Well) since most of the work will actually be on the ES side.
    val defaultParallelism = 1 max (Runtime.getRuntime.availableProcessors / 2)

    implicit val system: ActorSystem = ActorSystem("dump-uuid-only-from-es")
    implicit val executionContext: ExecutionContextExecutor = system.dispatcher
    implicit val actorMaterializer: ActorMaterializer = ActorMaterializer()

    try {

      object Opts extends ScallopConf(args) {

        val readIndex: ScallopOption[String] = opt[String]("read-index", short = 'i', descr = "The name of the index to read from (default: cm_well_all)", required = false)
        val parallelism: ScallopOption[Int] = opt[Int]("parallelism", short = 'p', descr = "The parallelism level", default = Some(defaultParallelism))

        val currentOnly: ScallopOption[Boolean] = opt[Boolean]("current-only", short = 'c', descr = "Only download current uuids")
        val lastModifiedGteFilter: ScallopOption[java.sql.Timestamp] = opt[java.sql.Timestamp]("lastmodified-gte-filter", descr = "Filter on lastModified >= <value>, where value is an ISO8601 timestamp", default = None)(timestampConverter)
        val pathPrefixFilter: ScallopOption[String] = opt[String]("path-prefix-filter", descr = "Filter on the path prefix matching <value>", default = None)

        val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the output to", required = true)
        val format: ScallopOption[String] = opt[String]("format", short = 'f', descr = "The data format: either 'parquet' or 'csv'", default = Some("parquet"))
        val url: ScallopOption[String] = trailArg[String]("url", descr = "A CM-Well URL", required = true)

        val sourceFilter: ScallopOption[Boolean] = toggle("source-filter", noshort = true, default=Some(true), prefix = "no-",
          descrNo = "Do not filter _source fields (workaround for bad index)", descrYes = "Use source filtering to reduce network traffic")

        verify()
      }

      val esContactPoint = FindContactPoints.es(Opts.url())
      val indexesOrAliasesToRead = Opts.readIndex.toOption.fold(Seq("cm_well_all"))(Seq(_))
      val esTopology = DiscoverEsTopology(esContactPoint = esContactPoint, aliases = indexesOrAliasesToRead)

      // Calling script should clear output directory as necessary.

      val objectExtractor = IndexWithUuidOnly
      val dataWriterFactory = DataWriterFactory.file(format = Opts.format(), objectExtractor, outDirectory = Opts.out())

      PartitionedDownloader.runDownload(
        esTopology = esTopology,
        parallelism = Opts.parallelism(),

        currentOnly = Opts.currentOnly(),
        lastModifiedGteFilter = Opts.lastModifiedGteFilter.toOption,
        pathPrefixFilter = Opts.pathPrefixFilter.toOption,

        objectExtractor = objectExtractor,
        dataWriterFactory = dataWriterFactory,
        sourceFilter = Opts.sourceFilter())

      // The Hadoop convention is to touch the (empty) _SUCCESS file to signal successful completion.
      FileUtils.touch(Paths.get(Opts.out(), "_SUCCESS").toFile)
    }
    catch {
      case ex: Throwable =>
        logger.error(ex.getMessage, ex)
        System.exit(1)
    }
    finally {
      system.terminate()
    }
  }
} 
Example 141
Source File: DumpSystemFieldsFromEs.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.analytics.main

import java.nio.file.Paths

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import cmwell.analytics.data.{DataWriterFactory, IndexWithSystemFields}
import cmwell.analytics.downloader.PartitionedDownloader
import cmwell.analytics.util.TimestampConversion.timestampConverter
import cmwell.analytics.util.{DiscoverEsTopology, FindContactPoints}
import org.apache.commons.io.FileUtils
import org.apache.log4j.LogManager
import org.rogach.scallop.{ScallopConf, ScallopOption}

import scala.concurrent.ExecutionContextExecutor

object DumpSystemFieldsFromEs {

  def main(args: Array[String]): Unit = {

    val logger = LogManager.getLogger(DumpSystemFieldsFromEs.getClass)

    implicit val system: ActorSystem = ActorSystem("dump-system-fields-from-es")
    implicit val executionContext: ExecutionContextExecutor = system.dispatcher
    implicit val actorMaterializer: ActorMaterializer = ActorMaterializer()

    try {
      // Since we expect this to be run on a CM-Well node, the default parallelism is to use half the processors
      // so as to avoid starving the CM-Well node from processor resources. A higher level of parallelism might
      // be possible (without interfering with CM-Well) since most of the work will actually be on the ES side.
      val defaultParallelism = 1 max (Runtime.getRuntime.availableProcessors / 2)

      object Opts extends ScallopConf(args) {

        val readIndex: ScallopOption[String] = opt[String]("read-index", short = 'i', descr = "The name of the index to read from (default: cm_well_all)", required = false)
        val parallelism: ScallopOption[Int] = opt[Int]("parallelism", short = 'p', descr = "The parallelism level", default = Some(defaultParallelism))

        val currentOnly: ScallopOption[Boolean] = opt[Boolean]("current-only", short = 'c', descr = "Only download current uuids")
        val lastModifiedGteFilter: ScallopOption[java.sql.Timestamp] = opt[java.sql.Timestamp]("lastmodified-gte-filter", descr = "Filter on lastModified >= <value>, where value is an ISO8601 timestamp", default = None)(timestampConverter)
        val pathPrefixFilter: ScallopOption[String] = opt[String]("path-prefix-filter", descr = "Filter on the path prefix matching <value>", default = None)

        val format: ScallopOption[String] = opt[String]("format", short = 'f', descr = "The data format: either 'parquet' or 'csv'", default = Some("parquet"))
        val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the output to", required = true)
        val url: ScallopOption[String] = trailArg[String]("url", descr = "A CM-Well URL", required = true)

        val sourceFilter: ScallopOption[Boolean] = toggle("source-filter", noshort = true, default=Some(true), prefix = "no-",
          descrNo = "Do not filter _source fields (workaround for bad index)", descrYes = "Use source filtering to reduce network traffic")

        verify()
      }

      val esContactPoint = FindContactPoints.es(Opts.url())
      val indexesOrAliasesToRead = Opts.readIndex.toOption.fold(Seq("cm_well_all"))(Seq(_))
      val esTopology = DiscoverEsTopology(esContactPoint = esContactPoint, aliases = indexesOrAliasesToRead)

      // Calling script should clear output directory as necessary.

      val objectExtractor = IndexWithSystemFields
      val dataWriterFactory = DataWriterFactory.file(format = Opts.format(), objectExtractor, outDirectory = Opts.out())

      PartitionedDownloader.runDownload(
        esTopology = esTopology,
        parallelism = Opts.parallelism(),

        currentOnly = Opts.currentOnly(),
        lastModifiedGteFilter = Opts.lastModifiedGteFilter.toOption,
        pathPrefixFilter = Opts.pathPrefixFilter.toOption,

        objectExtractor = objectExtractor,
        dataWriterFactory = dataWriterFactory,
        sourceFilter = Opts.sourceFilter())

      // The Hadoop convention is to touch the (empty) _SUCCESS file to signal successful completion.
      FileUtils.touch(Paths.get(Opts.out(), "_SUCCESS").toFile)
    }
    catch {
      case ex: Throwable =>
        logger.error(ex.getMessage, ex)
        System.exit(1)
    }
    finally {
      system.terminate()
    }
  }
} 
Example 142
Source File: DataWriterFactory.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.analytics.data

import java.io.File
import java.nio.file.Paths

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import cmwell.analytics.util.Shard
import org.apache.avro.generic.GenericRecord
import org.apache.commons.io.FileUtils
import org.apache.parquet.hadoop.metadata.CompressionCodecName

import scala.concurrent.ExecutionContextExecutor

trait DataWriterFactory[T <: GenericRecord] {
  def apply(shard: Shard): DataWriter[T]
}

object DataWriterFactory {

  private val compressionCodec = CompressionCodecName.SNAPPY


  def file[T <: GenericRecord with CsvGenerator](format: String,
                                                 objectExtractor: ObjectExtractor[T],
                                                 outDirectory: String): Shard => DataWriter[T] = {

    val extension = s".$format" + (if (format == "parquet") s"${compressionCodec.getExtension}" else "")

    // Generate a meaningful file name for the target file name based on the source shard index name and shard number.
    (sourceShard: Shard) => {
      val outFile: File = Paths.get(outDirectory, s"part-r-${sourceShard.indexName}.${sourceShard.shard}$extension").toFile

      if (outFile.exists)
        FileUtils.forceDelete(outFile)

      new File(outFile.getParent).mkdirs()

      FileDataWriter[T](format, objectExtractor.schema, outFile.toString, compressionCodec)
    }
  }

  
  def index[T <: GenericRecord](indexMap: Map[String, String], // source-index -> target-index
                                esEndpoint: String)
                               (implicit system: ActorSystem,
                                executionContext: ExecutionContextExecutor,
                                actorMaterializer: ActorMaterializer
                               ): Shard => DataWriter[T] = {

    (sourceShard: Shard) => {
      val targetIndex = indexMap(sourceShard.indexName)
      new IndexDataWriter[T](indexName = targetIndex, esEndpoint = esEndpoint)
    }
  }
} 
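The file-preparation step from the factory above, extracted into a stand-alone sketch (the object name is hypothetical; the behaviour assumed is the same forceDelete-then-mkdirs sequence as in the source):

import java.io.File
import org.apache.commons.io.FileUtils

object OutputFileSketch {
  // Make a target path writable: remove a stale file if one exists and
  // ensure the parent directories are present, as DataWriterFactory.file does above.
  def prepare(outFile: File): File = {
    if (outFile.exists) FileUtils.forceDelete(outFile)
    Option(outFile.getParentFile).foreach(_.mkdirs())
    outFile
  }
}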
Example 143
Source File: MetadataTest.scala    From spark-pagerank   with MIT License 5 votes vote down vote up
package com.soundcloud.spark.pagerank

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{ BeforeAndAfter, FunSuite, Matchers }

class MetadataTest
  extends FunSuite
  with BeforeAndAfter
  with Matchers
  with SparkTesting {

  val path = "target/test/MetadataTest"
  val metadata = Metadata(numVertices=1)

  before {
    FileUtils.deleteDirectory(new File(path))
  }

  test("save and load") {
    Metadata.save(spark, metadata, path)
    Metadata.load(spark, path) shouldBe (metadata)
  }
} 
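The cleanup in the before block above relies on deleteDirectory being safe to call when the path does not exist yet; a minimal sketch of that fixture pattern (path name illustrative):

import java.io.File
import org.apache.commons.io.FileUtils

object TestDirSketch {
  // Reset a fixed test output directory. FileUtils.deleteDirectory is a no-op
  // when the directory does not exist, so this is safe on the very first run.
  def resetDir(path: String): File = {
    val dir = new File(path)
    FileUtils.deleteDirectory(dir)
    dir
  }
}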
Example 144
Source File: GraphBuilderAppTest.scala    From spark-pagerank   with MIT License 5 votes vote down vote up
package com.soundcloud.spark.pagerank

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{ BeforeAndAfter, FunSuite, Matchers }

class GraphBuilderAppTest
  extends FunSuite
  with BeforeAndAfter
  with Matchers
  with GraphTesting
  with SparkTesting {

  val path = "target/test/GraphBuilderAppTest"

  before {
    FileUtils.deleteDirectory(new File(path))
  }

  // TODO(jd): design a better integration test as this just runs the app without assertions
  test("integration test") {
    val options = new GraphBuilderApp.Options()
    options.output = path
    options.numPartitions = 1

    val input = spark.sparkContext.parallelize(Seq(
      (1, 5, 1.0),
      (2, 1, 1.0),
      (3, 1, 1.0),
      (4, 2, 1.0),
      (4, 3, 1.0),
      (5, 3, 1.0),
      (5, 4, 1.0)
    ).map(_.productIterator.toSeq.mkString("\t")))

    GraphBuilderApp.runFromInputs(options, spark, input)
  }
} 
Example 145
Source File: PageRankAppTest.scala    From spark-pagerank   with MIT License 5 votes vote down vote up
package com.soundcloud.spark.pagerank

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.spark.storage.StorageLevel
import org.scalatest.{ BeforeAndAfter, Matchers, FunSuite }

class PageRankAppTest
  extends FunSuite
  with BeforeAndAfter
  with Matchers
  with GraphTesting
  with SparkTesting {

  val path = "target/test/PageRankAppTest"

  before {
    FileUtils.deleteDirectory(new File(path))
  }

  // TODO(jd): design a better integration test as this just runs the app without assertions
  test("integration test") {
    val options = new PageRankApp.Options()
    options.output = path

    val numVertices = 5
    val prior = 1.0 / numVertices
    val stats = Seq(s"numVertices,$numVertices")

    val edges = spark.sparkContext.parallelize(Seq[OutEdgePair](
      // node 1 is dangling
      (2, OutEdge(1, 1.0)),
      (3, OutEdge(1, 1.0)),
      (4, OutEdge(2, 0.5)),
      (4, OutEdge(3, 0.5)),
      (5, OutEdge(3, 0.5)),
      (5, OutEdge(4, 0.5))
    ))
    val vertices = spark.sparkContext.parallelize(Seq[RichVertexPair](
      (1, VertexMetadata(prior, true)),
      (2, VertexMetadata(prior, false)),
      (3, VertexMetadata(prior, false)),
      (4, VertexMetadata(prior, false)),
      (5, VertexMetadata(prior, false))
    ))
    val graph = PageRankGraph(
      numVertices,
      edges.persist(StorageLevel.MEMORY_ONLY),
      vertices.persist(StorageLevel.MEMORY_ONLY)
    )

    PageRankApp.runFromInputs(
      spark,
      options,
      graph,
      priorsOpt = None
    )
  }
} 
Example 146
Source File: YarnShuffleIntegrationSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.yarn

import java.io.File
import java.nio.charset.StandardCharsets

import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers

import org.apache.spark._
import org.apache.spark.internal.Logging
import org.apache.spark.network.shuffle.ShuffleTestAccessor
import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
import org.apache.spark.tags.ExtendedYarnTest


@ExtendedYarnTest
class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {

  override def newYarnConfig(): YarnConfiguration = {
    val yarnConfig = new YarnConfiguration()
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
      classOf[YarnShuffleService].getCanonicalName)
    yarnConfig.set("spark.shuffle.service.port", "0")
    yarnConfig
  }

  test("external shuffle service") {
    val shuffleServicePort = YarnTestAccessor.getShuffleServicePort
    val shuffleService = YarnTestAccessor.getShuffleServiceInstance

    val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService)

    logInfo("Shuffle service port = " + shuffleServicePort)
    val result = File.createTempFile("result", null, tempDir)
    val finalState = runSpark(
      false,
      mainClassName(YarnExternalShuffleDriver.getClass),
      appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath),
      extraConf = Map(
        "spark.shuffle.service.enabled" -> "true",
        "spark.shuffle.service.port" -> shuffleServicePort.toString
      )
    )
    checkResult(finalState, result)
    assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists())
  }
}

private object YarnExternalShuffleDriver extends Logging with Matchers {

  val WAIT_TIMEOUT_MILLIS = 10000

  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // scalastyle:off println
      System.err.println(
        s"""
        |Invalid command line: ${args.mkString(" ")}
        |
        |Usage: ExternalShuffleDriver [result file] [registered exec file]
        """.stripMargin)
      // scalastyle:on println
      System.exit(1)
    }

    val sc = new SparkContext(new SparkConf()
      .setAppName("External Shuffle Test"))
    val conf = sc.getConf
    val status = new File(args(0))
    val registeredExecFile = new File(args(1))
    logInfo("shuffle service executor file = " + registeredExecFile)
    var result = "failure"
    val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup")
    try {
      val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }.
        collect().toSet
      sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
      data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet)
      result = "success"
      // only one process can open a leveldb file at a time, so we copy the files
      FileUtils.copyDirectory(registeredExecFile, execStateCopy)
      assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty)
    } finally {
      sc.stop()
      FileUtils.deleteDirectory(execStateCopy)
      Files.write(result, status, StandardCharsets.UTF_8)
    }
  }

} 
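The copy-then-inspect step in the driver above (needed because only one process can open a LevelDB file at a time) generalises to a small helper. This sketch is not from Spark; the "_dup" suffix simply mirrors the test.

import java.io.File
import org.apache.commons.io.FileUtils

object DirectorySnapshotSketch {
  // Run `f` against a disposable copy of a directory that another process may
  // still hold open, deleting the copy afterwards even if `f` throws.
  def withDirectoryCopy[T](source: File)(f: File => T): T = {
    val copy = new File(source.getAbsolutePath + "_dup")
    FileUtils.copyDirectory(source, copy)
    try f(copy)
    finally FileUtils.deleteDirectory(copy)
  }
}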
Example 147
Source File: SortShuffleSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark

import java.io.File

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.rdd.ShuffledRDD
import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
import org.apache.spark.shuffle.sort.SortShuffleManager
import org.apache.spark.util.Utils

class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll {

  // This test suite should run all tests in ShuffleSuite with sort-based shuffle.

  private var tempDir: File = _

  override def beforeAll() {
    super.beforeAll()
    conf.set("spark.shuffle.manager", "sort")
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    tempDir = Utils.createTempDir()
    conf.set("spark.local.dir", tempDir.getAbsolutePath)
  }

  override def afterEach(): Unit = {
    try {
      Utils.deleteRecursively(tempDir)
    } finally {
      super.afterEach()
    }
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the new serialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new KryoSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") {
    sc = new SparkContext("local", "test", conf)
    // Create a shuffled RDD and verify that it actually uses the old deserialized map output path
    val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x))
    val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4))
      .setSerializer(new JavaSerializer(conf))
    val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]]
    assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep))
    ensureFilesAreCleanedUp(shuffledRdd)
  }

  private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = {
    def getAllFiles: Set[File] =
      FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet
    val filesBeforeShuffle = getAllFiles
    // Force the shuffle to be performed
    shuffledRdd.count()
    // Ensure that the shuffle actually created files that will need to be cleaned up
    val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle
    filesCreatedByShuffle.map(_.getName) should be (Set("shuffle_0_0_0.data", "shuffle_0_0_0.index"))
    // Check that the cleanup actually removes the files
    sc.env.blockManager.master.removeShuffle(0, blocking = true)
    for (file <- filesCreatedByShuffle) {
      assert (!file.exists(), s"Shuffle file $file was not cleaned up")
    }
  }
} 
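The recursive listing used by getAllFiles above, extracted into a sketch. TrueFileFilter.INSTANCE as the file filter accepts every file; as the directory filter it tells listFiles to descend into every subdirectory.

import java.io.File
import scala.collection.JavaConverters._
import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter

object ListFilesSketch {
  // Recursively collect every regular file under `dir`.
  def allFiles(dir: File): Set[File] =
    FileUtils.listFiles(dir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet
}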
Example 148
Source File: TransformerSerialization.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperables.spark.wrappers.transformers

import java.nio.file.{Files, Path}

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfter, Suite}

import ai.deepsense.deeplang.doperables.Transformer
import ai.deepsense.deeplang.doperables.dataframe.DataFrame
import ai.deepsense.deeplang.{DeeplangIntegTestSupport, ExecutionContext}

trait TransformerSerialization extends Suite with BeforeAndAfter {

  var tempDir: Path = _

  before {
    tempDir = Files.createTempDirectory("writeReadTransformer")
  }

  after {
    FileUtils.deleteDirectory(tempDir.toFile)
  }
}

object TransformerSerialization {

  implicit class TransformerSerializationOps(private val transformer: Transformer) {

    def applyTransformationAndSerialization(
        path: Path,
        df: DataFrame)(implicit executionContext: ExecutionContext): DataFrame = {
      val result = transformer._transform(executionContext, df)
      val deserialized = loadSerializedTransformer(path)
      val resultFromSerializedTransformer = deserialized._transform(executionContext, df)
      DeeplangIntegTestSupport.assertDataFramesEqual(result, resultFromSerializedTransformer)
      result
    }

    def loadSerializedTransformer(
        path: Path)(
        implicit executionContext: ExecutionContext): Transformer = {
      val outputPath: Path = path.resolve(this.getClass.getName)
      transformer.save(executionContext, outputPath.toString)
      Transformer.load(executionContext, outputPath.toString)
    }
  }
} 
Example 149
Source File: MavenAddManagedDependenciesRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.File

import org.apache.commons.io.FileUtils
import com.ebay.rtran.maven.util.MavenModelUtil
import com.ebay.rtran.maven.util.MavenModelUtil.SimpleDependency
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}


class MavenAddManagedDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MavenAddManagedDependenciesRule" should "be able to add dependencies to dependency management" in {
    val ruleConfig = MavenAddManagedDependenciesRuleConfig(
      Set(
        SimpleDependency("org.slf4j", "slf4j-api", Some("1.7.12")),
        SimpleDependency("com.typesafe.akka", "akka-actor_2.11", Some("2.3.9"))
      )
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenAddManagedDependenciesRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    val parent = transformed.parents.head
    val dm1 = parent.managedDependencies.values.find(_.getArtifactId == "slf4j-api")
    dm1 should not be None
    dm1.get.getVersion should be ("1.7.12")
    val dm2 = parent.managedDependencies.values.find(_.getArtifactId == "akka-actor_2.11")
    dm2 should not be None
    dm2.get.getVersion should be ("2.4.17")
  }
} 
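The beforeEach pattern shared by the RTran suites, as a sketch: deleteQuietly never throws (and tolerates a missing path), and copyDirectory then restores a pristine copy of the fixture project for the test to mutate.

import java.io.File
import org.apache.commons.io.FileUtils

object ProjectFixtureSketch {
  // Refresh a disposable working copy of a fixture project before each test.
  def refreshCopy(original: File, workingCopy: File): File = {
    FileUtils.deleteQuietly(workingCopy)           // ignores failures and missing paths
    FileUtils.copyDirectory(original, workingCopy)
    workingCopy
  }
}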
Example 150
Source File: MultiModuleMavenModelProviderTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.{File, FileReader}

import org.apache.commons.io.FileUtils
import org.apache.maven.model.io.xpp3.MavenXpp3Reader
import org.codehaus.plexus.util.xml.Xpp3Dom
import org.scalatest.{FlatSpecLike, Matchers}

import collection.JavaConversions._


class MultiModuleMavenModelProviderTest extends FlatSpecLike with Matchers {

  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)

  "MavenModelProvider" should "resolve all the pom files in the project" in {
    val projectCtx = new MavenProjectCtx(projectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider.create(projectCtx)

    model.modules foreach { m =>
      m.resolvedDependencies foreach {dep =>
        Option(dep.getVersion) should not be None
      }
    }
  }

  "MavenModelProvider" should "resolve all the pom files recursively in the project" in {
    val dir = new File(getClass.getClassLoader.getResource("recursive").getFile)
    val projectCtx = new MavenProjectCtx(dir)
    val provider = new MultiModuleMavenModelProvider
    val model = provider.create(projectCtx)
    model.modules.size should be (5)
  }

  "MavenModelProvider" should "not remove empty property nodes" in {
    val dir = new File(projectRoot.getParent, projectRoot.getName + "-bak")
    FileUtils.deleteQuietly(dir)
    FileUtils.copyDirectory(projectRoot, dir)
    val projectCtx = new MavenProjectCtx(dir)
    val provider = new MultiModuleMavenModelProvider
    val model = provider.create(projectCtx)

    provider save model

    val pom = new MavenXpp3Reader().read(new FileReader(new File(dir, "pom.xml")))
    pom.getProperties.getProperty("empty.property1") should be ("")
    pom.getProperties.getProperty("empty.property2") should be ("")
    pom.getProperties.getProperty("empty.property3") should be ("")
  }

  "MavenModelProvider" should "not break on xlint element" in {
    val dir = new File(projectRoot.getParent, projectRoot.getName + "-bak")
    FileUtils.deleteQuietly(dir)
    FileUtils.copyDirectory(projectRoot, dir)
    val projectCtx = new MavenProjectCtx(dir)
    val provider = new MultiModuleMavenModelProvider
    val model = provider.create(projectCtx)

    for {
      root <- model.parents.headOption
      build <- Option(root.pomModel.getBuild)
      sourcePlugin <- build.getPlugins.find(_.getArtifactId == "some-maven-plugin")
    } {
      build.removePlugin(sourcePlugin)
    }
    provider save model

    val pom = new MavenXpp3Reader().read(new FileReader(new File(dir, "pom.xml")))
    pom.getBuild.getPlugins.size() should be(1)
    val plugin = pom.getBuild.getPlugins.find(_.getArtifactId == "maven-source-plugin")
    plugin shouldNot be(None)
    plugin.map(_.getConfiguration.asInstanceOf[Xpp3Dom].getChild("compilerArguments").getChildCount) should be(Some(3))

  }

} 
Example 151
Source File: MavenDependenciesMappingRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.File

import org.apache.commons.io.FileUtils
import com.ebay.rtran.maven.util.MavenModelUtil
import com.ebay.rtran.maven.util.MavenModelUtil.SimpleDependency
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.collection.JavaConversions._


class MavenDependenciesMappingRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {
  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MavenDependenciesMappingRule" should "be able to alter dependencies according to mapping" in {
    val ruleConfig = MavenDependenciesMappingRuleConfig(
      Set(SimpleDependency("junit", "junit")),
      Set(SimpleDependency("org.slf4j", "slf4j-api"), SimpleDependency("org.slf4j", "slf4j-log4j12"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenDependenciesMappingRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (false)
      module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (true)
      module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (true)
    }
  }

  "MavenDependenciesMappingRule" should "not alter dependencies that don't exist" in {
    val ruleConfig = MavenDependenciesMappingRuleConfig(
      Set(SimpleDependency("org.slf4j", "slf4j-api")),
      Set(SimpleDependency("org.slf4j", "slf4j-log4j12"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenDependenciesMappingRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (false)
      module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (false)
    }
  }

  "MavenDependenciesMappingRule" should "alter dependencies matches that match other condition" in {
    val ruleConfig = MavenDependenciesMappingRuleConfig(
      Set(SimpleDependency("junit", "junit", Some("4.9"))),
      Set(SimpleDependency("org.slf4j", "slf4j-api"), SimpleDependency("org.slf4j", "slf4j-log4j12"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenDependenciesMappingRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      if (module.pomModel.getPackaging == "pom") {
        module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true)
      } else {
        module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (false)
        module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (true)
        module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (true)
      }
    }
  }

  "MavenDependenciesMappingRule" should "not alter dependencies if other condition doesn't match" in {
    val ruleConfig = MavenDependenciesMappingRuleConfig(
      Set(SimpleDependency("junit", "junit", scope = Some("compile"))),
      Set(SimpleDependency("org.slf4j", "slf4j-api"), SimpleDependency("org.slf4j", "slf4j-log4j12"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenDependenciesMappingRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true)
      module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (false)
      module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (false)
    }
  }
} 
Example 152
Source File: MavenRemoveDependenciesRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.File

import org.apache.commons.io.FileUtils
import com.ebay.rtran.maven.util.MavenModelUtil
import com.ebay.rtran.maven.util.MavenModelUtil.SimpleDependency
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.collection.JavaConversions._


class MavenRemoveDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {
  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MavenRemoveDependencies" should "be able to remove dependencies" in {
    val ruleConfig = MavenRemoveDependenciesRuleConfig(
      Set(SimpleDependency("junit", "junit"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemoveDependenciesRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (false)
    }
  }

  "MavenAddDependenciesRule" should "not remove dependencies that don't exist" in {
    val ruleConfig = MavenRemoveDependenciesRuleConfig(
      Set(SimpleDependency("org.slf4j", "slf4j-api"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemoveDependenciesRule(ruleConfig)
    val originalSizes = model.modules map (_.pomModel.getDependencies.size)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules map (_.pomModel.getDependencies.size) should be (originalSizes)
  }

  "MavenRemoveDependencies" should "remove dependencies matches that match other condition" in {
    val ruleConfig = MavenRemoveDependenciesRuleConfig(
      Set(SimpleDependency("junit", "junit", version = Some("4.9")))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemoveDependenciesRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      if (module.pomModel.getPackaging == "pom") {
        module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true)
      } else {
        module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (false)
      }
    }
  }

  "MavenRemoveDependencies" should "not remove dependencies if other condition doesn't match" in {
    val ruleConfig = MavenRemoveDependenciesRuleConfig(
      Set(SimpleDependency("junit", "junit", scope = Some("compile")))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemoveDependenciesRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true)
    }
  }
} 
Example 153
Source File: MavenPluginsMappingRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.collection.JavaConversions._


class MavenPluginsMappingRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {
  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MavenPluginsMappingRule" should "be able to alter both plugins and managed plugins" in {
    val ruleConfig = MavenPluginsMappingRuleConfig(
      List(
        PluginMapping(
          SimplePlugin(Some("com.ebay.rtran.old"), "some-maven-plugin"),
          SimplePlugin(Some("com.ebay.rtran.new"), "some-maven-plugin")
        )
      )
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenPluginsMappingRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getBuild.getPluginManagement.getPlugins
      .exists(_.getGroupId == "com.ebay.rtran.old") should be (false)

    transformed.parents.head
      .pomModel.getBuild.getPluginManagement.getPlugins
      .exists(_.getGroupId == "com.ebay.rtran.new") should be (true)

    transformed.parents.head
      .pomModel.getBuild.getPlugins
      .exists(_.getGroupId == "com.ebay.rtran.old") should be (false)

    transformed.parents.head
      .pomModel.getBuild.getPlugins
      .exists(_.getGroupId == "com.ebay.rtran.new") should be (true)
  }

  "MavenPluginsMappingRule" should "not alter plugins or managed plugins that don't exist" in {
    val ruleConfig = MavenPluginsMappingRuleConfig(
      List(
        PluginMapping(
          SimplePlugin(Some("com.ebay.rtran.old"), "non-exist"),
          SimplePlugin(Some("com.ebay.rtran.new"), "non-exist")
        )
      )
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenPluginsMappingRule(ruleConfig)
    val mpSize = model.parents.head
      .pomModel.getBuild.getPluginManagement.getPlugins.size
    val pluginSize = model.parents.head
      .pomModel.getBuild.getPlugins.size
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getBuild.getPluginManagement.getPlugins.size should be (mpSize)

    transformed.parents.head
      .pomModel.getBuild.getPluginManagement.getPlugins
      .exists(_.getGroupId == "com.ebay.rtran.old") should be (true)

    transformed.parents.head
      .pomModel.getBuild.getPlugins.size should be (pluginSize)

    transformed.parents.head
      .pomModel.getBuild.getPlugins
      .exists(_.getGroupId == "com.ebay.rtran.old") should be (true)
  }
} 
Example 154
Source File: MavenRemoveManagedDependenciesRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.File

import com.ebay.rtran.maven.util.MavenModelUtil
import MavenModelUtil.SimpleDependency
import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterEach, Matchers, FlatSpecLike}

import scala.collection.JavaConversions._


class MavenRemoveManagedDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {
  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MavenRemoveManagedDependenciesRule" should "be able to remove managed dependencies" in {
    val ruleConfig = MavenRemoveManagedDependenciesRuleConfig(
      Set(SimpleDependency("org.eclipse.aether", "aether-spi"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemoveManagedDependenciesRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getDependencyManagement.getDependencies.exists(_.getArtifactId == "aether-spi") should be (false)
  }

  "MavenRemoveManagedDependenciesRule" should "not remove managed dependencies that don't exist" in {
    val ruleConfig = MavenRemoveManagedDependenciesRuleConfig(
      Set(SimpleDependency("org.slf4j", "slf4j-api"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemoveManagedDependenciesRule(ruleConfig)
    val originalSize = model.parents.head
      .pomModel.getDependencyManagement.getDependencies.size
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getDependencyManagement.getDependencies.size should be (originalSize)
  }

  "MavenRemoveManagedDependenciesRule" should "remove managed dependencies matches that match other condition" in {
    val ruleConfig = MavenRemoveManagedDependenciesRuleConfig(
      Set(SimpleDependency("org.eclipse.aether", "aether-spi", version = Some("1.0.2.v20150114")))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemoveManagedDependenciesRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getDependencyManagement.getDependencies.exists(_.getArtifactId == "aether-spi") should be (false)
  }

  "MavenRemoveManagedDependenciesRule" should "not remove managed dependencies if other condition doesn't match" in {
    val ruleConfig = MavenRemoveManagedDependenciesRuleConfig(
      Set(SimpleDependency("org.eclipse.aether", "aether-spi", version = Some("1.0.3.v20150114")))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemoveManagedDependenciesRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getDependencyManagement.getDependencies.exists(_.getArtifactId == "aether-spi") should be (true)
  }
} 
Example 155
Source File: MavenRemoveRepositoriesRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}


class MavenRemoveRepositoriesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {
  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MavenRemoveRepositoriesRule" should "remove repository that matches given patterns" in {
    val ruleConfig = MavenRemoveRepositoriesRuleConfig(
      Set(
        ".*/content/repositories/releases[/]?",
        ".*/content/repositories/snapshots[/]?"
      )
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val rule = new MavenRemoveRepositoriesRule(ruleConfig)
    val model = provider create projectCtx
    provider save (rule transform model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      module.pomModel.getRepositories.size should be (0)
    }
  }
} 
Example 156
Source File: MavenAddDependenciesRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.File

import org.apache.commons.io.FileUtils
import com.ebay.rtran.maven.util.MavenModelUtil
import com.ebay.rtran.maven.util.MavenModelUtil.SimpleDependency
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.collection.JavaConversions._


class MavenAddDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach(): Unit = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MavenAddDependenciesRule" should "be able to add dependencies" in {
    val ruleConfig = MavenAddDependenciesRuleConfig(
      Set(
        SimpleDependency("org.slf4j", "slf4j-api"),
        SimpleDependency("org.slf4j", "slf4j-log4j12")
      )
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenAddDependenciesRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (true)
      module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (true)
    }
  }

  "MavenAddDependenciesRule" should "not add dependencies that already exist" in {
    val ruleConfig = MavenAddDependenciesRuleConfig(
      Set(
        SimpleDependency("junit", "junit")
      )
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenAddDependenciesRule(ruleConfig)
    val originalSize = model.modules
      .find(_.pomModel.getPackaging == "pom")
      .map(_.pomModel.getDependencies.size)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules
      .find(_.pomModel.getPackaging == "pom")
      .map(_.pomModel.getDependencies.size) should be (originalSize)
    transformed.modules foreach { module =>
      module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true)
    }
  }
} 
Example 157
Source File: MavenRemovePluginsRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.collection.JavaConversions._


class MavenRemovePluginsRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {
  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach(): Unit = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MavenRemovePluginsRule" should "be able to remove both plugins and managed plugins" in {
    val ruleConfig = MavenRemoveManagedPluginsRuleConfig(
      Set(SimplePlugin(artifactId = "maven-source-plugin"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemovePluginsRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getBuild.getPluginManagement.getPlugins
      .exists(_.getArtifactId == "maven-source-plugin") should be (false)

    transformed.parents.head
      .pomModel.getBuild.getPlugins
      .exists(_.getArtifactId == "maven-source-plugin") should be (false)
  }

  "MavenRemovePluginsRule" should "not remove plugins or managed plugins that don't exist" in {
    val ruleConfig = MavenRemoveManagedPluginsRuleConfig(
      Set(SimplePlugin(artifactId = "maven-surefire-plugin"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemovePluginsRule(ruleConfig)
    val mpSize = model.parents.head.pomModel.getBuild.getPluginManagement.getPlugins.size
    val pluginSize = model.parents.head.pomModel.getBuild.getPlugins.size
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getBuild.getPluginManagement.getPlugins.size should be (mpSize)

    transformed.parents.head
      .pomModel.getBuild.getPlugins.size should be (pluginSize)
  }

  "MavenRemovePluginsRule" should "remove both plugins and managed plugins matches that match other condition" in {
    val ruleConfig = MavenRemoveManagedPluginsRuleConfig(
      Set(SimplePlugin(artifactId = "maven-source-plugin", version = Some("2.2.1")))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemovePluginsRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getBuild.getPluginManagement.getPlugins
      .exists(_.getArtifactId == "maven-source-plugin") should be (false)

    transformed.parents.head
      .pomModel.getBuild.getPlugins
      .exists(_.getArtifactId == "maven-source-plugin") should be (false)
  }

  "MavenRemoveManagedPluginsRule" should "not remove plugins or managed plugins if other condition doesn't match" in {
    val ruleConfig = MavenRemoveManagedPluginsRuleConfig(
      Set(SimplePlugin(artifactId = "maven-source-plugin", version = Some("2.2.0")))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx
    val rule = new MavenRemovePluginsRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.parents.head
      .pomModel.getBuild.getPluginManagement.getPlugins
      .exists(_.getArtifactId == "maven-source-plugin") should be (true)

    transformed.parents.head
      .pomModel.getBuild.getPlugins
      .exists(_.getArtifactId == "maven-source-plugin") should be (true)
  }
} 
Example 158
Source File: MavenExcludeDependenciesRuleTest.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.maven

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.collection.JavaConversions._


class MavenExcludeDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach(): Unit = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MavenExcludeDependenciesRule" should "exclude the dependencies if they are used transitively" in {
    val ruleConfig = MavenExcludeDependenciesRuleConfig(
      Set(SimpleExclusion("org.springframework", "spring-aop"))
    )
    val projectCtx = new MavenProjectCtx(destProjectRoot)
    val provider = new MultiModuleMavenModelProvider
    val model = provider create projectCtx

    val rule = new MavenExcludeDependenciesRule(ruleConfig)
    provider save rule.transform(model)

    val transformed = provider create projectCtx
    transformed.modules foreach { module =>
      if (module.pomModel.getPackaging != "war") {
        module.pomModel.getDependencies.forall(_.getExclusions.size == 0) should be (true)
      } else {
        module.pomModel.getDependencies.exists(_.getExclusions.size > 0) should be (true)
      }
    }
  }

} 
Example 159
Source File: ModifyFilesRule.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.generic

import org.apache.commons.io.FileUtils
import com.ebay.rtran._
import com.ebay.rtran.api.{IRule, IRuleConfig}
import com.ebay.rtran.generic.util.{EncodingDetector, FilePathMatcher}

class ModifyFilesRule(ruleConfig: ModifyFilesRuleConfig) extends IRule[AllFilesModel] {

  override def transform(model: AllFilesModel): AllFilesModel = {
    val modified = model.files filter {file =>
      FilePathMatcher(model.projectRoot, ruleConfig.pathPattern).map(_ matches file) getOrElse false
    } map {file =>
      val content = ruleConfig.encoding map (encoding => FileUtils.readFileToString(file, encoding)) getOrElse {
        val (encoding, bytes) = EncodingDetector.guessEncoding(file)
        new String(bytes, encoding)
      }
      val newContent = ruleConfig.contentMappings.foldLeft(content) {(c, contentMapping) =>
        contentMapping match {
          case ContentMapping(regex, replacement, false) => c.replaceAll(regex, replacement)
          case ContentMapping(regex, replacement, true) => c.replaceFirst(regex, replacement)
        }
      }
      if (content != newContent) {
        FileUtils.write(file, newContent, false)
        Some(file)
      } else None
    } collect {
      case Some(f) => f
    }
    model.copy(modified = modified)
  }

}

case class ModifyFilesRuleConfig(pathPattern: String,
                                 encoding: Option[String],
                                 contentMappings: List[ContentMapping]) extends IRuleConfig

case class ContentMapping(regex: String, replacement: String, firstOnly: Boolean = false) 
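Driving this rule looks just like the Maven rule tests above: build the AllFilesModel through its provider, transform it, then save. The sketch below assumes GenericProjectCtx simply wraps the project root directory (its rootDir member suggests so); the path, the glob pattern and the content mapping are placeholders:

import java.io.File

import com.ebay.rtran.generic._

object ModifyFilesExample extends App {
  // Assumption: GenericProjectCtx is constructed from the project root; the path is a placeholder.
  val projectCtx = new GenericProjectCtx(new File("/path/to/project"))
  val provider = new AllFilesModelProvider

  // Rewrite every "log4j" reference to "slf4j" in *.properties files,
  // letting the rule guess each file's encoding (encoding = None).
  val ruleConfig = ModifyFilesRuleConfig(
    pathPattern = "**/*.properties",
    encoding = None,
    contentMappings = List(ContentMapping("log4j", "slf4j"))
  )

  val model = provider create projectCtx
  val rule = new ModifyFilesRule(ruleConfig)
  provider save rule.transform(model)
}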
Example 160
Source File: AllFilesModel.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.generic

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter
import com.ebay.rtran.api.{IModel, IModelProvider}

import scala.collection.JavaConversions._


case class AllFilesModel(projectRoot: File, files: List[File], modified: List[File] = List.empty) extends IModel

class AllFilesModelProvider extends IModelProvider[AllFilesModel, GenericProjectCtx] {
  override def id(): String = getClass.getName

  override def save(model: AllFilesModel): Unit = {
    // all file operations are performed in place,
    // so saving simply validates the model
    if (!model.files.forall(_.exists)) {
      throw new IllegalStateException(s"${model.files.filterNot(_.exists)} do not exist")
    }
    }
  }

  override def create(project: GenericProjectCtx): AllFilesModel = AllFilesModel(
    project.rootDir,
    FileUtils.listFiles(project.rootDir, TrueFileFilter.TRUE, TrueFileFilter.TRUE).toList
  )
} 
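The FileUtils call worth noting here is listFiles with TrueFileFilter.TRUE as both the file filter and the directory filter, which walks the whole tree recursively with no filtering. A standalone sketch of the same call, printing every file under the current directory:

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter

import scala.collection.JavaConversions._

object ListEverything extends App {
  val root = new File(".")
  // TrueFileFilter.TRUE twice = no file filtering and full recursion into subdirectories.
  val files: List[File] = FileUtils.listFiles(root, TrueFileFilter.TRUE, TrueFileFilter.TRUE).toList
  files foreach (f => println(f.getPath))
}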
Example 161
Source File: FilePathMatcher.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.generic.util

import java.io.File
import java.nio.file.{FileSystems, PathMatcher}

import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.io.FileUtils
import org.mozilla.universalchardet.CharsetListener

import scala.util.Try


object FilePathMatcher {

  def apply(rootDir: File, pathPattern: String): Try[PathMatcher] = Try {
    val trimmedPattern = new String(pathPattern.trim.toCharArray.dropWhile(_ == '/')).trim
    val path = rootDir.getAbsolutePath.replaceAll("\\\\", "/")
    FileSystems.getDefault.getPathMatcher(s"glob:$path/$trimmedPattern")
  }
}

object EncodingDetector extends LazyLogging {

  val DEFAULT_ENCODING = "UTF-8"

  def guessEncoding(file: File) = {
    val bytes = FileUtils.readFileToByteArray(file)
    val dummyListener = new CharsetListener {
      override def report(charset: String): Unit = {}
    }
    val detector = new org.mozilla.universalchardet.UniversalDetector(dummyListener)
    detector.handleData(bytes, 0, bytes.length)
    detector.dataEnd()
    val encoding = Option(detector.getDetectedCharset) getOrElse DEFAULT_ENCODING
    logger.debug("Detected encoding {} for {}", detector.getDetectedCharset, file)
    detector.reset()
    (encoding, bytes)
  }
} 
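Both utilities can be used on their own. A hedged sketch that checks a file against a glob pattern rooted at the project directory and then reads it with the detected encoding; the object name and both paths are placeholders:

import java.io.File

import com.ebay.rtran.generic.util.{EncodingDetector, FilePathMatcher}

object DetectAndMatch extends App {
  val root = new File("/path/to/project")
  val file = new File(root, "src/main/resources/app.properties")

  // FilePathMatcher returns a Try[PathMatcher]; a malformed pattern becomes a Failure.
  val matched = FilePathMatcher(root, "src/main/resources/**")
    .map(_ matches file.toPath)
    .getOrElse(false)

  // EncodingDetector reads the file with FileUtils.readFileToByteArray and falls back to UTF-8.
  val (encoding, bytes) = EncodingDetector.guessEncoding(file)
  val content = new String(bytes, encoding)

  println(s"matched=$matched, encoding=$encoding, chars=${content.length}")
}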
Example 162
Source File: MoveFilesRule.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.generic

import java.io.File

import com.ebay.rtran._
import org.apache.commons.io.FileUtils
import com.ebay.rtran.api.{IRule, IRuleConfig}
import com.ebay.rtran.generic.util.FilePathMatcher


class MoveFilesRule(ruleConfig: MoveFilesRuleConfig) extends IRule[AllFilesModel] {

  override def transform(model: AllFilesModel): AllFilesModel = {
    val result = ruleConfig.moves.foldLeft(model.files) {(files, move) =>
      val removes = files filter { file =>
        FilePathMatcher(model.projectRoot, move.pathPattern).map(_ matches file).getOrElse(false)
      }
      val dest = new File(model.projectRoot, move.destDir)
      val creates = removes map {f =>
        FileUtils.moveFileToDirectory(f, dest, true)
        new File(dest, f.getName)
      }
      (files diff removes) ++ creates // ++ binds tighter than diff, so the parentheses are required
    }
    model.copy(files = result)
  }
}

case class MoveFilesRuleConfig(moves: List[Move]) extends IRuleConfig

case class Move(pathPattern: String, destDir: String) 
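Wiring MoveFilesRule follows the same provider/transform/save cycle as ModifyFilesRule. A sketch under the same assumption about GenericProjectCtx, with a hypothetical move of SQL scripts into db/migrations; FileUtils.moveFileToDirectory creates the destination directory because the rule passes createDestDir = true:

import java.io.File

import com.ebay.rtran.generic._

object MoveFilesExample extends App {
  // Assumption: GenericProjectCtx wraps the project root; the path and pattern are placeholders.
  val projectCtx = new GenericProjectCtx(new File("/path/to/project"))
  val provider = new AllFilesModelProvider

  // Move every *.sql file into db/migrations, relative to the project root.
  val ruleConfig = MoveFilesRuleConfig(List(Move("**/*.sql", "db/migrations")))

  val model = provider create projectCtx
  val rule = new MoveFilesRule(ruleConfig)
  provider save rule.transform(model)
}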
Example 163
Source File: XMLFilesModel.scala    From RTran   with Apache License 2.0 5 votes vote down vote up
package com.ebay.rtran.xml

import java.io.{File, FileInputStream}

import org.apache.axiom.om.{OMElement, OMXMLBuilderFactory}
import org.apache.commons.io.FileUtils
import com.ebay.rtran.api.{IModel, IModelProvider}
import com.ebay.rtran.generic.GenericProjectCtx
import com.ebay.rtran.xml.util.XmlUtil

import scala.collection.JavaConversions._
import scala.language.postfixOps
import scala.util.{Success, Try}


case class XMLFilesModel(projectRoot: File,
                         xmlRoots: Map[File, OMElement],
                         modified: Map[File, Option[OMElement]] = Map.empty) extends IModel

class XMLFilesModelProvider extends IModelProvider[XMLFilesModel, GenericProjectCtx] {
  override def id(): String = getClass.getName

  override def save(model: XMLFilesModel): Unit = {
    model.modified foreach {
      case (file, root) => root.map(r => XmlUtil.writeOMElement2File(file, r))
    }
  }

  override def create(projectCtx: GenericProjectCtx): XMLFilesModel = XMLFilesModel(
    projectCtx.rootDir,
    FileUtils.listFiles(projectCtx.rootDir, Array("xml"), true) map {file =>
      file -> Try(OMXMLBuilderFactory.createOMBuilder(new FileInputStream(file)).getDocumentElement)
    } collect {
      case (f, Success(r)) => f -> r
    } toMap
  )
} 
Example 164
Source File: BigQueryClientSpecs.scala    From spark-bigquery   with Apache License 2.0 4 votes vote down vote up
package com.samelamin.spark.bigquery

import java.io.File

import com.google.api.services.bigquery.Bigquery
import com.google.api.services.bigquery.model._
import com.google.cloud.hadoop.io.bigquery._
import com.holdenkarau.spark.testing.DataFrameSuiteBase
import com.samelamin.spark.bigquery.converters.{BigQueryAdapter, SchemaConverters}
import org.apache.commons.io.FileUtils
import org.apache.spark.sql._
import org.mockito.Matchers.{any, eq => mockitoEq}
import org.mockito.Mockito._
import org.scalatest.FeatureSpec
import org.scalatest.mock.MockitoSugar


class BigQueryClientSpecs extends FeatureSpec with DataFrameSuiteBase with MockitoSugar {
  val BQProjectId = "google.com:foo-project"

  def setupBigQueryClient(sqlCtx: SQLContext, bigQueryMock: Bigquery): BigQueryClient = {
    val fakeJobReference = new JobReference()
    fakeJobReference.setProjectId(BQProjectId)
    fakeJobReference.setJobId("bigquery-job-1234")
    val dataProjectId = "publicdata"
    // Create the job result.
    val jobStatus = new JobStatus()
    jobStatus.setState("DONE")
    jobStatus.setErrorResult(null)

    val jobHandle = new Job()
    jobHandle.setStatus(jobStatus)
    jobHandle.setJobReference(fakeJobReference)

    // Create table reference.
    val tableRef = new TableReference()
    tableRef.setProjectId(dataProjectId)
    tableRef.setDatasetId("test_dataset")
    tableRef.setTableId("test_table")

    // Mock getting Bigquery jobs
    when(bigQueryMock.jobs().get(any[String], any[String]).execute())
      .thenReturn(jobHandle)
    when(bigQueryMock.jobs().insert(any[String], any[Job]).execute())
      .thenReturn(jobHandle)

    val bigQueryClient = new BigQueryClient(sqlCtx, bigQueryMock)
    bigQueryClient
  }

  scenario("When writing to BQ") {
    val sqlCtx = sqlContext
    import sqlCtx.implicits._
    val gcsPath = "/tmp/testfile2.json"
    FileUtils.deleteQuietly(new File(gcsPath))
    val adaptedDf = BigQueryAdapter(sc.parallelize(List(1, 2, 3)).toDF)
    val bigQueryMock = mock[Bigquery](RETURNS_DEEP_STUBS)
    val fullyQualifiedOutputTableId = "testProjectID:test_dataset.test"
    val targetTable = BigQueryStrings.parseTableReference(fullyQualifiedOutputTableId)
    val bigQueryClient = setupBigQueryClient(sqlCtx, bigQueryMock)
    val bigQuerySchema = SchemaConverters.SqlToBQSchema(adaptedDf)

    bigQueryClient.load(targetTable, bigQuerySchema, gcsPath)
    verify(bigQueryMock.jobs().insert(mockitoEq(BQProjectId), any[Job]), times(1)).execute()
  }

  scenario("When reading from BQ") {
    val sqlCtx = sqlContext
    val fullyQualifiedOutputTableId = "testProjectID:test_dataset.test"
    val sqlQuery = s"select * from $fullyQualifiedOutputTableId"

    val bqQueryContext = new BigQuerySQLContext(sqlCtx)
    bqQueryContext.setBigQueryProjectId(BQProjectId)
    val bigQueryMock = mock[Bigquery](RETURNS_DEEP_STUBS)
    val bigQueryClient = setupBigQueryClient(sqlCtx, bigQueryMock)
    bigQueryClient.selectQuery(sqlQuery)
    verify(bigQueryMock.jobs().insert(mockitoEq(BQProjectId), any[Job]), times(1)).execute()
  }

  scenario("When running a DML Queries") {
    val sqlCtx = sqlContext
    val fullyQualifiedOutputTableId = "testProjectID:test_dataset.test"
    val dmlQuery = s"UPDATE $fullyQualifiedOutputTableId SET test_col = new_value WHERE test_col = old_value"
    val bqQueryContext = new BigQuerySQLContext(sqlCtx)
    bqQueryContext.setBigQueryProjectId(BQProjectId)
    val bigQueryMock = mock[Bigquery](RETURNS_DEEP_STUBS)
    val bigQueryClient = setupBigQueryClient(sqlCtx, bigQueryMock)
    bigQueryClient.runDMLQuery(dmlQuery)
    verify(bigQueryMock.jobs().insert(mockitoEq(BQProjectId), any[Job]), times(1)).execute()
  }
}