java.io.BufferedWriter Scala Examples

The following examples show how to use java.io.BufferedWriter from Scala. Each example is taken from an open-source project; the source file, project, and license are noted above each listing.
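Before the project examples, here is a minimal, self-contained sketch of the pattern most of them share: wrap a Writer in a BufferedWriter, write lines, and close the writer in a finally block so the buffer is flushed. The file name is illustrative only.

import java.io.{BufferedWriter, File, FileWriter}

object BufferedWriterSketch extends App {
  // Illustrative output file; any writable path works.
  val file = new File("example-output.txt")
  val writer = new BufferedWriter(new FileWriter(file))
  try {
    writer.write("header1,header2")
    writer.newLine()
    writer.write("value1,value2")
    writer.newLine()
  } finally {
    // close() flushes the buffer and closes the underlying FileWriter.
    writer.close()
  }
}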
Example 1
Source File: BasicTestPerformance4Ftp.scala    From ohara   with Apache License 2.0
package oharastream.ohara.it.performance

import java.io.{BufferedWriter, OutputStreamWriter}
import java.util.concurrent.atomic.LongAdder

import oharastream.ohara.common.data.Row
import oharastream.ohara.common.util.{CommonUtils, Releasable}
import org.junit.AssumptionViolatedException
import spray.json.{JsNumber, JsString, JsValue}

import scala.jdk.CollectionConverters._
import oharastream.ohara.client.filesystem.FileSystem

import scala.concurrent.duration.Duration

abstract class BasicTestPerformance4Ftp extends BasicTestPerformance {
  private[this] val ftpHostname = value(PerformanceTestingUtils.FTP_HOSTNAME_KEY)
    .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_HOSTNAME_KEY} is required"))

  private[this] val ftpPort = value(PerformanceTestingUtils.FTP_PORT_KEY)
    .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PORT_KEY} is required"))
    .toInt

  private[this] val ftpUser = value(PerformanceTestingUtils.FTP_USER_KEY)
    .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_USER_KEY} is required"))

  private[this] val ftpPassword = value(PerformanceTestingUtils.FTP_PASSWORD_KEY)
    .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PASSWORD_KEY} is required"))

  
  protected val ftpSettings: Map[String, JsValue] = Map(
    // convert the hostname to IP address
    oharastream.ohara.connector.ftp.FTP_HOSTNAME_KEY  -> JsString(ftpHostname),
    oharastream.ohara.connector.ftp.FTP_PORT_KEY      -> JsNumber(ftpPort),
    oharastream.ohara.connector.ftp.FTP_USER_NAME_KEY -> JsString(ftpUser),
    oharastream.ohara.connector.ftp.FTP_PASSWORD_KEY  -> JsString(ftpPassword)
  )

  private[this] val csvInputFolderKey       = PerformanceTestingUtils.CSV_INPUT_KEY
  private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("/input")

  private[this] val cleanupTestDataKey   = PerformanceTestingUtils.DATA_CLEANUP_KEY
  protected val cleanupTestData: Boolean = value(cleanupTestDataKey).forall(_.toBoolean)

  protected def setupInputData(timeout: Duration): (String, Long, Long) = {
    val client = ftpClient()
    try {
      if (!PerformanceTestingUtils.exists(client, csvOutputFolder))
        PerformanceTestingUtils.createFolder(client, csvOutputFolder)

      val result = generateData(
        numberOfRowsToFlush,
        timeout,
        (rows: Seq[Row]) => {
          val file        = s"$csvOutputFolder/${CommonUtils.randomString()}"
          val writer      = new BufferedWriter(new OutputStreamWriter(client.create(file)))
          val count       = new LongAdder()
          val sizeInBytes = new LongAdder()

          try {
            val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet
            writer
              .append(cellNames.mkString(","))
              .append("\n")
            rows.foreach(row => {
              val content = row.cells().asScala.map(_.value).mkString(",")
              count.increment()
              sizeInBytes.add(content.length)
              writer.append(content).append("\n")
            })
            (count.longValue(), sizeInBytes.longValue())
          } finally Releasable.close(writer)
        }
      )
      (csvOutputFolder, result._1, result._2)
    } finally Releasable.close(client)
  }

  protected[this] def ftpClient() =
    FileSystem.ftpBuilder
      .hostname(ftpHostname)
      .port(ftpPort)
      .user(ftpUser)
      .password(ftpPassword)
      .build
} 
Example 2
Source File: LoadDistLabelKb.scala    From rex   with Apache License 2.0
package org.rex.dl

import java.io.{BufferedWriter, File, FileWriter}

import scala.io.Source
import scala.util.Try

object LoadDistLabelKb {

  import org.rex.dl.DistLabelStr._

  type Triple = (Query, Answer, Label)

  def parseTripleFromLine(l: String): Triple = {
    val bits = l.split("\t")
    (bits(0), bits(1), bits(2))
  }

  def apply(f: File): Try[KnowledgeBase] =
    Try {
      Source
        .fromFile(f)
        .getLines()
        .map(parseTripleFromLine)
        .foldLeft(Map.empty[Query, Map[Answer, Labels]]) {

          case (m, (q, a, r)) =>
            if (m contains q) {
              val answersForQ = m(q)
              (m - q) + (q -> (
                if (answersForQ contains a)
                  (answersForQ - a) + (a -> (answersForQ(a) + r))
                else
                  answersForQ + (a -> Set(r))
              ))

            } else
              m + (q -> Map(a -> Set(r)))
        }
    }

  def apply(kb: KnowledgeBase)(f: File): Try[Unit] =
    Try {
      val w = new BufferedWriter(new FileWriter(f))
      try {
        kb.foreach {
          case (q, amap) =>
            amap.foreach {
              case (a, labels) =>
                labels.foreach { l =>
                  w.write(s"${writeTripleToLine(q, a, l)}\n")
                }
            }
        }
      } finally {
        w.close()
      }
    }

  @inline def writeTripleToLine(t: Triple): String =
    writeTripleToLine(t._1, t._2, t._3)

  @inline def writeTripleToLine(q: Query, a: Answer, l: Label): String =
    s"$q\t$q\t$l"

} 
Example 3
Source File: ParseCSVwithHTML.scala    From CSYE7200_Old   with MIT License
package edu.neu.coe.csye7200.parse

import java.io.{BufferedWriter, File, FileWriter}

import scala.collection.mutable
import scala.io.{BufferedSource, Source}
import scala.util.{Failure, Success, Try}


object ParseCSVwithHTML extends App {
  val parser = ParseCSVwithHTML(CsvParser(delimiter = '\t' + ""))
  val title = "Report"
  if (args.length > 0) {
    val filename = args.head
    val source: BufferedSource = Source.fromFile(filename, "UTF-16")
    val w = parser.parseStreamIntoHTMLTable(source.getLines.toStream, title)
    val file = new File("output.html")
    val bw = new BufferedWriter(new FileWriter(file))
    bw.write(w)
    bw.close()
    println(s"Successfully written $file")
  }
  else
    System.err.println("syntax: ParseCSVwithHTML filename")

} 
Example 4
Source File: Using.scala    From Argus-SAF   with Apache License 2.0
package org.argus.jawa.core.compiler.compile.io

import java.io.{Closeable, FileInputStream, FileOutputStream, InputStream, OutputStream, File => JavaFile}
import java.io.{BufferedInputStream, BufferedOutputStream, InputStreamReader, OutputStreamWriter}
import java.io.{BufferedReader, BufferedWriter}
import java.util.zip.GZIPInputStream
import java.net.URL
import java.nio.channels.FileChannel
import java.nio.charset.Charset
import java.util.jar.{JarFile, JarInputStream, JarOutputStream}
import java.util.zip.{GZIPOutputStream, ZipEntry, ZipFile, ZipInputStream, ZipOutputStream}

import ErrorHandling.translate

import scala.reflect.{Manifest => SManifest}

abstract class Using[Source, T]
{
  protected def open(src: Source): T
  def apply[R](src: Source)(f: T => R): R =
  {
    val resource = open(src)
    try { f(resource) }
    finally { close(resource) }
  }
  protected def close(out: T): Unit
}
abstract class WrapUsing[Source, T](implicit srcMf: SManifest[Source], targetMf: SManifest[T]) extends Using[Source, T]
{
  protected def label[S](m: SManifest[S]): String = m.runtimeClass.getSimpleName
  protected def openImpl(source: Source): T
  protected final def open(source: Source): T =
    translate("Error wrapping " + label(srcMf) + " in " + label(targetMf) + ": ") { openImpl(source) }
}
trait OpenFile[T] extends Using[JavaFile, T]
{
  protected def openImpl(file: JavaFile): T
  protected final def open(file: JavaFile): T =
  {
    val parent = file.getParentFile
    if(parent != null)
      IO.createDirectory(parent)
    openImpl(file)
  }
}
object Using
{
  def wrap[Source, T<: Closeable](openF: Source => T)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source,T] =
    wrap(openF, closeCloseable)
  def wrap[Source, T](openF: Source => T, closeF: T => Unit)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source,T] =
    new WrapUsing[Source, T]
    {
      def openImpl(source: Source): T = openF(source)
      def close(t: T): Unit = closeF(t)
    }

  def resource[Source, T <: Closeable](openF: Source => T): Using[Source,T] =
    resource(openF, closeCloseable)
  def resource[Source, T](openF: Source => T, closeF: T => Unit): Using[Source,T] =
    new Using[Source,T]
    {
      def open(s: Source): T = openF(s)
      def close(s: T): Unit = closeF(s)
    }
  def file[T <: Closeable](openF: JavaFile => T): OpenFile[T] = file(openF, closeCloseable)
  def file[T](openF: JavaFile => T, closeF: T => Unit): OpenFile[T] =
    new OpenFile[T]
    {
      def openImpl(file: JavaFile): T = openF(file)
      def close(t: T): Unit = closeF(t)
    }
  private def closeCloseable[T <: Closeable]: T => Unit = _.close()

  def bufferedOutputStream: Using[OutputStream, BufferedOutputStream] = wrap((out: OutputStream) => new BufferedOutputStream(out) )
  def bufferedInputStream: Using[InputStream, BufferedInputStream] = wrap((in: InputStream) => new BufferedInputStream(in) )
  def fileOutputStream(append: Boolean = false): OpenFile[BufferedOutputStream] = file(f => new BufferedOutputStream(new FileOutputStream(f, append)))
  def fileInputStream: OpenFile[BufferedInputStream] = file(f => new BufferedInputStream(new FileInputStream(f)))
  def urlInputStream: Using[URL, BufferedInputStream] = resource((u: URL) => translate("Error opening " + u + ": ")(new BufferedInputStream(u.openStream)))
  def fileOutputChannel: OpenFile[FileChannel] = file(f => new FileOutputStream(f).getChannel)
  def fileInputChannel: OpenFile[FileChannel] = file(f => new FileInputStream(f).getChannel)
  def fileWriter(charset: Charset = IO.utf8, append: Boolean = false): OpenFile[BufferedWriter] =
    file(f => new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f, append), charset)) )
  def fileReader(charset: Charset): OpenFile[BufferedReader] = file(f => new BufferedReader(new InputStreamReader(new FileInputStream(f), charset)) )
  def urlReader(charset: Charset): Using[URL, BufferedReader] = resource((u: URL) => new BufferedReader(new InputStreamReader(u.openStream, charset)))
  def jarFile(verify: Boolean): OpenFile[JarFile] = file(f => new JarFile(f, verify), (_: JarFile).close())
  def zipFile: OpenFile[ZipFile] = file(f => new ZipFile(f), (_: ZipFile).close())
  def streamReader: Using[(InputStream, Charset), InputStreamReader] = wrap{ (_: (InputStream, Charset)) match { case (in, charset) => new InputStreamReader(in, charset) } }
  def gzipInputStream: Using[InputStream, GZIPInputStream] = wrap((in: InputStream) => new GZIPInputStream(in, 8192) )
  def zipInputStream: Using[InputStream, ZipInputStream] = wrap((in: InputStream) => new ZipInputStream(in))
  def zipOutputStream: Using[OutputStream, ZipOutputStream] = wrap((out: OutputStream) => new ZipOutputStream(out))
  def gzipOutputStream: Using[OutputStream, GZIPOutputStream] = wrap((out: OutputStream) => new GZIPOutputStream(out, 8192), (_: GZIPOutputStream).finish())
  def jarOutputStream: Using[OutputStream, JarOutputStream] = wrap((out: OutputStream) => new JarOutputStream(out))
  def jarInputStream: Using[InputStream, JarInputStream] = wrap((in: InputStream) => new JarInputStream(in))
  def zipEntry(zip: ZipFile): Using[ZipEntry, InputStream] = resource((entry: ZipEntry) =>
    translate("Error opening " + entry.getName + " in " + zip + ": ") { zip.getInputStream(entry) } )
} 
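The helpers above implement the loan pattern: open a resource, hand it to a function, and close it afterwards regardless of errors. A minimal usage sketch follows, assuming the Using object above is in scope; the file name and charset are illustrative and not part of the original source.

import java.io.{File => JavaFile}
import java.nio.charset.StandardCharsets

object UsingSketch {
  // Loans a BufferedWriter for the given file to the block and closes it when the block returns.
  def writeGreeting(): Unit =
    Using.fileWriter(StandardCharsets.UTF_8, append = false)(new JavaFile("greeting.txt")) { writer =>
      writer.write("hello")
      writer.newLine()
    }
}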
Example 5
Source File: FileLogger.scala    From odin   with Apache License 2.0
package io.odin.loggers

import java.io.BufferedWriter
import java.nio.file.{Files, Paths}

import cats.effect.syntax.all._
import cats.effect.{Resource, Sync, Timer}
import cats.instances.list._
import cats.syntax.all._
import io.odin.formatter.Formatter
import io.odin.{Level, Logger, LoggerMessage}


case class FileLogger[F[_]: Timer](buffer: BufferedWriter, formatter: Formatter, override val minLevel: Level)(
    implicit F: Sync[F]
) extends DefaultLogger[F](minLevel) {
  def log(msg: LoggerMessage): F[Unit] =
    write(msg, formatter).guarantee(flush)

  override def log(msgs: List[LoggerMessage]): F[Unit] =
    msgs.traverse(write(_, formatter)).void.guarantee(flush)

  private def write(msg: LoggerMessage, formatter: Formatter): F[Unit] =
    F.delay {
      buffer.write(formatter.format(msg) + System.lineSeparator())
    }

  private def flush: F[Unit] = F.delay(buffer.flush()).handleErrorWith(_ => F.unit)
}

object FileLogger {
  def apply[F[_]: Timer](fileName: String, formatter: Formatter, minLevel: Level)(
      implicit F: Sync[F]
  ): Resource[F, Logger[F]] = {
    def mkBuffer: F[BufferedWriter] = F.delay(Files.newBufferedWriter(Paths.get(fileName)))
    def closeBuffer(buffer: BufferedWriter): F[Unit] =
      F.delay(buffer.close()).handleErrorWith(_ => F.unit)

    Resource.make(mkBuffer)(closeBuffer).map { buffer =>
      FileLogger(buffer, formatter, minLevel)
    }
  }
} 
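A possible way to wire this logger into a cats-effect 2 application, shown as a hedged sketch: the file name is illustrative, and Formatter.default, Level.Info, and the Timer[IO] supplied by IOApp are assumptions about the surrounding odin/cats-effect setup rather than part of the example above.

import cats.effect.{ExitCode, IO, IOApp}
import io.odin.Level
import io.odin.formatter.Formatter
import io.odin.loggers.FileLogger

object FileLoggerSketch extends IOApp {
  // Acquires the BufferedWriter-backed logger as a Resource, logs one line,
  // and closes the underlying writer when the Resource is released.
  def run(args: List[String]): IO[ExitCode] =
    FileLogger[IO]("application.log", Formatter.default, Level.Info)
      .use(logger => logger.info("file logger initialised"))
      .map(_ => ExitCode.Success)
}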
Example 6
Source File: TsvRetrieverFromFile.scala    From CM-Well   with Apache License 2.0
package cmwell.dc.stream

import java.io.{BufferedWriter, File, FileWriter}

import akka.actor.ActorSystem
import akka.stream.{KillSwitch, KillSwitches, Materializer}
import akka.stream.Supervision.Decider
import akka.stream.contrib.SourceGen
import akka.stream.scaladsl.{Flow, Keep, Sink, Source}
import akka.util.ByteString
import cmwell.dc.LazyLogging
import cmwell.dc.stream.MessagesTypesAndExceptions.{DcInfo, InfotonData}
import cmwell.dc.stream.TsvRetriever.{logger, TsvFlowOutput}
import cmwell.util.resource._

import scala.concurrent.Future
import scala.util.{Failure, Success}
import scala.concurrent.ExecutionContext.Implicits.global


object TsvRetrieverFromFile extends LazyLogging {

  def apply(dcInfo: DcInfo)(implicit mat: Materializer,
                            system: ActorSystem): Source[InfotonData, (KillSwitch, Future[Seq[Option[String]]])] = {
    val persistFile = dcInfo.tsvFile.get + ".persist"

    def appendToPersistFile(str: String): Unit = {
      val bw = new BufferedWriter(new FileWriter(persistFile, true))
      bw.write(str)
      bw.close()
    }

    val linesToDrop = dcInfo.positionKey.fold {
      if (!new File(persistFile).exists) 0L
      else using(scala.io.Source.fromFile(persistFile))(_.getLines.toList.last.toLong)
    }(pos => pos.toLong)
    val positionKeySink = Flow[InfotonData]
      .recover {
        case e: Throwable => InfotonData(null, null, -1)
      }
      .scan(linesToDrop) {
        case (count, InfotonData(null, null, -1)) => {
          appendToPersistFile("crash at: " + count + "\n" + count.toString + "\n")
          count
        }
        case (count, _) => {
          val newCount = count + 1
          if (newCount % 10000 == 0) appendToPersistFile(newCount.toString + "\n")
          newCount
        }
      }
      .toMat(Sink.last)(
        (_, right) =>
          right.map { count =>
            appendToPersistFile(count.toString + "\n")
            Seq.fill(2)(Option(count.toString))
        }
      )

    Source
      .fromIterator(() => scala.io.Source.fromFile(dcInfo.tsvFile.get).getLines())
      .drop {
        logger.info(s"Dropping $linesToDrop initial lines from file ${dcInfo.tsvFile.get} for sync ${dcInfo.key}")
        linesToDrop
      }
      .viaMat(KillSwitches.single)(Keep.right)
      .map(line => TsvRetriever.parseTSVAndCreateInfotonDataFromIt(ByteString(line)))
      .alsoToMat(positionKeySink)(Keep.both)
  }
} 
Example 7
Source File: HadoopFSHelpers.scala    From morpheus   with Apache License 2.0
package org.opencypher.morpheus.api.io.fs

import java.io.{BufferedReader, BufferedWriter, InputStreamReader, OutputStreamWriter}

import org.apache.hadoop.fs.{FileSystem, Path}
import org.opencypher.morpheus.api.io.util.FileSystemUtils.using

object HadoopFSHelpers {

  implicit class RichHadoopFileSystem(fileSystem: FileSystem) {

    protected def createDirectoryIfNotExists(path: Path): Unit = {
      if (!fileSystem.exists(path)) {
        fileSystem.mkdirs(path)
      }
    }

    def listDirectories(path: String): List[String] = {
      val p = new Path(path)
      createDirectoryIfNotExists(p)
      fileSystem.listStatus(p)
        .filter(_.isDirectory)
        .map(_.getPath.getName)
        .toList
    }

    def deleteDirectory(path: String): Unit = {
      fileSystem.delete(new Path(path),  true)
    }

    def readFile(path: String): String = {
      using(new BufferedReader(new InputStreamReader(fileSystem.open(new Path(path)), "UTF-8"))) { reader =>
        def readLines = Stream.cons(reader.readLine(), Stream.continually(reader.readLine))
        readLines.takeWhile(_ != null).mkString
      }
    }

    def writeFile(path: String, content: String): Unit = {
      val p = new Path(path)
      val parentDirectory = p.getParent
      createDirectoryIfNotExists(parentDirectory)
      using(fileSystem.create(p)) { outputStream =>
        using(new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"))) { bufferedWriter =>
          bufferedWriter.write(content)
        }
      }
    }
  }

} 
Example 8
Source File: Main.scala    From scalajs-highcharts   with MIT License
package com.karasiq.highcharts.generator

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter, PrintWriter}
import java.nio.file._
import java.nio.file.attribute.BasicFileAttributes

import scala.util.control.Exception
import scalaj.http.{Http, HttpOptions}

import com.karasiq.highcharts.generator.writers.{ScalaClassWriter, ScalaJsClassBuilder}

case class HighchartsApiDoc(library: String) {
  private val defaultPackage = System.getProperty(s"highcharts-generator.$library.package", s"com.$library")

  private def httpGet(url: String): List[ConfigurationObject] = {
    val page = Http.get(url)
      .header("User-Agent", "Mozilla/5.0 (X11; OpenBSD amd64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36")
      .header("Accept", "application/json")
      .options(HttpOptions.connTimeout(10000), HttpOptions.readTimeout(10000))

    val json = page.asString
    ConfigurationObject.fromJson(json)
  }

  private def writeFiles(pkg: String, configs: List[ConfigurationObject], rootObject: Option[String] = None): Unit = {
    val header =
      s"""
          |package $pkg
          |
          |import scalajs.js, js.`|`
          |import com.highcharts.CleanJsObject
          |import com.highcharts.HighchartsUtils._
          |
          |""".stripMargin

    val outputDir = Paths.get(System.getProperty("highcharts-generator.output", "src/main/scala"), pkg.split("\\."):_*)
    Files.createDirectories(outputDir)

    // Remove all files
    Files.walkFileTree(outputDir, new SimpleFileVisitor[Path] {
      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
        Files.delete(file)
        FileVisitResult.CONTINUE
      }
    })

    val classes = new ScalaJsClassBuilder().parse(configs, rootObject)
    val classWriter = new ScalaClassWriter
    classes.foreach { scalaJsClass ⇒
      val file = outputDir.resolve(scalaJsClass.scalaName + ".scala")
      println(s"Writing $file...")
      val writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file.toFile, true), "UTF-8")))
      Exception.allCatch.andFinally(writer.close()) {
        if (Files.size(file) == 0) {
          writer.print(header)
        }
        classWriter.writeClass(scalaJsClass) { line ⇒
          writer.println(line)
        }
        writer.flush()
      }
    }
  }

  def writeConfigs(): Unit = {
    val configs = httpGet(s"https://api.highcharts.com/$library/dump.json")
    writeFiles(s"$defaultPackage.config", configs, Some(s"${library.capitalize}Config"))
  }

  def writeApis(): Unit = {
    val configs = httpGet(s"https://api.highcharts.com/$library/object/dump.json")
    writeFiles(s"$defaultPackage.api", configs)
  }

  def writeAll(): Unit = {
    // TODO: https://github.com/highcharts/highcharts/issues/7227
    writeConfigs()
    // writeApis() // TODO: 404
  }
}

object Main extends App {
  HighchartsApiDoc("highcharts").writeAll()
  HighchartsApiDoc("highstock").writeAll()
  HighchartsApiDoc("highmaps").writeAll()
} 
Example 9
Source File: FeatureSelection.scala    From aerosolve   with Apache License 2.0
package com.airbnb.aerosolve.training

import java.io.BufferedWriter
import java.io.OutputStreamWriter
import java.util

import com.airbnb.aerosolve.core.{ModelRecord, ModelHeader, FeatureVector, Example}
import com.airbnb.aerosolve.core.models.LinearModel
import com.airbnb.aerosolve.core.util.Util
import com.typesafe.config.Config
import org.slf4j.{LoggerFactory, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.Buffer
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import scala.util.Random
import scala.math.abs
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

object FeatureSelection {
  private final val log: Logger = LoggerFactory.getLogger("FeatureSelection")
  val allKey : (String, String) = ("$ALL", "$POS")

  // Given a RDD compute the pointwise mutual information between
  // the positive label and the discrete features.
  def pointwiseMutualInformation(examples : RDD[Example],
                                 config : Config,
                                 key : String,
                                 rankKey : String,
                                 posThreshold : Double,
                                 minPosCount : Double,
                                 newCrosses : Boolean) : RDD[((String, String), Double)] = {
    val pointwise = LinearRankerUtils.makePointwise(examples, config, key, rankKey)
    val features = pointwise
      .mapPartitions(part => {
      // The tuple2 is var, var | positive
      val output = scala.collection.mutable.HashMap[(String, String), (Double, Double)]()
      part.foreach(example =>{
        val featureVector = example.example.get(0)
        val isPos = if (featureVector.floatFeatures.get(rankKey).asScala.head._2 > posThreshold) 1.0
        else 0.0
        val all : (Double, Double) = output.getOrElse(allKey, (0.0, 0.0))
        output.put(allKey, (all._1 + 1.0, all._2 + 1.0 * isPos))

        val features : Array[(String, String)] =
          LinearRankerUtils.getFeatures(featureVector)
        if (newCrosses) {
          for (i <- features) {
            for (j <- features) {
              if (i._1 < j._1) {
                val key = ("%s<NEW>%s".format(i._1, j._1),
                           "%s<NEW>%s".format(i._2, j._2))
                val x = output.getOrElse(key, (0.0, 0.0))
                output.put(key, (x._1 + 1.0, x._2 + 1.0 * isPos))
              }
            }
          }
        }
        for (feature <- features) {
          val x = output.getOrElse(feature, (0.0, 0.0))
          output.put(feature, (x._1 + 1.0, x._2 + 1.0 * isPos))
        }
      })
      output.iterator
    })
    .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))
    .filter(x => x._2._2 >= minPosCount)

    val allCount = features.filter(x => x._1.equals(allKey)).take(1).head

    features.map(x => {
      val prob = x._2._1 / allCount._2._1
      val probPos = x._2._2 / allCount._2._2
      (x._1, math.log(probPos / prob) / math.log(2.0))
    })
  }

  // Returns the maximum entropy per family
  def maxEntropy(input : RDD[((String, String), Double)]) : RDD[((String, String), Double)] = {
    input
      .map(x => (x._1._1, (x._1._2, x._2)))
      .reduceByKey((a, b) => if (math.abs(a._2) > math.abs(b._2)) a else b)
      .map(x => ((x._1, x._2._1), x._2._2))
  }
} 
Example 10
Source File: FileDownloader.scala    From seahorse   with Apache License 2.0
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage

import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter}
import java.nio.file.{Files, Paths}
import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import ai.deepsense.deeplang.ExecutionContext
import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import ai.deepsense.deeplang.doperations.readwritedataframe.FilePath

private[filestorage] object FileDownloader {

  def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = {
    if (context.tempPath.startsWith("hdfs://")) {
      downloadFileToHdfs(url)
    } else {
      downloadFileToDriver(url)
    }
  }

  private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = {
    val content = scala.io.Source.fromURL(url).getLines()
    val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}"

    val configuration = new Configuration()
    val hdfs = FileSystem.get(configuration)
    val file = new Path(hdfsPath)
    val hdfsStream = hdfs.create(file)
    val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream))
    try {
      content.foreach {s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
      hdfs.close()
    }

    FilePath(hdfsPath)
  }

  private def downloadFileToDriver(url: String)
                                  (implicit context: ExecutionContext) = {
    val outputDirPath = Paths.get(context.tempPath)
    // We're checking if the output is a directory following symlinks.
    // The default behaviour of createDirectories is NOT to follow symlinks
    if (!Files.isDirectory(outputDirPath)) {
      Files.createDirectories(outputDirPath)
    }

    val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv")
    // content is a stream. Do not invoke stuff like .toList() on it.
    val content = scala.io.Source.fromURL(url).getLines()
    val writer: BufferedWriter =
      new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile)))
    try {
      content.foreach {s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
    }
    FilePath(s"file:///$outFilePath")
  }

  private def safeClose(bufferedWriter: BufferedWriter): Unit = {
    try {
      bufferedWriter.flush()
      bufferedWriter.close()
    } catch {
      case e: IOException => throw new DeepSenseIOException(e)
    }
  }

} 
Example 11
Source File: JsonFileReporter.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.metrics

import java.io.{BufferedWriter, Closeable, IOException, OutputStreamWriter}
import java.util.{Timer, TimerTask}
import java.util.concurrent.TimeUnit

import scala.util.Try
import scala.util.control.NonFatal

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.json.MetricsModule
import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.kyuubi.Logging
import org.apache.spark.{KyuubiSparkUtil, SparkConf}
import org.apache.spark.KyuubiConf._

private[metrics] class JsonFileReporter(conf: SparkConf, registry: MetricRegistry)
  extends Closeable with Logging {

  private val jsonMapper = new ObjectMapper().registerModule(
    new MetricsModule(TimeUnit.MILLISECONDS, TimeUnit.MILLISECONDS, false))
  private val timer = new Timer(true)
  private val interval = KyuubiSparkUtil.timeStringAsMs(conf.get(METRICS_REPORT_INTERVAL))
  private val path = conf.get(METRICS_REPORT_LOCATION)
  private val hadoopConf = KyuubiSparkUtil.newConfiguration(conf)

  def start(): Unit = {
    timer.schedule(new TimerTask {
      var bw: BufferedWriter = _
      override def run(): Unit = try {
        val json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(registry)
        val tmpPath = new Path(path + ".tmp")
        val tmpPathUri = tmpPath.toUri
        val fs = if (tmpPathUri.getScheme == null && tmpPathUri.getAuthority == null) {
          FileSystem.getLocal(hadoopConf)
        } else {
          FileSystem.get(tmpPathUri, hadoopConf)
        }
        fs.delete(tmpPath, true)
        bw = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath, true)))
        bw.write(json)
        bw.close()
        fs.setPermission(tmpPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
        val finalPath = new Path(path)
        fs.rename(tmpPath, finalPath)
        fs.setPermission(finalPath,
          FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
      } catch {
        case NonFatal(e) => error("Error writing metrics to json file" + path, e)
      } finally {
        if (bw != null) {
          Try(bw.close())
        }
      }
    }, 0, interval)
  }

  override def close(): Unit = {
    timer.cancel()
  }
} 
Example 12
Source File: NerHelper.scala    From spark-nlp   with Apache License 2.0
package com.johnsnowlabs.benchmarks.spark

import java.io.{BufferedWriter, File, FileWriter}

import com.johnsnowlabs.nlp.annotators.common.NerTagged
import com.johnsnowlabs.nlp.training.CoNLL
import com.johnsnowlabs.nlp.{Annotation, SparkAccessor}
import com.johnsnowlabs.nlp.util.io.ExternalResource
import org.apache.spark.ml.PipelineModel

import scala.collection.mutable


object NerHelper {

  
  def saveNerSpanTags(annotations: Array[Array[Annotation]], file: String): Unit = {
    val bw = new BufferedWriter(new FileWriter(new File(file)))

    bw.write(s"start\tend\ttag\ttext\n")
    for (i <- 0 until annotations.length) {
      for (a <- annotations(i))
        bw.write(s"${a.begin}\t${a.end}\t${a.result}\t${a.metadata("entity").replace("\n", " ")}\n")
    }
    bw.close()
  }

  def calcStat(correct: Int, predicted: Int, predictedCorrect: Int): (Float, Float, Float) = {
    // prec = (predicted & correct) / predicted
    // rec = (predicted & correct) / correct
    val prec = predictedCorrect.toFloat / predicted
    val rec = predictedCorrect.toFloat / correct
    val f1 = 2 * prec * rec / (prec + rec)

    (prec, rec, f1)
  }

  def measureExact(nerReader: CoNLL, model: PipelineModel, file: ExternalResource, printErrors: Int = 0): Unit = {
    val df = nerReader.readDataset(SparkAccessor.benchmarkSpark, file.path).toDF()
    val transformed = model.transform(df)
    val rows = transformed.select("ner_span", "label_span").collect()

    val correctPredicted = mutable.Map[String, Int]()
    val predicted = mutable.Map[String, Int]()
    val correct = mutable.Map[String, Int]()
    var toPrintErrors = printErrors

    for (row <- rows) {

      val predictions = NerTagged.getAnnotations(row, 0).filter(a => a.result != "O")
      val labels = NerTagged.getAnnotations(row, 1).filter(a => a.result != "O")

      for (p <- predictions) {
        val tag = p.metadata("entity")
        predicted(tag) = predicted.getOrElse(tag, 0) + 1
      }

      for (l <- labels) {
        val tag = l.metadata("entity")
        correct(tag) = correct.getOrElse(tag, 0) + 1
      }

      val correctPredictions = labels.toSet.intersect(predictions.toSet)

      for (a <- correctPredictions) {
        val tag = a.metadata("entity")
        correctPredicted(tag) = correctPredicted.getOrElse(tag, 0) + 1
      }

      if (toPrintErrors > 0) {
        for (p <- predictions) {
          if (toPrintErrors > 0 && !correctPredictions.contains(p)) {
            System.out.println(s"Predicted\t${p.result}\t${p.begin}\t${p.end}\t${p.metadata("text")}")
            toPrintErrors -= 1
          }
        }

        for (p <- labels) {
          if (toPrintErrors > 0 && !correctPredictions.contains(p)) {
            System.out.println(s"Correct\t${p.result}\t${p.begin}\t${p.end}\t${p.metadata("text")}")
            toPrintErrors -= 1
          }
        }
      }
    }

    val (prec, rec, f1) = calcStat(correct.values.sum, predicted.values.sum, correctPredicted.values.sum)
    System.out.println(s"$prec\t$rec\t$f1")

    val tags = (correct.keys ++ predicted.keys ++ correctPredicted.keys).toList.distinct

    for (tag <- tags) {
      val (prec, rec, f1) = calcStat(correct.getOrElse(tag, 0), predicted.getOrElse(tag, 0), correctPredicted.getOrElse(tag, 0))
      System.out.println(s"$tag\t$prec\t$rec\t$f1")
    }
  }
} 
Example 13
Source File: TrajSampling.scala    From traj-sim-spark   with Apache License 2.0
package edu.utah.cs.trajectory

import java.io.{BufferedWriter, File, FileWriter}

import edu.utah.cs.spatial.{LineSegment, Point}
import org.apache.spark.{SparkConf, SparkContext}


object TrajSampling {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("TrajSampling")
    val sc = new SparkContext(sparkConf)

    Thread.sleep(3000)

    if (args.length < 3) {
      println("usage: TrajSampling <input_file_path> <output_file_path> <sample_count>")
      System.exit(1)
    }

    val input_file_path = args(0)
    val output_file_path = args(1)
    val cnt = args(2).toInt

    val sampled_trajs = sc.textFile(input_file_path).mapPartitions(iter => {
      iter.map(x => {
        val splitted = x.split("\t")
        (splitted(0).toInt,
          LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)),
            Point(Array(splitted(3).toDouble, splitted(4).toDouble))))
      }).toArray.groupBy(_._1).map(now => (now._1, now._2.sortBy(_._1).map(_._2))).iterator
    }).takeSample(withReplacement = false, cnt, System.currentTimeMillis())

    val file = new File(output_file_path)
    val bw = new BufferedWriter(new FileWriter(file))

    for (i <- sampled_trajs.indices) {
      val cur_traj = sampled_trajs(i)._2
      cur_traj.foreach(x => bw.write(i + "\t" + x.toTSV + "\n"))
    }

    bw.close()

    sc.stop()
  }
} 
Example 14
Source File: LineSegmentClustering.scala    From traj-sim-spark   with Apache License 2.0
package edu.utah.cs.trajectory

import java.io.{BufferedWriter, File, FileWriter}

import com.vividsolutions.jts.geom.{GeometryCollection, GeometryFactory}
import edu.utah.cs.partitioner.STRSegPartition
import edu.utah.cs.spatial.{LineSegment, MBR, Point, Polygon}
import edu.utah.cs.util.{BloomFilter, BloomFilterMeta}
import org.apache.spark.{SparkConf, SparkContext}
import org.geotools.geojson.geom.GeometryJSON


object LineSegmentClustering {
  final val max_entries_per_node = 25
  final val k = 10
  final val N = 34085

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("LineSegmentClustering"))

    if (args.length < 2) {
      println("usage: SpatialSpanClustering <input_file_path> <output_file_path>")
      System.exit(1)
    }

    val input_file_path = args(0)
    val output_file_path = args(1)

    val dataRDD = sc.textFile(input_file_path)
      .map(x => x.split('\t'))
      .map(x => (LineSegment(Point(Array(x(2).toDouble, x(1).toDouble)),
        Point(Array(x(4).toDouble, x(3).toDouble))),
        TrajMeta(x(0).toInt, x(5).toInt)))

    val bf_meta = BloomFilterMeta(N, 1)
    val bc_bf_meta = sc.broadcast(bf_meta)
    BloomFilter.meta = bf_meta

    val num_partitions = dataRDD.getNumPartitions
    val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, num_partitions, 0.01, max_entries_per_node)

    val part_bounds = partitioned_rdd.mapPartitions(iter => {
      if (iter.nonEmpty) {
        var maxx = Double.MinValue
        var maxy = Double.MinValue
        var minx = Double.MaxValue
        var miny = Double.MaxValue
        iter.map(_._1).foreach(x => {
          maxx = Math.max(Math.max(x.start.coord(0), x.end.coord(0)), maxx)
          maxy = Math.max(Math.max(x.start.coord(1), x.end.coord(1)), maxy)
          minx = Math.min(Math.min(x.start.coord(0), x.end.coord(0)), minx)
          miny = Math.min(Math.min(x.start.coord(1), x.end.coord(1)), miny)
        })
        Array(MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))).iterator
      } else Array().iterator
    }).collect()

    val file = new File(output_file_path)
    val bw = new BufferedWriter(new FileWriter(file))

    val collection = new GeometryCollection(part_bounds.map(x =>
      Polygon(Array(x.low, Point(Array(x.low.coord(0), x.high.coord(1))),
        x.high, Point(Array(x.high.coord(0), x.low.coord(1))), x.low)).content), new GeometryFactory)

    new GeometryJSON().writeGeometryCollection(collection, bw)

    bw.close()

    sc.stop()
  }
} 
Example 15
Source File: SpatialSpanClustering.scala    From traj-sim-spark   with Apache License 2.0
package edu.utah.cs.trajectory

import java.io.{BufferedWriter, File, FileWriter}

import com.vividsolutions.jts.geom.{GeometryCollection, GeometryFactory}
import edu.utah.cs.partitioner.STRMBRPartition
import edu.utah.cs.spatial.{LineSegment, MBR, Point, Polygon}
import edu.utah.cs.util._
import org.apache.spark.{SparkConf, SparkContext}
import org.geotools.geojson.geom.GeometryJSON


object SpatialSpanClustering {
  final val max_entries_per_node = 25

  def getMBR(x: (Int, Array[(Int, LineSegment)])): (MBR, Int) = {
    val pts = x._2.flatMap(p => Array(p._2.start, p._2.end))
    var maxx = Double.MinValue
    var maxy = Double.MinValue
    var minx = Double.MaxValue
    var miny = Double.MaxValue
    pts.foreach(x => {
      maxx = Math.max(x.coord(0), maxx)
      maxy = Math.max(x.coord(1), maxy)
      minx = Math.min(x.coord(0), minx)
      miny = Math.min(x.coord(1), miny)
    })
    (MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))), x._1)
  }

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("SpatialSpanClustering"))

    if (args.length < 2) {
      println("usage: SpatialSpanClustering <input_file_path> <output_file_path>")
      System.exit(1)
    }

    val input_file_path = args(0)
    val output_file_path = args(1)

    val bf_meta = BloomFilterMeta(10000, 1)
    val bc_bf_meta = sc.broadcast(bf_meta)
    BloomFilter.meta = bf_meta

    val mbrs = sc.textFile(input_file_path).mapPartitions(iter => {
      iter.map(x => {
        val splitted = x.split("\t")
        (splitted(0).toInt,
          LineSegment(Point(Array(splitted(2).toDouble, splitted(1).toDouble)),
            Point(Array(splitted(4).toDouble, splitted(3).toDouble))))
      }).toArray.groupBy(_._1).map(now => getMBR(now)).iterator
    })

    val num_partitions = mbrs.getNumPartitions * 4

    val partitioned_rdd = STRMBRPartition(mbrs, num_partitions, 0.01, max_entries_per_node)

    val part_bounds = partitioned_rdd.mapPartitions(iter => {
      if (iter.nonEmpty) {
        var maxx = Double.MinValue
        var maxy = Double.MinValue
        var minx = Double.MaxValue
        var miny = Double.MaxValue
        iter.map(_._1).foreach(x => {
          maxx = Math.max(x.high.coord(0), maxx)
          maxy = Math.max(x.high.coord(1), maxy)
          minx = Math.min(x.low.coord(0), minx)
          miny = Math.min(x.low.coord(1), miny)
        })
        Array(MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))).iterator
      } else Array().iterator
    }).collect()

    val file = new File(output_file_path)
    val bw = new BufferedWriter(new FileWriter(file))

    val collection = new GeometryCollection(part_bounds.map(x =>
      Polygon(Array(x.low, Point(Array(x.low.coord(0), x.high.coord(1))),
        x.high, Point(Array(x.high.coord(0), x.low.coord(1))), x.low)).content), new GeometryFactory)

    new GeometryJSON().writeGeometryCollection(collection, bw)

    bw.close()

    sc.stop()
  }
} 
Example 16
Source File: NERServiceSpec.scala    From recogito2   with Apache License 2.0
package transform.ner

import java.io.File
import org.specs2.mutable._
import org.specs2.runner._
import org.junit.runner._
import org.joox.JOOX._
import org.pelagios.recogito.sdk.ner.EntityType
import play.api.test._
import play.api.test.Helpers._
import scala.concurrent.duration._
import scala.io.Source
import org.codehaus.plexus.util.StringInputStream
import java.io.StringWriter
import java.io.BufferedWriter

@RunWith(classOf[JUnitRunner])
class NERServiceSpec extends Specification {

  def parsePlaintext() = {
    val TEST_TEXT = Source.fromFile("test/resources/transform/ner/text-for-ner-01.txt").getLines().mkString("\n")
    NERService.parseText(TEST_TEXT, None)
  }
  
  def enrichTEI() = {
    val TEST_TEI = 
      new File("test/resources/transform/ner/tei-for-ner.tei.xml")
    
    val writer = new StringWriter()
    NERService.enrichTEI(TEST_TEI, None, Some(new BufferedWriter(writer)))
    $(writer.toString)
  }

  "The NER text parse function" should {
      
    "detect 8 Named Entites in the test text" in {
      val entities = parsePlaintext()
      entities.size must equalTo (8)
    }

    "detect 3 Locations - Pylos, Sparta and Ithaca" in {
      val entities = parsePlaintext()
      val locations = entities.filter(_.entityType == EntityType.LOCATION).map(_.chars)
      locations.size must equalTo(3)
      locations must contain("Pylos")
      locations must contain("Sparta")
      locations must contain("Ithaca")
    }

    "detect 1 date" in {
      val entities = parsePlaintext()
      entities.filter(_.entityType.equals(EntityType.DATE)).size must equalTo(1)
    }

    "detect 4 persons - Ulysses (2x), Penelope and Telemachus" in {
      val entities = parsePlaintext()
      val persons = entities.filter(_.entityType == EntityType.PERSON).map(_.chars)
      persons.size must equalTo(4)
      persons must contain("Penelope")
      persons must contain("Telemachus")
      persons.filter(_.equals("Ulysses")).size must equalTo(2)
    }

    "retain correct char offsets for each entity" in {
      val TEST_TEXT = Source.fromFile("test/resources/transform/ner/text-for-ner-01.txt").getLines().mkString("\n")
      val entities = parsePlaintext()
      entities.map(e => {
        val snippetFromSourceFile = TEST_TEXT.substring(e.charOffset, e.charOffset + e.chars.size)
        snippetFromSourceFile must equalTo(e.chars)
      })
    }

  }
  
  "The NER TEI enrichment function" should {
        
    "insert 11 placeName tags" in {
      val enriched = enrichTEI()
      enriched.find("placeName").size must equalTo(11) 
    }
    
    "insert 24 persName tags" in {
      val enriched = enrichTEI()
      enriched.find("persName").size must equalTo(24)       
    }
    
  }

} 
Example 17
Source File: MnistManager.scala    From dl4scala   with MIT License
package org.dl4scala.datasets.mnist

import java.io.{BufferedWriter, FileWriter, IOException}

import org.dl4scala.datasets.fetchers.MnistDataFetcher


  def close(): Unit = {
    if (images != null) {
      try
        images.close()
      catch {
        case e: IOException =>

      }
      images = null
    }
    if (labels != null) {
      try
        labels.close()
      catch {
        case e: IOException =>
      }
      labels = null
    }
  }
}

object MnistManager {
  private val HEADER_SIZE = 8

  @throws(classOf[IOException])
  def writeImageToPpm(image: Array[Array[Int]], ppmFileName: String): Unit = {
    try {
      val ppmOut = new BufferedWriter(new FileWriter(ppmFileName))
      val rows = image.length
      val cols = image(0).length
      ppmOut.write("P3\n")
      ppmOut.write("" + rows + " " + cols + " 255\n")

      (0 until rows).foreach{ i =>
        val s = new StringBuilder
        (0 until cols).foreach(j => s.append(image(i)(j) + " " + image(i)(j) + " " + image(i)(j) + "  "))
        ppmOut.write(s.toString)
      }
      ppmOut.close()
    } catch {
      case e: Exception => println("BufferedWriter error" + e.printStackTrace())
    }
  }
} 
Example 18
Source File: VerifierLoggerBenchmark.scala    From Waves   with MIT License
package com.wavesplatform.transaction.smart

import java.io.BufferedWriter
import java.nio.file.{Files, Path, Paths}
import java.util.concurrent.TimeUnit

import cats.Id
import com.wavesplatform.account.KeyPair
import com.wavesplatform.common.state.ByteStr
import com.wavesplatform.common.utils._
import com.wavesplatform.lang.v1.compiler.Terms
import com.wavesplatform.lang.v1.compiler.Terms.{CONST_BOOLEAN, EVALUATED}
import com.wavesplatform.lang.v1.evaluator.Log
import com.wavesplatform.lang.v1.evaluator.ctx.impl.waves.Bindings
import com.wavesplatform.state.BinaryDataEntry
import com.wavesplatform.transaction.DataTransaction
import com.wavesplatform.transaction.smart.VerifierLoggerBenchmark.BigLog
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

@OutputTimeUnit(TimeUnit.MILLISECONDS)
@BenchmarkMode(Array(Mode.AverageTime))
@Threads(1)
@Fork(1)
@Warmup(iterations = 10)
@Measurement(iterations = 10)
class VerifierLoggerBenchmark {

  @Benchmark
  def verifierLogged(bh: Blackhole, log: BigLog): Unit = {
    val logs = Verifier.buildLogs("id", log.value)
    bh.consume(log.writer.write(logs))
  }
}

object VerifierLoggerBenchmark {

  @State(Scope.Benchmark)
  class BigLog {

    val resultFile: Path       = Paths.get("log.txt")
    val writer: BufferedWriter = Files.newBufferedWriter(resultFile)

    private val dataTx: DataTransaction = DataTransaction
      .selfSigned(1.toByte, KeyPair(Array[Byte]()), (1 to 4).map(i => BinaryDataEntry(s"data$i", ByteStr(Array.fill(1024 * 30)(1)))).toList, 100000000, 0)
      .explicitGet()

    private val dataTxObj: Terms.CaseObj = Bindings.transactionObject(
      RealTransactionWrapper(dataTx, ???, ???, ???).explicitGet(),
      proofsEnabled = true
    )

    val value: (Log[Id], Either[String, EVALUATED]) =
      (
        List.fill(500)("txVal" -> Right(dataTxObj)),
        Right(CONST_BOOLEAN(true))
      )

    @TearDown
    def deleteFile(): Unit = {
      Files.delete(resultFile)
      writer.close()
    }
  }
} 
Example 19
Source File: ElevationOverlay.scala    From geotrellis-osm-elevation   with Apache License 2.0
package geotrellis.osme.core

import java.io.{BufferedWriter, FileWriter, File}

import com.vividsolutions.jts.geom.{LineString, MultiLineString}
import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import geotrellis.vector.io.json.{GeoJson, JsonFeatureCollection}
import scala.collection.immutable.Map
import spray.json._
import DefaultJsonProtocol._
import geotrellis.vector.io.json.FeatureFormats.writeFeatureJson
import geotrellis.vector.io.json.GeometryFormats._
import geotrellis.vector.densify.DensifyMethods
import geotrellis.vector.dissolve.DissolveMethods
import geotrellis.vector._



    val segmentsFeatures = segments.map { segment =>
       val center = segment.centroid match {
         case PointResult(p) => p
         case NoResult => throw new Exception("No result found in PointOrNoResult")
       }
       val (col, row) = rasterExtent.mapToGrid(center)
       val elevation = geotiff.tile.getDouble(col, row)
       val meanvMap: Map[String, Double] = Map("MEANV" -> elevation)
       LineFeature(segment, meanvMap)
     }

    return segmentsFeatures.toTraversable


  }
} 
Example 20
Source File: ElevationSpec.scala    From geotrellis-osm-elevation   with Apache License 2.0
package geotrellis.osme.core

import java.io.{FileWriter, BufferedWriter, File}

import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import geotrellis.vector.{Feature, Line, LineFeature}
import geotrellis.vector.io.json.GeoJson._
import spray.json.DefaultJsonProtocol._
import geotrellis.vector.io.json.{JsonFeatureCollection, GeoJson}
import spray.json.JsonReader
import scala.io.Source
import org.scalatest._


class ElevationSpec extends FunSpec with Matchers {

  def sharedData = {

    val geojson = Source.fromFile("data/imgn36w100vector.geojson").getLines.mkString
    val gjCol = parse[JsonFeatureCollection](geojson)

    new {
      val geotiff = SinglebandGeoTiff("data/imgn36w100_13_3_3.tif")
      val multiLine = gjCol.getAllLines().toMultiLine
      val elevationGeoJson = ElevationOverlay(geotiff, multiLine)
    }

  }

  describe("Core spec") {

    val numInputLines = sharedData.multiLine.lines.size
    val numOutputLines = sharedData.elevationGeoJson.size
    val ratio = numOutputLines / numInputLines
    println(s"Ratio of input lines to output lines: $ratio : 1")

    it("returned geojson should contain the MEANV property") {
      val elevationFeatures =  sharedData.elevationGeoJson
      val hasMeanV = elevationFeatures.forall(feat => feat.data.contains("MEANV"))
      assert(hasMeanV)
    }

    it("should produce a geojson file that can be put into geocolor.io") {
      val elevationFeatures =  sharedData.elevationGeoJson
      val jsonFeatures = JsonFeatureCollection(elevationFeatures)

      val file = new File("geocolor_test.json")
      val bw = new BufferedWriter(new FileWriter(file))
      bw.write(jsonFeatures.toJson.prettyPrint)
      bw.close()
    }

    it("Every feature should intersect the tile extent") {
      val elevationFeatures =  sharedData.elevationGeoJson
      val rasterPoly =  sharedData.geotiff.rasterExtent.extent.toPolygon()
      val doesIntersect = elevationFeatures.forall(feat => rasterPoly.intersects(feat.geom))
      assert(doesIntersect)
    }


  }
} 
Example 21
Source File: DevelopmentEmailNotifications.scala    From sundial   with MIT License
package service.notifications

import java.io.{BufferedWriter, File, FileWriter}

import dao.SundialDaoFactory
import dto.DisplayModels
import model.{EmailNotification, ProcessStatus}
import software.amazon.awssdk.services.ses.SesClient

import scala.sys.process._

class DevelopmentEmailNotifications(daoFactory: SundialDaoFactory,
                                    displayModels: DisplayModels,
                                    sesClient: SesClient)
    extends EmailNotifications(daoFactory,
                               "[email protected]",
                               displayModels,
                               sesClient) {

  override def sendEmail(processStatus: ProcessStatus,
                         previousProcessStatus: Option[ProcessStatus],
                         teams: Seq[EmailNotification],
                         subject: String,
                         body: String): Unit = {
    val outfile = File.createTempFile("sundial", ".html")
    val bw = new BufferedWriter(new FileWriter(outfile))
    bw.write(body)
    bw.close()

    Seq("open", outfile.getAbsolutePath()).!
  }

} 
Example 22
Source File: FileDownloader.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.deeplang.doperations.readwritedataframe.filestorage

import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter}
import java.nio.file.{Files, Paths}
import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import io.deepsense.deeplang.ExecutionContext
import io.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import io.deepsense.deeplang.doperations.readwritedataframe.FilePath

private[filestorage] object FileDownloader {

  def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = {
    if (context.tempPath.startsWith("hdfs://")) {
      downloadFileToHdfs(url)
    } else {
      downloadFileToDriver(url)
    }
  }

  private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = {
    val content = scala.io.Source.fromURL(url).getLines()
    val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}"

    val configuration = new Configuration()
    val hdfs = FileSystem.get(configuration)
    val file = new Path(hdfsPath)
    val hdfsStream = hdfs.create(file)
    val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream))
    try {
      content.foreach {s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
      hdfs.close()
    }

    FilePath(hdfsPath)
  }

  private def downloadFileToDriver(url: String)
                                  (implicit context: ExecutionContext) = {
    val outputDirPath = Paths.get(context.tempPath)
    // We're checking if the output is a directory following symlinks.
    // The default behaviour of createDirectories is NOT to follow symlinks
    if (!Files.isDirectory(outputDirPath)) {
      Files.createDirectories(outputDirPath)
    }

    val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv")
    // content is a stream. Do not invoke stuff like .toList() on it.
    val content = scala.io.Source.fromURL(url).getLines()
    val writer: BufferedWriter =
      new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile)))
    try {
      content.foreach {s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
    }
    FilePath(s"file:///$outFilePath")
  }

  private def safeClose(bufferedWriter: BufferedWriter): Unit = {
    try {
      bufferedWriter.flush()
      bufferedWriter.close()
    } catch {
      case e: IOException => throw new DeepSenseIOException(e)
    }
  }

} 
Example 23
Source File: WhiskConfigTests.scala    From openwhisk   with Apache License 2.0
package org.apache.openwhisk.core

import java.io.BufferedWriter
import java.io.File
import java.io.FileWriter

import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.Matchers
import org.scalatest.junit.JUnitRunner

import common.StreamLogging

@RunWith(classOf[JUnitRunner])
class WhiskConfigTests extends FlatSpec with Matchers with StreamLogging {

  behavior of "WhiskConfig"

  it should "get required property" in {
    val config = new WhiskConfig(WhiskConfig.edgeHost)
    assert(config.isValid)
    assert(config.edgeHost.nonEmpty)
  }

  it should "be valid when a prop file is provided defining required props" in {
    val file = File.createTempFile("cxt", ".txt")
    file.deleteOnExit()

    val bw = new BufferedWriter(new FileWriter(file))
    bw.write("a=A\n")
    bw.close()

    val config = new WhiskConfig(Map("a" -> null), Set.empty, file)
    assert(config.isValid && config("a") == "A")
  }

  it should "not be valid when a prop file is provided but does not define required props" in {
    val file = File.createTempFile("cxt", ".txt")
    file.deleteOnExit()

    val bw = new BufferedWriter(new FileWriter(file))
    bw.write("a=A\n")
    bw.close()

    val config = new WhiskConfig(Map("a" -> null, "b" -> null), Set.empty, file)
    assert(!config.isValid && config("b") == null)
  }

  it should "be valid when a prop file is provided defining required props and optional properties" in {
    val file = File.createTempFile("cxt", ".txt")
    file.deleteOnExit()

    val bw = new BufferedWriter(new FileWriter(file))
    bw.write("a=A\n")
    bw.write("b=B\n")
    bw.write("c=C\n")
    bw.close()

    val config = new WhiskConfig(Map("a" -> null, "b" -> "???"), Set("c", "d"), file, env = Map.empty)
    assert(config.isValid && config("a") == "A" && config("b") == "B")
    assert(config("c") == "C")
    assert(config("d") == "")
    assert(config("a", "c") == "C")
    assert(config("a", "d") == "A")
    assert(config("d", "a") == "A")
    assert(config("c", "a") == "A")
  }
} 
Example 24
Source File: BasicTestPerformance4Samba.scala    From ohara   with Apache License 2.0
package oharastream.ohara.it.performance

import java.io.{BufferedWriter, OutputStreamWriter}
import java.util.concurrent.atomic.LongAdder

import oharastream.ohara.client.filesystem.FileSystem
import oharastream.ohara.common.data.Row
import oharastream.ohara.common.util.{CommonUtils, Releasable}
import org.junit.AssumptionViolatedException
import spray.json.{JsNumber, JsString, JsValue}

import scala.concurrent.duration.Duration
import scala.jdk.CollectionConverters._

abstract class BasicTestPerformance4Samba extends BasicTestPerformance {
  private[this] val sambaHostname: String = sys.env.getOrElse(
    PerformanceTestingUtils.SAMBA_HOSTNAME_KEY,
    throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_HOSTNAME_KEY} does not exist!!!")
  )

  private[this] val sambaUsername: String = sys.env.getOrElse(
    PerformanceTestingUtils.SAMBA_USER_KEY,
    throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_USER_KEY} does not exist!!!")
  )

  private[this] val sambaPassword: String = sys.env.getOrElse(
    PerformanceTestingUtils.SAMBA_PASSWORD_KEY,
    throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PASSWORD_KEY} does not exist!!!")
  )

  private[this] val sambaPort: Int = sys.env
    .getOrElse(
      PerformanceTestingUtils.SAMBA_PORT_KEY,
      throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PORT_KEY} does not exist!!!")
    )
    .toInt

  private[this] val sambaShare: String = sys.env.getOrElse(
    PerformanceTestingUtils.SAMBA_SHARE_KEY,
    throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_SHARE_KEY} does not exist!!!")
  )

  private[this] val csvInputFolderKey       = PerformanceTestingUtils.CSV_INPUT_KEY
  private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("input")

  private[this] val NEED_DELETE_DATA_KEY: String = PerformanceTestingUtils.DATA_CLEANUP_KEY
  protected[this] val needDeleteData: Boolean    = sys.env.getOrElse(NEED_DELETE_DATA_KEY, "true").toBoolean

  protected val sambaSettings: Map[String, JsValue] = Map(
    oharastream.ohara.connector.smb.SMB_HOSTNAME_KEY   -> JsString(sambaHostname),
    oharastream.ohara.connector.smb.SMB_PORT_KEY       -> JsNumber(sambaPort),
    oharastream.ohara.connector.smb.SMB_USER_KEY       -> JsString(sambaUsername),
    oharastream.ohara.connector.smb.SMB_PASSWORD_KEY   -> JsString(sambaPassword),
    oharastream.ohara.connector.smb.SMB_SHARE_NAME_KEY -> JsString(sambaShare)
  )

  protected def setupInputData(timeout: Duration): (String, Long, Long) = {
    val client = sambaClient()
    try {
      if (!client.exists(csvOutputFolder)) PerformanceTestingUtils.createFolder(client, csvOutputFolder)

      val result = generateData(
        numberOfRowsToFlush,
        timeout,
        (rows: Seq[Row]) => {
          val file        = s"$csvOutputFolder/${CommonUtils.randomString()}"
          val writer      = new BufferedWriter(new OutputStreamWriter(client.create(file)))
          val count       = new LongAdder()
          val sizeInBytes = new LongAdder()

          try {
            val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet
            writer
              .append(cellNames.mkString(","))
              .append("\n")
            rows.foreach(row => {
              val content = row.cells().asScala.map(_.value).mkString(",")
              count.increment()
              sizeInBytes.add(content.length)
              writer
                .append(content)
                .append("\n")
            })
            (count.longValue(), sizeInBytes.longValue())
          } finally Releasable.close(writer)
        }
      )
      (csvOutputFolder, result._1, result._2)
    } finally Releasable.close(client)
  }

  protected[this] def sambaClient(): FileSystem =
    FileSystem.smbBuilder
      .hostname(sambaHostname)
      .port(sambaPort)
      .user(sambaUsername)
      .password(sambaPassword)
      .shareName(sambaShare)
      .build()
} 
Example 25
Source File: TestHdfsFileSystem.scala    From ohara   with Apache License 2.0 5 votes vote down vote up
package oharastream.ohara.client.filesystem.hdfs

import java.io.{BufferedWriter, File, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import oharastream.ohara.client.filesystem.{FileFilter, FileSystem, FileSystemTestBase}
import oharastream.ohara.common.exception.FileSystemException
import oharastream.ohara.common.util.CommonUtils
import org.junit.Test
import org.scalatest.matchers.should.Matchers._

class TestHdfsFileSystem extends FileSystemTestBase {
  private[this] val tempFolder: File = CommonUtils.createTempFolder("local_hdfs")

  private[this] val hdfsURL: String = new File(tempFolder.getAbsolutePath).toURI.toString

  override protected val fileSystem: FileSystem = FileSystem.hdfsBuilder.url(hdfsURL).build

  override protected val rootDir: String = tempFolder.toString

  // override this method because the Local HDFS doesn't support append()
  @Test
  override def testAppend(): Unit = {
    val file = randomFile()
    fileSystem.create(file).close()

    intercept[FileSystemException] {
      fileSystem.append(file)
    }.getMessage shouldBe "Not supported"
  }

  // override this method because the Local HDFS doesn't support append()
  @Test
  override def testDeleteFileThatHaveBeenRead(): Unit = {
    val file              = randomFile(rootDir)
    val data: Seq[String] = Seq("123", "456")
    val writer            = new BufferedWriter(new OutputStreamWriter(fileSystem.create(file), StandardCharsets.UTF_8))
    try data.foreach(line => {
      writer.append(line)
      writer.newLine()
    })
    finally writer.close()

    fileSystem.exists(file) shouldBe true
    fileSystem.readLines(file) shouldBe data
    fileSystem.delete(file)
    fileSystem.exists(file) shouldBe false
    fileSystem.listFileNames(rootDir, FileFilter.EMPTY).size shouldBe 0
  }
} 
Example 26
Source File: ForgerBoxMerklePathInfoTest.scala    From Sidechains-SDK   with MIT License 5 votes vote down vote up
package com.horizen.validation

import java.io.{BufferedReader, BufferedWriter, FileReader, FileWriter}
import java.lang.{Byte => JByte}
import java.util
import java.util.{ArrayList => JArrayList}

import com.horizen.box.ForgerBox
import com.horizen.fixtures.BoxFixture
import com.horizen.utils.{BytesUtils, ForgerBoxMerklePathInfo, ForgerBoxMerklePathInfoSerializer, MerklePath, Pair}
import com.horizen.vrf.VrfGeneratedDataProvider
import org.junit.Assert.{assertEquals, assertNotEquals, assertTrue}
import org.junit.Test
import org.scalatest.junit.JUnitSuite

class ForgerBoxMerklePathInfoTest extends JUnitSuite with BoxFixture {
  val vrfGenerationSeed = 907
  val vrfGenerationPrefix = "ForgerBoxMerklePathInfoTest"

  // set to true and run if you want to update the VRF-related regression data
  if (false) {
    VrfGeneratedDataProvider.updateVrfPublicKey(vrfGenerationPrefix, vrfGenerationSeed)
  }

  val forgerBox: ForgerBox = getForgerBox(
    getPrivateKey25519("123".getBytes()).publicImage(),
    1000L,
    100L,
    getPrivateKey25519("456".getBytes()).publicImage(),
    VrfGeneratedDataProvider.getVrfPublicKey(vrfGenerationPrefix, vrfGenerationSeed)
  )
  val emptyMerklePath: MerklePath = new MerklePath(new JArrayList())

  val nonEmptyMerklePath: MerklePath = new MerklePath(util.Arrays.asList(
    new Pair[JByte, Array[Byte]](0.toByte, BytesUtils.fromHexString("29d000eee85f08b6482026be2d92d081d6f9418346e6b2e9fe2e9b985f24ed1e")),
    new Pair[JByte, Array[Byte]](1.toByte, BytesUtils.fromHexString("61bfbdf7038dc7f21e2bcf193faef8e6caa8222af016a6ed86b9e9d860f046df"))
  ))

  @Test
  def comparison(): Unit = {
    assertNotEquals("Box merkle path info expected to be different.", emptyMerklePath, nonEmptyMerklePath)
  }

  @Test
  def serialization(): Unit = {
    // Test 1: empty merkle path (single element in merkle tree)
    val boxWithEmptyPath = ForgerBoxMerklePathInfo(forgerBox, emptyMerklePath)
    var boxBytes = boxWithEmptyPath.bytes
    var deserializedBox = ForgerBoxMerklePathInfoSerializer.parseBytes(boxBytes)
    assertEquals("Deserialized box merkle path info hashCode expected to be equal to the original one.", boxWithEmptyPath.hashCode(), deserializedBox.hashCode())
    assertEquals("Deserialized box merkle path info expected to be equal to the original one.", boxWithEmptyPath, deserializedBox)


    // Test 2: non empty merkle path
    val boxWithNonEmptyPath = ForgerBoxMerklePathInfo(forgerBox, nonEmptyMerklePath)
    boxBytes = boxWithNonEmptyPath.bytes
    deserializedBox = ForgerBoxMerklePathInfoSerializer.parseBytes(boxBytes)
    assertEquals("Deserialized box merkle path info hashCode expected to be equal to the original one.", boxWithNonEmptyPath.hashCode(), deserializedBox.hashCode())
    assertEquals("Deserialized box merkle path info expected to be equal to the original one.", boxWithNonEmptyPath, deserializedBox)

    // Set to true and run if you want to update regression data.
    if (false) {
      val out = new BufferedWriter(new FileWriter("src/test/resources/boxmerklepathinfo_hex"))
      out.write(BytesUtils.toHexString(boxBytes))
      out.close()
    }

    // Test 3: try to deserialize broken bytes.
    assertTrue("ForgerBoxMerklePathInfo expected to be not parsed due to broken data.", ForgerBoxMerklePathInfoSerializer.parseBytesTry("broken bytes".getBytes).isFailure)
  }

  @Test
  def serializationRegression(): Unit = {
    var bytes: Array[Byte] = null
    try {
      val classLoader = getClass.getClassLoader
      val file = new FileReader(classLoader.getResource("boxmerklepathinfo_hex").getFile)
      bytes = BytesUtils.fromHexString(new BufferedReader(file).readLine())
    }
    catch {
      case e: Exception =>
        fail(e.toString)
    }

    val boxMerklePathInfoTry = ForgerBoxMerklePathInfoSerializer.parseBytesTry(bytes)
    assertTrue("ForgerBoxMerklePathInfo expected to by parsed.", boxMerklePathInfoTry.isSuccess)

    val boxWithNonEmptyPath = ForgerBoxMerklePathInfo(forgerBox, nonEmptyMerklePath)
    assertEquals("Parsed info is different to original.", boxWithNonEmptyPath, boxMerklePathInfoTry.get)
  }
} 
Example 27
Source File: MNIST.scala    From spark-tsne   with Apache License 2.0 5 votes vote down vote up
package com.github.saurfang.spark.tsne.examples


import java.io.{BufferedWriter, OutputStreamWriter}

import com.github.saurfang.spark.tsne.impl._
import com.github.saurfang.spark.tsne.tree.SPTree
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.LoggerFactory

object MNIST {
  private def logger = LoggerFactory.getLogger(MNIST.getClass)

  def main (args: Array[String]) {
    val conf = new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(Array(classOf[SPTree]))
    val sc = new SparkContext(conf)
    val hadoopConf = sc.hadoopConfiguration
    val fs = FileSystem.get(hadoopConf)

    val dataset = sc.textFile("data/MNIST/mnist.csv.gz")
      .zipWithIndex()
      .filter(_._2 < 6000)
      .sortBy(_._2, true, 60)
      .map(_._1)
      .map(_.split(","))
      .map(x => (x.head.toInt, x.tail.map(_.toDouble)))
      .cache()
    //logInfo(dataset.collect.map(_._2.toList).toList.toString)

    //val features = dataset.map(x => Vectors.dense(x._2))
    //val scaler = new StandardScaler(true, true).fit(features)
    //val scaledData = scaler.transform(features)
    //  .map(v => Vectors.dense(v.toArray.map(x => if(x.isNaN || x.isInfinite) 0.0 else x)))
    //  .cache()
    val data = dataset.flatMap(_._2)
    val mean = data.mean()
    val std = data.stdev()
    val scaledData = dataset.map(x => Vectors.dense(x._2.map(v => (v - mean) / std))).cache()

    val labels = dataset.map(_._1).collect()
    val matrix = new RowMatrix(scaledData)
    val pcaMatrix = matrix.multiply(matrix.computePrincipalComponents(50))
    pcaMatrix.rows.cache()

    val costWriter = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(s".tmp/MNIST/cost.txt"), true)))

    // Alternative solvers, kept for reference:
    //SimpleTSNE.tsne(pcaMatrix, perplexity = 20, maxIterations = 200)
    //LBFGSTSNE.tsne(pcaMatrix, perplexity = 10, maxNumIterations = 500, numCorrections = 10, convergenceTol = 1e-8)
    BHTSNE.tsne(pcaMatrix, maxIterations = 500, callback = {
      case (i, y, loss) =>
        if(loss.isDefined) logger.info(s"$i iteration finished with loss $loss")

        val os = fs.create(new Path(s".tmp/MNIST/result${"%05d".format(i)}.csv"), true)
        val writer = new BufferedWriter(new OutputStreamWriter(os))
        try {
          (0 until y.rows).foreach {
            row =>
              writer.write(labels(row).toString)
              writer.write(y(row, ::).inner.toArray.mkString(",", ",", "\n"))
          }
          if(loss.isDefined) costWriter.write(loss.get + "\n")
        } finally {
          writer.close()
        }
    })
    costWriter.close()

    sc.stop()
  }
} 
Example 28
Source File: ConfigSpec.scala    From wookiee   with Apache License 2.0 5 votes vote down vote up
package com.webtrends.harness

import java.io.{BufferedWriter, File, FileWriter}
import java.util.concurrent.TimeUnit

import akka.actor.{Actor, ActorSystem, Props}
import akka.testkit.TestProbe
import com.typesafe.config.ConfigFactory
import com.webtrends.harness.app.HarnessActor.ConfigChange
import com.webtrends.harness.config.ConfigWatcherActor
import com.webtrends.harness.health.{ComponentState, HealthComponent}
import com.webtrends.harness.service.messages.CheckHealth
import org.specs2.mutable.SpecificationWithJUnit

import scala.concurrent.ExecutionContextExecutor
import scala.concurrent.duration.FiniteDuration
import scala.reflect.io.{Directory, Path}

class ConfigSpec extends SpecificationWithJUnit {
  implicit val dur = FiniteDuration(2, TimeUnit.SECONDS)
  new File("services/test/conf").mkdirs()
  implicit val sys = ActorSystem("system", ConfigFactory.parseString( """
    akka.actor.provider = "akka.actor.LocalActorRefProvider"
    services { path = "services" }
    """).withFallback(ConfigFactory.load))

  implicit val ec: ExecutionContextExecutor =  sys.dispatcher

  val probe = TestProbe()
  val parent = sys.actorOf(Props(new Actor {
    val child = context.actorOf(ConfigWatcherActor.props, "child")
    def receive = {
      case x if sender == child => probe.ref forward x
      case x => child forward x
    }
  }))

  sequential

  "config " should {
    "be in good health" in {
      probe.send(parent, CheckHealth)
      val msg = probe.expectMsgClass(classOf[HealthComponent])
      msg.state equals ComponentState.NORMAL
    }

    "detect changes in config" in {
      val file = new File("services/test/conf/test.conf")
      val bw = new BufferedWriter(new FileWriter(file))
      bw.write("test = \"value\"")
      bw.close()
      val msg = probe.expectMsgClass(classOf[ConfigChange])
      msg.isInstanceOf[ConfigChange]
    }
  }

  step {
    sys.terminate().onComplete { _ =>
        Directory(Path(new File("services"))).deleteRecursively()
    }
  }
} 
Example 29
Source File: Preprocess.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package com.packt.ScalaML.BitCoin

import java.io.{ BufferedWriter, File, FileWriter }
import org.apache.spark.sql.types.{ DoubleType, IntegerType, StructField, StructType }
import org.apache.spark.sql.{ DataFrame, Row, SparkSession }
import scala.collection.mutable.ListBuffer

object Preprocess {
  // how many of the first rows are omitted
  val dropFirstCount: Int = 612000

    def rollingWindow(data: DataFrame, window: Int, xFilename: String, yFilename: String): Unit = {
      var i = 0
      val xWriter = new BufferedWriter(new FileWriter(new File(xFilename)))
      val yWriter = new BufferedWriter(new FileWriter(new File(yFilename)))

      val zippedData = data.rdd.zipWithIndex().collect()
      System.gc()
      val dataStratified = zippedData.drop(dropFirstCount) // TODO: slice the first 614K rows
      while (i < (dataStratified.length - window)) {
        val x = dataStratified
          .slice(i, i + window)
          .map(r => r._1.getAs[Double]("Delta")).toList
        val y = dataStratified.apply(i + window)._1.getAs[Integer]("label")
        val stringToWrite = x.mkString(",")
        xWriter.write(stringToWrite + "\n")
        yWriter.write(y + "\n")

        i += 1
        if (i % 10 == 0) {
          xWriter.flush()
          yWriter.flush()
        }
      }

      xWriter.close()
      yWriter.close()
    }
    
  def main(args: Array[String]): Unit = {
    // TODO: modify these paths to point at the desired input/output files
    val priceDataFileName: String = "C:/Users/admin-karim/Desktop/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv"
    val outputDataFilePath: String = "output/scala_test_x.csv"
    val outputLabelFilePath: String = "output/scala_test_y.csv"

    val spark = SparkSession
      .builder()
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "E:/Exp/")
      .appName("Bitcoin Preprocessing")
      .getOrCreate()

    val data = spark.read.format("com.databricks.spark.csv").option("header", "true").load(priceDataFileName)
    data.show(10)
    println((data.count(), data.columns.size))

    val dataWithDelta = data.withColumn("Delta", data("Close") - data("Open"))

    import org.apache.spark.sql.functions._
    import spark.sqlContext.implicits._

    val dataWithLabels = dataWithDelta.withColumn("label", when($"Close" - $"Open" > 0, 1).otherwise(0))
    rollingWindow(dataWithLabels, 22, outputDataFilePath, outputLabelFilePath)    
    spark.stop()
  }
} 
Example 30
Source File: WriteSupport.scala    From CodeAnalyzerTutorial   with Apache License 2.0 5 votes vote down vote up
package tutor.utils

import java.io.{BufferedWriter, File, FileWriter, Writer}

trait WriteSupport {

  def withWriter(path: String)(f: Writer => Unit): Unit ={
    var writer: Writer = null
    try {
      val file = new File(path)
      if (!file.exists()) file.createNewFile()
      writer = new BufferedWriter(new FileWriter(file))
      f(writer)
      writer.flush()
    } finally {
      if (writer != null) writer.close()
    }
  }
} 
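A usage sketch of the withWriter loan pattern above; the object name, report content, and output path are assumptions:

// Hypothetical caller of WriteSupport; the path "report.txt" is an assumption.
object ReportWriter extends WriteSupport {
  def writeReport(lines: Seq[String]): Unit =
    withWriter("report.txt") { writer =>
      lines.foreach(line => writer.write(line + System.lineSeparator()))
    }
}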
Example 31
Source File: ApiGwRestEndToEndTests.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package apigw.healthtests

import java.io.BufferedWriter
import java.io.File
import java.io.FileWriter

import akka.http.scaladsl.model.StatusCodes.OK

import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

import common.TestUtils._
import common.rest.WskRestOperations
import common.rest.RestResult
import common.WskActorSystem

@RunWith(classOf[JUnitRunner])
class ApiGwRestEndToEndTests extends ApiGwEndToEndTests with WskActorSystem {

  override lazy val wsk = new WskRestOperations
  override val createCode = OK.intValue

  override def verifyAPICreated(rr: RunResult): Unit = {
    val apiResultRest = rr.asInstanceOf[RestResult]
    apiResultRest.statusCode shouldBe OK
    val apiurl = apiResultRest.getField("gwApiUrl") + "/path"
    println(s"apiurl: '$apiurl'")
  }

  override def verifyAPIList(rr: RunResult,
                             actionName: String,
                             testurlop: String,
                             testapiname: String,
                             testbasepath: String,
                             testrelpath: String): Unit = {
    val apiResultRest = rr.asInstanceOf[RestResult]
    val apiValue = RestResult.getFieldJsObject(apiResultRest.getFieldListJsObject("apis")(0), "value")
    val apidoc = RestResult.getFieldJsObject(apiValue, "apidoc")
    val basepath = RestResult.getField(apidoc, "basePath")
    basepath shouldBe testbasepath

    val paths = RestResult.getFieldJsObject(apidoc, "paths")
    paths.fields.contains(testrelpath) shouldBe true

    val info = RestResult.getFieldJsObject(apidoc, "info")
    val title = RestResult.getField(info, "title")
    title shouldBe testapiname

    val relpath = RestResult.getFieldJsObject(paths, testrelpath)
    val urlop = RestResult.getFieldJsObject(relpath, testurlop)
    val openwhisk = RestResult.getFieldJsObject(urlop, "x-openwhisk")
    val actionN = RestResult.getField(openwhisk, "action")
    actionN shouldBe actionName
  }

  override def verifyAPISwaggerCreated(rr: RunResult): Unit = {
    val apiResultRest = rr.asInstanceOf[RestResult]
    apiResultRest.statusCode shouldBe OK
  }

  override def writeSwaggerFile(rr: RunResult): File = {
    val swaggerfile = File.createTempFile("api", ".json")
    swaggerfile.deleteOnExit()
    val bw = new BufferedWriter(new FileWriter(swaggerfile))
    val apiResultRest = rr.asInstanceOf[RestResult]
    val apiValue = RestResult.getFieldJsObject(apiResultRest.getFieldListJsObject("apis")(0), "value")
    val apidoc = RestResult.getFieldJsObject(apiValue, "apidoc")
    bw.write(apidoc.toString())
    bw.close()
    swaggerfile
  }

  override def getSwaggerApiUrl(rr: RunResult): String = {
    val apiResultRest = rr.asInstanceOf[RestResult]
    apiResultRest.getField("gwApiUrl") + "/path"
  }
} 
Example 32
Source File: RuleStatLocator.scala    From apalache   with Apache License 2.0 5 votes vote down vote up
package at.forsyte.apalache.tla.bmcmt.profiler

import java.io.{BufferedWriter, FileWriter, PrintWriter}

import scala.collection.immutable.SortedMap


class RuleStatLocator {
  private var ruleStats: Map[String, RuleStat] = Map()

  def getRuleStat(ruleName: String): RuleStat = {
    ruleStats.get(ruleName) match {
      case Some(r) => r
      case None =>
        val newRule = new RuleStat(ruleName)
        ruleStats += ruleName -> newRule
        newRule
    }
  }

  def getStats = SortedMap(ruleStats.toSeq :_*)

  def writeStats(filename: String): Unit = {
    val writer = new PrintWriter(new FileWriter(filename, false))
    writer.println("Rule profiling statistics")
    val hrule = List.fill(80)('-').mkString
    writer.println(hrule)
    writer.println("%20s %9s %9s %9s %9s %9s"
      .format("name", "calls", "cells", "smt-consts", "smt-asserts", "smt-avg-size"))
    writer.println(hrule)
    val stats = ruleStats.values.toSeq.sortWith(_.nCalls > _.nCalls)
    for (rs <- stats) {
      writer.println("%-20s %9d %9d %9d %9d %9d"
        .format(rs.ruleName, rs.nCalls, rs.nCellsSelf, rs.nSmtConstsSelf, rs.nSmtAssertsSelf, rs.smtAssertsSizeAvg))
    }
    writer.close()
  }
} 
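A usage sketch for the locator above; the rule name and output file are assumptions, and the counters inside RuleStat are updated elsewhere by the rewriting rules while they run:

// Hypothetical usage; "SomeRule" and "profile-rules.txt" are assumptions.
val locator = new RuleStatLocator
val stat = locator.getRuleStat("SomeRule") // created on first access, then reused
// ... the rule updates `stat` while it executes ...
locator.writeStats("profile-rules.txt")    // writes the sorted profiling table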
Example 33
Source File: HTMLReportGenerator.scala    From regressr   with Apache License 2.0 5 votes vote down vote up
package org.ebayopensource.regression.internal.reportGenerator

import java.io.{BufferedWriter, File, FileWriter}

import org.fusesource.scalate.{TemplateEngine, TemplateSource}

import scala.io.Source
import scala.util.{Failure, Success, Try}


class HTMLReportGenerator extends ReportGenerator {

  val scalateEngine = new TemplateEngine

  def getContent(reportEntries: Seq[ReportEntry]) : Try[String] = Try {

    if (reportEntries.isEmpty) {
      throw new IllegalArgumentException("Cannot generate report with 0 reportEntries.")
    }

    val templateText = Source.fromInputStream(getClass.getResourceAsStream("/report/index.html")).mkString
    scalateEngine.escapeMarkup = false

    val regressionCount :Seq[Int] = reportEntries.flatMap {
      reportEntry => {
        reportEntry.requestReportEntries.map {
          requestReportEntry => {
            requestReportEntry.reqMessages.size
          }
        }
      }
    }

    val renderedContent = scalateEngine.layout(TemplateSource.fromText("/com/ebay/n/regression/text.ssp", templateText),
      Map("reportEntries" -> reportEntries, "regressionCount" -> regressionCount.sum))
    renderedContent
  }

  def writeAndGetFile(content: String, reportFilePath: String) : Try[File] = Try {
    val outputFile = new File(reportFilePath)
    val bw = new BufferedWriter(new FileWriter(outputFile))
    bw.write(content)
    bw.close()
    outputFile
  }

  override def generate(reportEntries: Seq[ReportEntry], reportFilePath: String): Try[File] = Try {
    getContent(reportEntries).flatMap {
      content => writeAndGetFile(content, reportFilePath)
    } match {
      case Success(file) => file
      case Failure(t) => throw t
    }
  }
} 
Example 34
Source File: Xml.scala    From temperature-machine   with Apache License 2.0 5 votes vote down vote up
package bad.robot.temperature.rrd

import java.io.{BufferedWriter, FileWriter}

import bad.robot.temperature.{FileOps, Files, JsonOps, encode}
import bad.robot.temperature.rrd.ChartJson._
import org.rrd4j.ConsolFun._
import org.rrd4j.core.RrdDb
import bad.robot.temperature.Files._
import scala.collection.JavaConverters._
import scala.xml.{Elem, XML}

case class Xml(xml: Elem) {
  def exportXml(filename: String) = {
    XML.save(Files.path / filename, xml)
  }

  def exportJson(filename: String) = {
    val writer = new BufferedWriter(new FileWriter(Files.path / filename))
    writer.write(toJson())
    writer.close()
  }

  def toJson(): String = {
    val series = parse(xml)
    encode(series).spaces2ps
  }

}

object Xml {
  def apply(start: Seconds, end: Seconds, hosts: List[Host]): Xml = {
    val database = new RrdDb(RrdFile.file)
    val request = database.createFetchRequest(AVERAGE, start, end)
    val sensors = for {
      host   <- hosts
      sensor <- 1 to RrdFile.MaxSensors
    } yield {
      s"${host.name}-sensor-$sensor"
    }
    request.setFilter(nonEmpty(sensors, database).asJava)
    val data = request.fetchData()
    val xml = data.exportXml()
    new Xml(XML.loadString(xml))
  }

  def nonEmpty(sensors: List[String], database: RrdDb) = sensors.filter(database.hasValuesFor).toSet

} 
Example 35
Source File: JsonFileTest.scala    From temperature-machine   with Apache License 2.0 5 votes vote down vote up
package bad.robot.temperature.server

import java.io.{BufferedWriter, FileWriter}

import org.specs2.matcher.DisjunctionMatchers.be_\/-
import org.specs2.mutable.Specification

class JsonFileTest extends Specification {

  val exampleJson =
    """
      |[
      |  {
      |    "label": "bedroom1-sensor-1",
      |    "data": [
      |      {
      |        "x": 1507709610000,
      |        "y": "NaN"
      |      },
      |      {
      |        "x": 1507709640000,
      |        "y": "+2.2062500000E01"
      |      },
      |      {
      |        "x": 1507709680000,
      |        "y": "+2.2262500000E01"
      |      }
      |    ]
      |  }
      |]
    """.stripMargin

  "Load a file" >> {
    createFile()
    JsonFile.load must be_\/-(exampleJson)
  }
  
  private def createFile() = {
    val writer = new BufferedWriter(new FileWriter(JsonFile.file))
    writer.write(exampleJson)
    writer.close()
  }

} 
Example 36
Source File: TestLoadDataWithJunkChars.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.integration.spark.testsuite.dataload

import java.io.{BufferedWriter, File, FileWriter}
import java.util.Random

import org.apache.spark.sql.Row
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

class TestLoadDataWithJunkChars extends QueryTest with BeforeAndAfterAll {
  var filePath = ""
  val junkchars = "ǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰ"

  def buildTestData() = {
    filePath = s"$integrationPath/spark/target/junkcharsdata.csv"
    val file = new File(filePath)
    val writer = new BufferedWriter(new FileWriter(file))
    writer.write("c1,c2\n")
    val random = new Random
    for (i <- 1 until 1000) {
      writer.write("a" + i + "," + junkchars + "\n")
      if ( i % 100 == 0) {
        writer.flush()
      }
    }
    writer.write("a1000000," + junkchars)
    writer.close()
  }

  test("[bug]fix bug of duplicate rows in UnivocityCsvParser #877") {
    buildTestData()
    sql("drop table if exists junkcharsdata")
    sql("""create table if not exists junkcharsdata
             (c1 string, c2 string)
             STORED AS carbondata""")
    sql(s"LOAD DATA LOCAL INPATH '$filePath' into table junkcharsdata")
    checkAnswer(sql("select count(*) from junkcharsdata"), Seq(Row(1000)))
    sql("drop table if exists junkcharsdata")
    new File(filePath).delete()
  }
} 
Example 37
Source File: SessionDataFileHDFSWriter.scala    From spark_training   with Apache License 2.0 5 votes vote down vote up
package com.malaska.spark.training.streaming.dstream.sessionization

import java.io.BufferedWriter
import java.io.FileWriter
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.conf.Configuration
import java.io.OutputStreamWriter
import org.apache.hadoop.fs.Path
import java.util.Random

object SessionDataFileHDFSWriter {
  
  val eol = System.getProperty("line.separator")
  
  def main(args: Array[String]) {
    if (args.length == 0) {
        println("SessionDataFileWriter {tempDir} {distDir} {numberOfFiles} {numberOfEventsPerFile} {waitBetweenFiles}");
        return;
    }
    val conf = new Configuration
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"))
    conf.addResource(new Path("/etc/hadoop/conf/mapred-site.xml"))
    conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"))
    
    val fs = FileSystem.get(conf) // use the configuration loaded above, not a fresh one
    val rootTempDir = args(0)
    val rootDistDir = args(1)
    val files = args(2).toInt
    val loops = args(3).toInt
    val waitBetweenFiles = args(4).toInt
    val r = new Random
    for (f <- 1 to files) {
      val rootName = "/weblog." + System.currentTimeMillis()
      val tmpPath = new Path(rootTempDir + rootName + ".tmp")
      val writer = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath)))
      
      print(f + ": [")
      
      val randomLoops = loops + r.nextInt(loops)
      
      for (i <- 1 to randomLoops) {
        writer.write(SessionDataGenerator.getNextEvent + eol)
        if (i%100 == 0) {
          print(".")
        }
      }
      println("]")
      writer.close()
      
      val distPath = new Path(rootDistDir + rootName + ".dat")
      
      fs.rename(tmpPath, distPath)
      Thread.sleep(waitBetweenFiles)
    }
    println("Done")
  }
} 
Example 38
Source File: SessionDataFileWriter.scala    From spark_training   with Apache License 2.0 5 votes vote down vote up
package com.malaska.spark.training.streaming.dstream.sessionization

import java.io.BufferedWriter
import java.io.FileWriter

object SessionDataFileWriter {
  
  val eol = System.getProperty("line.separator")
  
  def main(args: Array[String]) {
    if (args.length == 0) {
        println("SessionDataFileWriter {numberOfRecords} {outputFile} ");
        return;
    }
    
    val writer = new BufferedWriter(new FileWriter(args(1)))
    val loops = args(0).toInt
    
    for (i <- 1 to loops) {
      writer.write(SessionDataGenerator.getNextEvent + eol)
    }
    
    writer.close()
  }
} 
Example 39
Source File: TreeNode.scala    From AppCrawler   with Apache License 2.0 5 votes vote down vote up
package com.testerhome.appcrawler

import java.io.{BufferedWriter, FileWriter}

import scala.collection.mutable.ListBuffer


case class TreeNode[T](
                        value: T,
                        children: ListBuffer[TreeNode[T]] = ListBuffer[TreeNode[T]]()
                      ) {

  def equals(node: TreeNode[T]): Boolean = {
    node.value == this.value
  }


  def find(tree: TreeNode[T], node: TreeNode[T]): Option[TreeNode[T]] = {
    if (tree.equals(node)) {
      return Some(tree)
    }
    tree.children.foreach(t => {
      find(t, node) match {
        case Some(v) => return Some(v)
        case None => {}
      }
    })
    None
  }

  def appendNode(currenTree: TreeNode[T], node: TreeNode[T]): TreeNode[T] = {
    find(currenTree, node) match {
      case Some(v) => {
        v
      }
      case None => {
        this.children.append(node)
        node
      }
    }
  }


  def toXml(tree: TreeNode[T]): String = {
    val s=new StringBuffer()
    val before = (tree: TreeNode[T]) => {
      s.append(s"""<node TEXT="${xml.Utility.escape(tree.value.toString)}">""")
      // TODO: add an image link, e.g. LINK="file:///Users/seveniruby/projects/LBSRefresh/Android_20160216105737/946_StockDetail-Back--.png"
    }
    val after = (tree: TreeNode[T]) => {
      s.append("</node>")
      s.append("\n")
    }

    s.append("""<map version="1.0.1">""")
    s.append("\n")
    traversal[T](tree, before, after)
    s.append("</map>")
    s.toString
  }

  def traversal[T](tree: TreeNode[T],
                   before: (TreeNode[T]) => Any = (_: TreeNode[T]) => (),
                   after: (TreeNode[T]) => Any = (_: TreeNode[T]) => ()): Unit = {
    before(tree)
    tree.children.foreach(t => {
      traversal(t, before, after)
    })
    after(tree)
  }

  def generateFreeMind(list: ListBuffer[T], path:String=null): String = {
    if(list.isEmpty){
      return ""
    }
    val root=TreeNode(list.head)
    var currentNode=root
    list.slice(1, list.size).foreach(e=>{
      currentNode=currentNode.appendNode(root, TreeNode(e))
    })
    val xml=toXml(root)
    if(path!=null){
      val file = new java.io.File(path)
      val bw = new BufferedWriter(new FileWriter(file))
      bw.write(xml)
      bw.close()
    }
    xml
  }

} 
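A usage sketch of generateFreeMind above, which turns a linear list of crawl steps into a FreeMind mind-map file; the step names and output path are assumptions:

import scala.collection.mutable.ListBuffer

// Hypothetical crawl steps and output file.
val steps = ListBuffer("Home", "StockDetail", "Back")
val xml   = TreeNode(steps.head).generateFreeMind(steps, "crawl.mm")
// xml now holds the same <map>...</map> document that was written to crawl.mm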
Example 40
Source File: SortedStringWriter.scala    From ScalaClean   with Apache License 2.0 5 votes vote down vote up
package org.scalaclean.analysis

import java.io.BufferedWriter
import java.nio.file.{Files, Path, StandardOpenOption}

import scala.collection.mutable

// TODO This class should not be needed - use StringWriter instead
// however ElementWriter appears to be very sensitive to write order right now
class SortedStringWriter(targetPath: Path) {
  val target: BufferedWriter = Files.newBufferedWriter(targetPath,
    StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE, StandardOpenOption.CREATE)

  private val strings: mutable.Set[String] = mutable.SortedSet[String]()

  def writeLine(s: String): Boolean = {
    strings.add(s)
  }

  def flush(): Unit = {
    strings.toVector.foreach { line =>
      target.write(line)
      target.newLine()
    }
    strings.clear()
    target.flush()
  }

  def close(): Unit = {
    flush()
    target.close()
  }
} 
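A usage sketch for the writer above, showing that lines come out sorted and de-duplicated regardless of write order; the target path is an assumption:

import java.nio.file.Paths

// Hypothetical target file.
val out = new SortedStringWriter(Paths.get("elements.csv"))
out.writeLine("b,2")
out.writeLine("a,1")
out.writeLine("a,1") // duplicate, stored once
out.close()          // flush() writes "a,1" then "b,2", then the file is closed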
Example 41
Source File: StarsAnalysisDemo.scala    From CkoocNLP   with Apache License 2.0 5 votes vote down vote up
package applications.analysis

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}

import functions.segment.Segmenter
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}


object StarsAnalysisDemo {
  def main(args: Array[String]) {
    Logger.getLogger("org").setLevel(Level.WARN)

    val spark = SparkSession
      .builder
      .master("local[2]")
      .appName("Stars Analysis Demo")
      .getOrCreate()

    val filePath = "E:/data/chinaNews/entertainment.txt"


    // Load the data, keep only the year and content fields, and filter the content field
    import spark.implicits._
    val data = spark.sparkContext.textFile(filePath).flatMap { line =>
      val tokens: Array[String] = line.split("\u00ef")
      if (tokens.length > 3) {
        var year: String = tokens(2).split("-")(0)
        if (tokens(2).contains("年")) year = tokens(2).split("年")(0)

        var content = tokens(3)
        if (content.length > 22 && content.substring(0, 20).contains("日电")) {
          content = content.substring(content.indexOf("日电") + 2, content.length).trim
        }

        if (content.startsWith("(")) content = content.substring(content.indexOf(")") + 1, content.length)
        if (content.length > 20 && content.substring(content.length - 20, content.length).contains("记者")) {
          content = content.substring(0, content.lastIndexOf("记者")).trim
        }

        Some(year, content)
      } else None
    }.toDF("year", "content")

    // Segment the text, drop single-character terms, and keep each term's part of speech
    val segmenter = new Segmenter()
      .isAddNature(true)
      .isDelEn(true)
      .isDelNum(true)
      .setMinTermLen(2)
      .setMinTermNum(5)
      .setSegType("StandardSegment")
      .setInputCol("content")
      .setOutputCol("segmented")
    val segDF: DataFrame = segmenter.transform(data)
    segDF.cache()

    val segRDD: RDD[(Int, Seq[String])] = segDF.select("year", "segmented").rdd.map { case Row(year: String, terms: Seq[String]) =>
      (Integer.parseInt(year), terms)
    }

    val result: Array[String] = segRDD.map(line => line._1.toString + "\u00ef" + line._2.mkString(",")).collect()
    val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("E:/entertainment_seg.txt")))
    result.foreach(line => writer.write(line + "\n"))
    writer.close()

    // Count the stars that appear most frequently in the 2016 news
    val stars2016 = segRDD.filter(_._1 == 2016)
      .flatMap { case (year: Int, termStr: Seq[String]) =>
        val person = termStr
          .map(term => (term.split("/")(0), term.split("/")(1)))
          .filter(_._2.equalsIgnoreCase("nr"))
          .map(term => (term._1, 1L))

        person
      }
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)

    segDF.unpersist()

    stars2016.take(100).foreach(println)

    spark.stop()
  }
} 
Example 42
Source File: FileSystem.scala    From ohara   with Apache License 2.0 4 votes vote down vote up
package oharastream.ohara.client.filesystem

import java.io.{BufferedReader, BufferedWriter, IOException, InputStreamReader, OutputStreamWriter}
import java.nio.charset.{Charset, StandardCharsets}

import oharastream.ohara.client.filesystem.ftp.FtpFileSystem
import oharastream.ohara.client.filesystem.hdfs.HdfsFileSystem
import oharastream.ohara.client.filesystem.smb.SmbFileSystem
import oharastream.ohara.common.exception.FileSystemException

trait FileSystem extends oharastream.ohara.kafka.connector.storage.FileSystem {
  
  def readLines(path: String, encode: String = "UTF-8"): Array[String] = {
    val reader = new BufferedReader(new InputStreamReader(open(path), Charset.forName(encode)))
    try Iterator.continually(reader.readLine()).takeWhile(_ != null).toArray
    finally reader.close()
  }

  def wrap[T](f: () => T): T =
    try {
      f()
    } catch {
      case e: IOException           => throw new FileSystemException(e.getMessage, e)
      case e: IllegalStateException => throw new FileSystemException(e.getMessage, e)
    }
}

object FileSystem {
  def hdfsBuilder: HdfsFileSystem.Builder = HdfsFileSystem.builder
  def ftpBuilder: FtpFileSystem.Builder   = FtpFileSystem.builder
  def smbBuilder: SmbFileSystem.Builder   = SmbFileSystem.builder
}
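A usage sketch combining the hdfsBuilder shown earlier with the readLines helper defined above; the URL and file path are assumptions:

import oharastream.ohara.common.util.Releasable

// Hypothetical URL and path; hdfsBuilder and readLines are the APIs shown in these examples.
val fileSystem = FileSystem.hdfsBuilder.url("file:///tmp/local_hdfs").build
try fileSystem.readLines("/tmp/local_hdfs/data.csv").foreach(println)
finally Releasable.close(fileSystem)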