java.io.InputStream Scala Examples

The following examples show how to use java.io.InputStream. You can vote up the examples you find useful or vote down the ones you don't, and follow the links above each example to the original project or source file.
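
Before the project examples, here is a minimal, self-contained sketch (not taken from any of the listed projects) of the pattern most of them build on: draining a java.io.InputStream with a fixed-size buffer and closing it when done.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}
import java.nio.charset.StandardCharsets

object ReadInputStreamSketch {
  // Drain an InputStream into a byte array using a reusable 8 KB buffer.
  def readAllBytes(in: InputStream): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    val buffer = new Array[Byte](8192)
    var read = in.read(buffer)
    while (read != -1) {
      out.write(buffer, 0, read)
      read = in.read(buffer)
    }
    out.toByteArray
  }

  def main(args: Array[String]): Unit = {
    val in: InputStream = new ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8))
    try println(new String(readAllBytes(in), StandardCharsets.UTF_8)) // prints "hello"
    finally in.close()
  }
}
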
Example 1
Source File: ClientLogManager.scala    From Linkis   with Apache License 2.0    8 votes
package com.webank.wedatasphere.linkis.entranceclient.context

import java.io.{InputStream, OutputStream}

import com.webank.wedatasphere.linkis.entrance.log._
import com.webank.wedatasphere.linkis.scheduler.queue.Job
import com.webank.wedatasphere.linkis.server.conf.ServerConfiguration
import org.apache.commons.io.input.NullInputStream
import org.apache.commons.io.output.NullOutputStream


class ClientLogManager extends CacheLogManager {
  override def getLogReader(execId: String): LogReader = {
    new CacheLogReader("", ServerConfiguration.BDP_SERVER_ENCODING.getValue,
      new Cache(10), "") {
      override def getInputStream: InputStream = new NullInputStream(0)
    }
  }

  override def createLogWriter(job: Job): LogWriter = {
    new NullCacheLogWriter(ServerConfiguration.BDP_SERVER_ENCODING.getValue,
      new Cache(20))
  }
  class NullCacheLogWriter(charset:String,
                           sharedCache:Cache,
                           override protected val outputStream: OutputStream = new NullOutputStream)
    extends CacheLogWriter("", charset, sharedCache, "")
} 
Example 2
Source File: Queries.scala    From daml   with Apache License 2.0    7 votes
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.on.sql.queries

import java.io.InputStream
import java.sql.{Blob, Connection, PreparedStatement}

import anorm.{
  BatchSql,
  Column,
  MetaDataItem,
  NamedParameter,
  RowParser,
  SqlMappingError,
  SqlParser,
  SqlRequestError,
  ToStatement
}
import com.google.protobuf.ByteString

trait Queries extends ReadQueries with WriteQueries

object Queries {
  val TablePrefix = "ledger"
  val LogTable = s"${TablePrefix}_log"
  val MetaTable = s"${TablePrefix}_meta"
  val StateTable = s"${TablePrefix}_state"

  // By explicitly writing a value to a "table_key" column, we ensure we only ever have one row in
  // the meta table. An attempt to write a second row will result in a key conflict.
  private[queries] val MetaTableKey = 0

  def executeBatchSql(
      query: String,
      params: Iterable[Seq[NamedParameter]],
  )(implicit connection: Connection): Unit = {
    if (params.nonEmpty)
      BatchSql(query, params.head, params.drop(1).toArray: _*).execute()
    ()
  }

  implicit def byteStringToStatement: ToStatement[ByteString] = new ToStatement[ByteString] {
    override def set(s: PreparedStatement, index: Int, v: ByteString): Unit =
      s.setBinaryStream(index, v.newInput(), v.size())
  }

  implicit def columnToByteString: Column[ByteString] =
    Column.nonNull { (value: Any, meta: MetaDataItem) =>
      value match {
        case blob: Blob => Right(ByteString.readFrom(blob.getBinaryStream))
        case byteArray: Array[Byte] => Right(ByteString.copyFrom(byteArray))
        case inputStream: InputStream => Right(ByteString.readFrom(inputStream))
        case _ =>
          Left[SqlRequestError, ByteString](
            SqlMappingError(s"Cannot convert value of column ${meta.column} to ByteString"))
      }
    }

  def getBytes(columnName: String): RowParser[ByteString] =
    SqlParser.get(columnName)(columnToByteString)

} 
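
The implicit converters above rely on protobuf's ByteString being readable from and writable to streams. A standalone sketch of that round trip, independent of anorm and of any database:

import java.io.InputStream

import com.google.protobuf.ByteString

object ByteStringRoundTripSketch {
  def main(args: Array[String]): Unit = {
    val original = ByteString.copyFromUtf8("ledger entry payload")
    // ByteString -> InputStream, as used when binding a parameter via setBinaryStream.
    val asStream: InputStream = original.newInput()
    // InputStream -> ByteString, as used when reading a BLOB or binary column back.
    val restored = ByteString.readFrom(asStream)
    assert(restored == original)
    println(s"round-tripped ${restored.size()} bytes")
  }
}
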
Example 3
Source File: CommandUtils.scala    From drizzle-spark   with Apache License 2.0    7 votes
package org.apache.spark.deploy.worker

import java.io.{File, FileOutputStream, InputStream, IOException}

import scala.collection.JavaConverters._
import scala.collection.Map

import org.apache.spark.SecurityManager
import org.apache.spark.deploy.Command
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils


private[deploy] object CommandUtils extends Logging {

  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)
    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    //       terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          Utils.copyStream(in, out, true)
        } catch {
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
} 
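
Utils.copyStream is Spark-internal, so the following is a JDK-only sketch of the same redirect pattern: a background thread copies a process's output stream into a file until the stream ends (the command and output path are illustrative).

import java.io.{File, FileOutputStream, IOException, InputStream}

object RedirectSketch {
  def redirect(in: InputStream, file: File): Thread = {
    val t = new Thread("redirect output to " + file) {
      override def run(): Unit = {
        val out = new FileOutputStream(file, true)
        val buffer = new Array[Byte](8192)
        try {
          var read = in.read(buffer)
          while (read != -1) {
            out.write(buffer, 0, read)
            read = in.read(buffer)
          }
        } catch {
          case e: IOException => println(s"Redirection to $file closed: ${e.getMessage}")
        } finally out.close()
      }
    }
    t.setDaemon(true)
    t.start()
    t
  }

  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "hello").start()
    redirect(process.getInputStream, new File("process-output.log"))
    process.waitFor()
  }
}
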
Example 4
Source File: GuardedProcess.scala    From shadowsocksr-android   with GNU General Public License v3.0    5 votes
package com.github.shadowsocks

import java.io.{IOException, InputStream, OutputStream}
import java.lang.System.currentTimeMillis
import java.util.concurrent.Semaphore

import android.util.Log

import scala.collection.JavaConversions._


class GuardedProcess(cmd: Seq[String]) extends Process {
  private val TAG = classOf[GuardedProcess].getSimpleName

  @volatile private var guardThread: Thread = _
  @volatile private var isDestroyed: Boolean = _
  @volatile private var process: Process = _
  @volatile private var isRestart = false

  def start(onRestartCallback: () => Unit = null): GuardedProcess = {
    val semaphore = new Semaphore(1)
    semaphore.acquire
    @volatile var ioException: IOException = null

    guardThread = new Thread(() => {
      try {
        var callback: () => Unit = null
        while (!isDestroyed) {
          Log.i(TAG, "start process: " + cmd)
          val startTime = currentTimeMillis

          process = new ProcessBuilder(cmd).redirectErrorStream(true).start

          if (callback == null) callback = onRestartCallback else callback()

          semaphore.release
          process.waitFor

          if (isRestart) {
            isRestart = false
          } else {
            if (currentTimeMillis - startTime < 1000) {
              Log.w(TAG, "process exit too fast, stop guard: " + cmd)
              isDestroyed = true
            }
          }
        }
      } catch {
        case ignored: InterruptedException =>
          Log.i(TAG, "thread interrupt, destroy process: " + cmd)
          process.destroy()
        case e: IOException => ioException = e
      } finally semaphore.release
    }, "GuardThread-" + cmd)

    guardThread.start()
    semaphore.acquire

    if (ioException != null) {
      throw ioException
    }

    this
  }

  def destroy() {
    isDestroyed = true
    guardThread.interrupt()
    process.destroy()
    try guardThread.join() catch {
      case ignored: InterruptedException =>
    }
  }

  def restart() {
    isRestart = true
    process.destroy()
  }

  def exitValue: Int = throw new UnsupportedOperationException
  def getErrorStream: InputStream = throw new UnsupportedOperationException
  def getInputStream: InputStream = throw new UnsupportedOperationException
  def getOutputStream: OutputStream = throw new UnsupportedOperationException

  @throws(classOf[InterruptedException])
  def waitFor = {
    guardThread.join()
    0
  }
} 
Example 5
Source File: SnowflakeRDD.scala    From spark-snowflake   with Apache License 2.0    5 votes
package net.snowflake.spark.snowflake.io

import java.io.InputStream

import net.snowflake.spark.snowflake.io.SupportedFormat.SupportedFormat
import org.apache.spark.{Partition, SparkContext, TaskContext}
import org.apache.spark.rdd.RDD

class SnowflakeRDD(sc: SparkContext,
                   fileNames: List[String],
                   format: SupportedFormat,
                   downloadFile: String => InputStream,
                   expectedPartitionCount: Int)
    extends RDD[String](sc, Nil) {

  @transient private val MIN_FILES_PER_PARTITION = 2
  @transient private val MAX_FILES_PER_PARTITION = 10

  override def compute(split: Partition,
                       context: TaskContext): Iterator[String] = {
    val snowflakePartition = split.asInstanceOf[SnowflakePartition]

    val stringIterator = new SFRecordReader(format, snowflakePartition.index)
    stringIterator.setDownloadFunction(downloadFile)

    snowflakePartition.fileNames.foreach(name => {
      stringIterator.addFileName(name)
    })

    logger.info(
      s"""${SnowflakeResultSetRDD.WORKER_LOG_PREFIX}: Start reading
         | partition ID:${snowflakePartition.index}
         | totalFileCount=${snowflakePartition.fileNames.size}
         |""".stripMargin.filter(_ >= ' '))

    stringIterator
  }

  override protected def getPartitions: Array[Partition] = {
    var fileCountPerPartition =
      Math.max(
        MIN_FILES_PER_PARTITION,
        (fileNames.length + expectedPartitionCount / 2) / expectedPartitionCount
      )
    fileCountPerPartition = Math.min(MAX_FILES_PER_PARTITION, fileCountPerPartition)
    val fileCount = fileNames.length
    val partitionCount = (fileCount + fileCountPerPartition - 1) / fileCountPerPartition
    logger.info(s"""${SnowflakeResultSetRDD.MASTER_LOG_PREFIX}: Total statistics:
         | fileCount=$fileCount filePerPartition=$fileCountPerPartition
         | actualPartitionCount=$partitionCount
         | expectedPartitionCount=$expectedPartitionCount
         |""".stripMargin.filter(_ >= ' '))

    if (fileNames.nonEmpty) {
      fileNames
        .grouped(fileCountPerPartition)
        .zipWithIndex
        .map {
          case (names, index) => SnowflakePartition(names, id, index)
        }
        .toArray
    } else {
      // If the result set is empty, put one empty partition to the array.
      Seq[SnowflakePartition]{SnowflakePartition(fileNames, 0, 0)}.toArray
    }
  }

}

private case class SnowflakePartition(fileNames: List[String],
                                      rddId: Int,
                                      index: Int)
    extends Partition {

  override def hashCode(): Int = 31 * (31 + rddId) + index

  override def equals(other: Any): Boolean = super.equals(other)
} 
Example 6
Source File: ValueSerializer.scala    From daml   with Apache License 2.0    5 votes
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.store.serialization

import java.io.InputStream

import com.daml.lf.archive.{Decode, Reader}
import com.daml.lf.value.Value.{ContractId, VersionedValue}
import com.daml.lf.value.{ValueCoder, ValueOuterClass}

object ValueSerializer {

  def serializeValue(
      value: VersionedValue[ContractId],
      errorContext: => String,
  ): Array[Byte] =
    ValueCoder
      .encodeVersionedValueWithCustomVersion(ValueCoder.CidEncoder, value)
      .fold(error => sys.error(s"$errorContext (${error.errorMessage})"), _.toByteArray)

  private def deserializeValueHelper(
      stream: InputStream,
      errorContext: => Option[String],
  ): VersionedValue[ContractId] =
    ValueCoder
      .decodeVersionedValue(
        ValueCoder.CidDecoder,
        ValueOuterClass.VersionedValue.parseFrom(
          Decode.damlLfCodedInputStream(stream, Reader.PROTOBUF_RECURSION_LIMIT)))
      .fold(
        error =>
          sys.error(errorContext.fold(error.errorMessage)(ctx => s"$ctx (${error.errorMessage})")),
        identity
      )

  def deserializeValue(
      stream: InputStream,
  ): VersionedValue[ContractId] =
    deserializeValueHelper(stream, None)

  def deserializeValue(
      stream: InputStream,
      errorContext: => String,
  ): VersionedValue[ContractId] =
    deserializeValueHelper(stream, Some(errorContext))

} 
Example 7
Source File: ContractSerializer.scala    From daml   with Apache License 2.0    5 votes
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package db.migration.translation

import java.io.InputStream

import com.daml.lf.archive.{Decode, Reader}
import com.daml.lf.transaction.{TransactionCoder, TransactionOuterClass}
import com.daml.lf.value.Value.{ContractId, ContractInst, VersionedValue}
import com.daml.lf.value.ValueCoder

trait ContractSerializer {
  def serializeContractInstance(
      coinst: ContractInst[VersionedValue[ContractId]]): Either[ValueCoder.EncodeError, Array[Byte]]

  def deserializeContractInstance(
      stream: InputStream): Either[ValueCoder.DecodeError, ContractInst[VersionedValue[ContractId]]]
}


object ContractSerializer extends ContractSerializer {

  override def serializeContractInstance(coinst: ContractInst[VersionedValue[ContractId]])
    : Either[ValueCoder.EncodeError, Array[Byte]] =
    TransactionCoder
      .encodeContractInstance[ContractId](ValueCoder.CidEncoder, coinst)
      .map(_.toByteArray())

  override def deserializeContractInstance(stream: InputStream)
    : Either[ValueCoder.DecodeError, ContractInst[VersionedValue[ContractId]]] =
    TransactionCoder
      .decodeContractInstance[ContractId](
        ValueCoder.CidDecoder,
        TransactionOuterClass.ContractInstance.parseFrom(
          Decode.damlLfCodedInputStream(stream, Reader.PROTOBUF_RECURSION_LIMIT))
      )

} 
Example 8
Source File: TransactionSerializer.scala    From daml   with Apache License 2.0    5 votes
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package db.migration.translation

import java.io.InputStream

import com.daml.lf.archive.{Decode, Reader}
import com.daml.lf.data.Ref.LedgerString
import com.daml.lf.transaction.{Transaction => Tx, TransactionCoder, TransactionOuterClass}
import com.daml.lf.value.ValueCoder
import com.daml.lf.value.ValueCoder.{DecodeError, EncodeError}

trait TransactionSerializer {

  def serializeTransaction(
      trId: LedgerString,
      transaction: Tx.CommittedTransaction,
  ): Either[EncodeError, Array[Byte]]

  def deserializeTransaction(
      trId: LedgerString,
      stream: InputStream,
  ): Either[DecodeError, Tx.CommittedTransaction]

}

object TransactionSerializer extends TransactionSerializer {

  override def serializeTransaction(
      trId: LedgerString,
      transaction: Tx.CommittedTransaction,
  ): Either[EncodeError, Array[Byte]] =
    TransactionCoder
      .encodeTransaction(
        TransactionCoder.EventIdEncoder(trId),
        ValueCoder.CidEncoder,
        transaction
      )
      .map(_.toByteArray())

  override def deserializeTransaction(
      trId: LedgerString,
      stream: InputStream): Either[DecodeError, Tx.CommittedTransaction] =
    TransactionCoder
      .decodeTransaction(
        TransactionCoder.EventIdDecoder(trId),
        ValueCoder.CidDecoder,
        TransactionOuterClass.Transaction.parseFrom(
          Decode.damlLfCodedInputStream(stream, Reader.PROTOBUF_RECURSION_LIMIT))
      )
      .map(Tx.CommittedTransaction(_))
} 
Example 9
Source File: DarManifestReader.scala    From daml   with Apache License 2.0    5 votes
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf
package archive

import java.io.InputStream
import java.util.jar.{Attributes, Manifest}

import scala.util.{Failure, Success, Try}

object DarManifestReader {

  private val supportedFormat = "daml-lf"

  def dalfNames(is: InputStream): Try[Dar[String]] = {
    val manifest = new Manifest(is)
    val attributes = value(manifest.getMainAttributes) _
    for {
      mainDalf <- attributes("Main-Dalf")
      allDalfs <- attributes("Dalfs")
      format <- attributes("Format")
      _ <- checkFormat(format)
    } yield Dar(mainDalf, dependencies(allDalfs, mainDalf))
  }

  private def dependencies(other: String, main: String): List[String] = {
    val deps = other.split(',').view.map(_.trim)
    deps.filter(x => x != main).toList
  }

  private def value(attributes: Attributes)(key: String): Try[String] =
    Option(attributes.getValue(key)) match {
      case None => failure(s"Cannot find attribute: $key")
      case Some(x) => Success(x.trim)
    }

  private def checkFormat(format: String): Try[Unit] =
    if (format == supportedFormat) Success(())
    else failure(s"Unsupported format: $format")

  private def failure(msg: String) = Failure(DarManifestReaderException(msg))

  case class DarManifestReaderException(msg: String) extends IllegalStateException(msg)
} 
Example 10
Source File: Decode.scala    From daml   with Apache License 2.0    5 votes
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf
package archive

import java.io.InputStream

import com.daml.lf.data.Ref._
import com.daml.lf.language.Ast._
import com.daml.lf.language.LanguageMajorVersion._
import com.daml.lf.language.LanguageVersion
import com.daml.daml_lf_dev.DamlLf
import com.google.protobuf.CodedInputStream

sealed class Decode(onlySerializableDataDefs: Boolean)
    extends archive.Reader[(PackageId, Package)] {
  import Decode._

  private[lf] val decoders: PartialFunction[LanguageVersion, PayloadDecoder] = {
    case LanguageVersion(V1, minor) if V1.supportedMinorVersions.contains(minor) =>
      PayloadDecoder(new DecodeV1(minor))(_.getDamlLf1)
  }

  override protected[this] def readArchivePayloadOfVersion(
      hash: PackageId,
      lf: DamlLf.ArchivePayload,
      version: LanguageVersion
  ): (PackageId, Package) = {
    val decoder =
      decoders.lift(version).getOrElse(throw ParseError(s"$version unsupported"))

    (hash, decoder.decoder.decodePackage(hash, decoder.extract(lf), onlySerializableDataDefs))
  }
}

object Decode extends Decode(onlySerializableDataDefs = false) {
  type ParseError = Reader.ParseError
  val ParseError = Reader.ParseError

  def damlLfCodedInputStreamFromBytes(
      payload: Array[Byte],
      recursionLimit: Int = PROTOBUF_RECURSION_LIMIT
  ): CodedInputStream =
    Reader.damlLfCodedInputStreamFromBytes(payload, recursionLimit)

  def damlLfCodedInputStream(
      is: InputStream,
      recursionLimit: Int = PROTOBUF_RECURSION_LIMIT): CodedInputStream =
    Reader.damlLfCodedInputStream(is, recursionLimit)

  
  private[lf] sealed abstract class PayloadDecoder {
    type I
    val extract: DamlLf.ArchivePayload => I
    val decoder: OfPackage[I]
  }

  private[archive] object PayloadDecoder {
    def apply[I0](fi: OfPackage[I0])(k: DamlLf.ArchivePayload => I0): PayloadDecoder =
      new PayloadDecoder {
        type I = I0
        override val extract = k
        override val decoder = fi
      }
  }

  private[lf] trait OfPackage[-Pkg] {
    type ProtoScenarioModule
    def protoScenarioModule(cis: CodedInputStream): ProtoScenarioModule
    @throws[ParseError]
    def decodePackage(
        packageId: PackageId,
        lfPackage: Pkg,
        onlySerializableDataDefs: Boolean = false): Package
    @throws[ParseError]
    def decodeScenarioModule(packageId: PackageId, lfModuleForScenario: ProtoScenarioModule): Module
  }

  private def identifierStart(c: Char) =
    'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '$' || c == '_'

  private def identifierPart(c: Char): Boolean =
    identifierStart(c) || '0' <= c && c <= '9'

  def checkIdentifier(s: String): Unit = {
    if (s.isEmpty)
      throw Reader.ParseError("empty identifier")
    else if (!(identifierStart(s.head) && s.tail.forall(identifierPart)))
      throw Reader.ParseError(s"identifier $s contains invalid character")
  }

  private val decimalPattern = "[+-]*[0-9]{0,28}(\\.[0-9]{0,10})*".r.pattern
  def checkDecimal(s: String): Boolean =
    decimalPattern.matcher(s).matches()

} 
Example 11
Source File: DarManifestReaderTest.scala    From daml   with Apache License 2.0    5 votes
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.archive

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.Charset

import com.daml.lf.archive.DarManifestReader.DarManifestReaderException
import org.scalatest.{Inside, Matchers, WordSpec}

import scala.util.{Failure, Success}

class DarManifestReaderTest extends WordSpec with Matchers with Inside {

  private val unicode = Charset.forName("UTF-8")

  "should read dalf names from manifest, real scenario with Dalfs line split" in {
    val manifest = """Manifest-Version: 1.0
      |Created-By: Digital Asset packager (DAML-GHC)
      |Main-Dalf: com.daml.lf.archive:DarReaderTest:0.1.dalf
      |Dalfs: com.daml.lf.archive:DarReaderTest:0.1.dalf, daml-pri
      | m.dalf
      |Format: daml-lf
      |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(
      Dar("com.daml.lf.archive:DarReaderTest:0.1.dalf", List("daml-prim.dalf")))

    inputStream.close()
  }

  "should read dalf names from manifest, Main-Dalf returned in the head" in {
    val manifest = """Main-Dalf: A.dalf
                     |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf
                     |Format: daml-lf
                     |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(Dar("A.dalf", List("B.dalf", "C.dalf", "E.dalf")))

    inputStream.close()
  }

  "should read dalf names from manifest, can handle one Dalf per manifest" in {
    val manifest = """Main-Dalf: A.dalf
                     |Dalfs: A.dalf
                     |Format: daml-lf
                     |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(Dar("A.dalf", List.empty))

    inputStream.close()
  }

  "should return failure if Format is not daml-lf" in {
    val manifest = """Main-Dalf: A.dalf
                     |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf
                     |Format: anything-different-from-daml-lf
                     |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    inside(actual) {
      case Failure(DarManifestReaderException(msg)) =>
        msg shouldBe "Unsupported format: anything-different-from-daml-lf"
    }

    inputStream.close()
  }
} 
Example 12
Source File: ProcessTestUtils.scala    From drizzle-spark   with Apache License 2.0    5 votes
package org.apache.spark.sql.test

import java.io.{InputStream, IOException}

import scala.sys.process.BasicIO

object ProcessTestUtils {
  class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread {
    this.setDaemon(true)

    override def run(): Unit = {
      try {
        BasicIO.processFully(capture)(stream)
      } catch { case _: IOException =>
        // Ignores the IOException thrown when the process termination, which closes the input
        // stream abruptly.
      }
    }
  }
} 
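
A usage sketch for the capturer above: BasicIO.processFully feeds the callback line by line, so collecting the output of a short-lived process looks roughly like this (the echo command is illustrative).

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

object CaptureSketch {
  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "hello world").start()
    val captured = new StringBuilder
    val capturer = new ProcessOutputCapturer(process.getInputStream, line => captured.append(line).append('\n'))
    capturer.start()
    process.waitFor()
    capturer.join() // the capturer is a daemon thread, so wait for it before reading the buffer
    println(captured.toString)
  }
}
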
Example 13
Source File: CryptoStreamUtils.scala    From drizzle-spark   with Apache License 2.0    5 votes
package org.apache.spark.security

import java.io.{InputStream, OutputStream}
import java.util.Properties
import javax.crypto.spec.{IvParameterSpec, SecretKeySpec}

import org.apache.commons.crypto.random._
import org.apache.commons.crypto.stream._
import org.apache.hadoop.io.Text

import org.apache.spark.SparkConf
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._


private[spark] object CryptoStreamUtils extends Logging {

  // Length of the initialization vector used by the commons-crypto streams (AES block size).
  val IV_LENGTH_IN_BYTES = 16

  private[this] def createInitializationVector(properties: Properties): Array[Byte] = {
    val iv = new Array[Byte](IV_LENGTH_IN_BYTES)
    val initialIVStart = System.currentTimeMillis()
    CryptoRandomFactory.getCryptoRandom(properties).nextBytes(iv)
    val initialIVFinish = System.currentTimeMillis()
    val initialIVTime = initialIVFinish - initialIVStart
    if (initialIVTime > 2000) {
      logWarning(s"It costs ${initialIVTime} milliseconds to create the Initialization Vector " +
        s"used by CryptoStream")
    }
    iv
  }
} 
Example 14
Source File: MetricsConfig.scala    From drizzle-spark   with Apache License 2.0    5 votes
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

private[spark] class MetricsConfig(conf: SparkConf) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  private[metrics] var perInstanceSubProperties: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties) {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  
  private[this] def loadPropertiesFromFile(path: Option[String]): Unit = {
    var is: InputStream = null
    try {
      is = path match {
        case Some(f) => new FileInputStream(f)
        case None => Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)
      }

      if (is != null) {
        properties.load(is)
      }
    } catch {
      case e: Exception =>
        val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME)
        logError(s"Error loading configuration file $file", e)
    } finally {
      if (is != null) {
        is.close()
      }
    }
  }

} 
Example 15
Source File: ReplayListenerBus.scala    From drizzle-spark   with Apache License 2.0    5 votes
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.ReplayListenerBus._
import org.apache.spark.util.JsonProtocol


private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false,
      eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = {

    var currentLine: String = null
    var lineNumber: Int = 0

    try {
      val lineEntries = Source.fromInputStream(logData)
        .getLines()
        .zipWithIndex
        .filter { case (line, _) => eventsFilter(line) }

      while (lineEntries.hasNext) {
        try {
          val entry = lineEntries.next()

          currentLine = entry._1
          lineNumber = entry._2 + 1

          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated
            // the last entry may not be the very last line in the event log, but we treat it
            // as such in a best effort to replay the given input
            if (!maybeTruncated || lineEntries.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

}


private[spark] object ReplayListenerBus {

  type ReplayEventsFilter = (String) => Boolean

  // utility filter that selects all event logs during replay
  val SELECT_ALL_FILTER: ReplayEventsFilter = { (eventString: String) => true }
} 
Example 16
Source File: TFRecordIterator.scala    From BigDL   with Apache License 2.0    5 votes
package com.intel.analytics.bigdl.utils.tf

import java.io.{BufferedInputStream, File, FileInputStream, InputStream}
import java.nio.{ByteBuffer, ByteOrder}


class TFRecordIterator(inputStream: InputStream) extends Iterator[Array[Byte]] {

  private var dataBuffer: Array[Byte] = null

  private val lengthBuffer: Array[Byte] = new Array[Byte](8)



  override def hasNext: Boolean = {
    if (dataBuffer != null) {
      true
    } else {
      val numOfBytes = inputStream.read(lengthBuffer)
      if (numOfBytes == 8) {
        val lengthWrapper = ByteBuffer.wrap(lengthBuffer)
        lengthWrapper.order(ByteOrder.LITTLE_ENDIAN)
        val length = lengthWrapper.getLong().toInt
        // todo, do crc check, simply skip now
        inputStream.skip(4)

        dataBuffer = new Array[Byte](length)
        inputStream.read(dataBuffer)
        // todo, do crc check, simply skip now
        inputStream.skip(4)
        true
      } else {
        inputStream.close()
        false
      }
    }
  }

  override def next(): Array[Byte] = {
    if (hasNext) {
      val data = this.dataBuffer
      this.dataBuffer = null
      data
    } else {
      throw new NoSuchElementException("next on empty iterator")
    }
  }
}

object TFRecordIterator {
  def apply(file: File): TFRecordIterator = {
    val inputStream = new FileInputStream(file)
    new TFRecordIterator(inputStream)
  }
} 
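
A usage sketch, assuming a TFRecord file at a hypothetical path: each element yielded by the iterator is the raw serialized bytes of one record.

import java.io.File

import com.intel.analytics.bigdl.utils.tf.TFRecordIterator

object TFRecordSketch {
  def main(args: Array[String]): Unit = {
    val records = TFRecordIterator(new File("/tmp/train.tfrecord")) // hypothetical path
    records.take(3).zipWithIndex.foreach { case (bytes, i) =>
      println(s"record $i: ${bytes.length} bytes")
    }
  }
}
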
Example 17
Source File: WarcRecord.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.specific.warc

import java.io.{BufferedInputStream, InputStream}

import org.archive.archivespark.dataspecs.access.DataAccessor
import org.archive.archivespark.functions.StringContent
import org.archive.archivespark.model.dataloads.{ByteLoad, DataLoad, TextLoad}
import org.archive.archivespark.model.pointers.FieldPointer
import org.archive.archivespark.model.{DataEnrichRoot, EnrichRootCompanion}
import org.archive.archivespark.sparkling.cdx.CdxRecord
import org.archive.archivespark.sparkling.warc.{WarcRecord => WARC}
import org.archive.archivespark.specific.warc.functions.WarcPayload

class WarcRecord(cdx: CdxRecord, val data: DataAccessor[InputStream]) extends DataEnrichRoot[CdxRecord, WARC](cdx) with WarcLikeRecord {
  override def access[R >: Null](action: WARC => R): R = data.access { stream =>
    WARC.get(if (stream.markSupported) stream else new BufferedInputStream(stream)) match {
      case Some(record) => action(record)
      case None => null
    }
  }

  override def companion: EnrichRootCompanion[WarcRecord] = WarcRecord
}

object WarcRecord extends EnrichRootCompanion[WarcRecord] {
  override def dataLoad[T](load: DataLoad[T]): Option[FieldPointer[WarcRecord, T]] = (load match {
    case ByteLoad => Some(WarcPayload)
    case TextLoad => Some(StringContent)
    case _ => None
  }).map(_.asInstanceOf[FieldPointer[WarcRecord, T]])
} 
Example 18
Source File: FileStreamRecord.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.specific.raw

import java.io.InputStream

import org.archive.archivespark.dataspecs.access.DataAccessor
import org.archive.archivespark.model.{DataEnrichRoot, EnrichRootCompanion}
import org.archive.archivespark.sparkling.io.IOUtil
import org.archive.archivespark.sparkling.util.{IteratorUtil, StringUtil}

import scala.io.Source

class FileStreamRecord(path: String, accessor: DataAccessor[InputStream], retryDelayMs: Option[Int] = None) extends DataEnrichRoot[String, InputStream](path) {
  override def access[R >: Null](action: InputStream => R): R = accessor.access(action)

  def accessSource[R >: Null](action: Source => R): R = access { stream =>
    StringUtil.source(stream) { source =>
      action(source)
    }
  }

  def lineIterator: Iterator[String] = accessor.get match {
    case Some(stream) => IteratorUtil.cleanup(IOUtil.lines(stream), () => stream.close())
    case None => Iterator.empty
  }

  override def companion: EnrichRootCompanion[FileStreamRecord] = FileStreamRecord
}

object FileStreamRecord extends EnrichRootCompanion[FileStreamRecord] 
Example 19
Source File: HdfsFileAccessor.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.dataspecs.access

import java.io.InputStream

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.deploy.SparkHadoopUtil
import org.archive.archivespark.sparkling.io.IOUtil

class HdfsFileAccessor(path: String, decompress: Boolean = true) extends CloseableDataAccessor[InputStream] {
  override def get: Option[InputStream] = {
    val fs = FileSystem.get(SparkHadoopUtil.get.conf)
    var stream: InputStream = null
    try {
      val raw = fs.open(new Path(path))
      stream = if (decompress) IOUtil.decompress(raw, Some(path)) else raw
      Some(stream)
    } catch {
      case e: Exception =>
        e.printStackTrace()
        if (stream != null) stream.close()
        None
    }
  }
} 
Example 20
Source File: HdfsStreamAccessor.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.dataspecs.access

import java.io.InputStream

import org.apache.commons.io.input.BoundedInputStream
import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path}
import org.apache.spark.deploy.SparkHadoopUtil

class HdfsStreamAccessor(location: HdfsLocationInfo) extends CloseableDataAccessor[InputStream] {
  override def get: Option[InputStream] = {
    if (location.length < 0 || location.offset < 0) None
    else {
      val fs = FileSystem.get(SparkHadoopUtil.get.conf)
      var stream: FSDataInputStream = null
      try {
        stream = fs.open(new Path(location.path))
        stream.seek(location.offset)
        Some(new BoundedInputStream(stream, location.length))
      } catch {
        case e: Exception =>
          e.printStackTrace()
          if (stream != null) stream.close()
          None
      }
    }
  }
} 
Example 21
Source File: ByteArrayAccessor.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.dataspecs.access

import java.io.InputStream
import java.util.zip.GZIPInputStream

import org.archive.archivespark.sparkling.io.ByteArray

class ByteArrayAccessor(bytes: ByteArray, gz: Boolean = false) extends CloseableDataAccessor[InputStream] {
  def this(bytes: Array[Byte], gz: Boolean) = this({
    val array = new ByteArray()
    array.append(bytes)
    array
  }, gz)

  def this(bytes: Array[Byte]) = this(bytes, false)

  override def get: Option[InputStream] = {
    var stream: InputStream = null
    try {
      stream = bytes.toInputStream
      stream = if (gz) new GZIPInputStream(stream) else stream
      Some(stream)
    } catch {
      case e: Exception =>
        e.printStackTrace()
        if (stream != null) stream.close()
        None
    }
  }
} 
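
A usage sketch for the accessor above, feeding it a plain byte array (gz stays false because the bytes are uncompressed):

import org.archive.archivespark.dataspecs.access.ByteArrayAccessor

import scala.io.Source

object ByteArrayAccessorSketch {
  def main(args: Array[String]): Unit = {
    val accessor = new ByteArrayAccessor("hello, archive".getBytes("UTF-8"))
    accessor.get.foreach { in =>
      try println(Source.fromInputStream(in, "UTF-8").mkString)
      finally in.close()
    }
  }
}
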
Example 22
Source File: HttpClient.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.sparkling.http

import java.io.{BufferedInputStream, InputStream}
import java.net.{HttpURLConnection, URL, URLConnection}

import org.archive.archivespark.sparkling.logging.LogContext
import org.archive.archivespark.sparkling.util.Common

import scala.collection.JavaConverters._
import scala.util.Try

object HttpClient {
  val DefaultRetries: Int = 30
  val DefaultSleepMillis: Int = 1000
  val DefaultTimeoutMillis: Int = -1

  implicit val logContext: LogContext = LogContext(this)

  def request[R](url: String, headers: Map[String, String] = Map.empty, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: InputStream => R): R = rangeRequest(url, headers, retries = retries, sleepMillis = sleepMillis, timeoutMillis = timeoutMillis)(action)

  def rangeRequest[R](url: String, headers: Map[String, String] = Map.empty, offset: Long = 0, length: Long = -1, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: InputStream => R): R = {
    rangeRequestConnection(url, headers, offset, length, retries, sleepMillis, timeoutMillis) { case connection: HttpURLConnection =>
      val in = new BufferedInputStream(connection.getInputStream)
      val r = action(in)
      Try(in.close())
      r
    }
  }

  def requestMessage[R](url: String, headers: Map[String, String] = Map.empty, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: HttpMessage => R): R = rangeRequestMessage(url, headers, retries = retries, sleepMillis = sleepMillis, timeoutMillis = timeoutMillis)(action)

  def rangeRequestMessage[R](url: String, headers: Map[String, String] = Map.empty, offset: Long = 0, length: Long = -1, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: HttpMessage => R): R = {
    rangeRequestConnection(url, headers, offset, length, retries, sleepMillis, timeoutMillis) { case connection: HttpURLConnection =>
      val in = new BufferedInputStream(connection.getInputStream)
      val responseHeaders = connection.getHeaderFields.asScala.toMap.flatMap{case (k, v) => v.asScala.headOption.map((if (k == null) "" else k) -> _)}
      val message = new HttpMessage(connection.getResponseMessage, responseHeaders, in)
      val r = action(message)
      Try(in.close())
      r
    }
  }

  def requestConnection[R](url: String, headers: Map[String, String] = Map.empty, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: URLConnection => R): R = rangeRequestConnection(url, headers, retries = retries, sleepMillis = sleepMillis, timeoutMillis = timeoutMillis)(action)

  def rangeRequestConnection[R](url: String, headers: Map[String, String] = Map.empty, offset: Long = 0, length: Long = -1, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: URLConnection => R): R = {
    Common.timeoutWithReporter(timeoutMillis) { reporter =>
      val connection = Common.retry(retries, sleepMillis, (retry, e) => {
        "Request failed (" + retry + "/" + retries + "): " + url + " (" + offset + "-" + (if (length >= 0) length else "") + ") - " + e.getMessage
      }) { _ =>
        reporter.alive()
        val connection = new URL(url).openConnection()
        for ((key, value) <- headers) connection.addRequestProperty(key, value)
        if (offset > 0 || length >= 0) connection.addRequestProperty("Range", "bytes=" + offset + "-" + (if (length >= 0) offset + length - 1 else ""))
        connection.asInstanceOf[HttpURLConnection]
      }
      val r = action(connection)
      Try(connection.disconnect())
      r
    }
  }
} 
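
A usage sketch: request hands the response body to the action as a buffered InputStream and closes it afterwards, so reading a page into a string looks roughly like this (the URL is illustrative).

import org.archive.archivespark.sparkling.http.HttpClient

import scala.io.Source

object HttpClientSketch {
  def main(args: Array[String]): Unit = {
    val body = HttpClient.request("https://example.org/") { in =>
      Source.fromInputStream(in, "UTF-8").mkString
    }
    println(body.take(200))
  }
}
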
Example 23
Source File: HttpMessage.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.sparkling.http

import java.io.{BufferedInputStream, InputStream}
import java.util.zip.GZIPInputStream

import org.apache.commons.httpclient.ChunkedInputStream
import org.apache.http.client.entity.DeflateInputStream
import org.archive.archivespark.sparkling.io.IOUtil
import org.archive.archivespark.sparkling.util.StringUtil

import scala.collection.immutable.ListMap
import scala.util.Try

class HttpMessage (val statusLine: String, val headers: Map[String, String], val payload: InputStream) {
  import HttpMessage._

  lazy val lowerCaseHeaders: Map[String, String] = headers.map{case (k,v) => (k.toLowerCase, v)}

  def contentEncoding: Option[String] = lowerCaseHeaders.get("content-encoding").map(_.toLowerCase)
  def mime: Option[String] = lowerCaseHeaders.get("content-type").map(_.split(';').head.trim.toLowerCase)
  def charset: Option[String] = {
    lowerCaseHeaders.get("content-type").flatMap(_.split(';').drop(1).headOption).map(_.trim)
      .filter(_.startsWith("charset="))
      .map(_.drop(8).trim.stripPrefix("\"").stripPrefix("'").stripSuffix("'").stripSuffix("\"").split(",", 2).head.trim)
      .filter(_.nonEmpty).map(_.toUpperCase)
  }
  def redirectLocation: Option[String] = lowerCaseHeaders.get("location").map(_.trim)
  def isChunked: Boolean = lowerCaseHeaders.get("transfer-encoding").map(_.toLowerCase).contains("chunked")

  def status: Int = statusLine.split(" +").drop(1).headOption.flatMap(s => Try{s.toInt}.toOption).getOrElse(-1)

  lazy val body: InputStream = Try {
    var decoded = if (isChunked) new ChunkedInputStream(payload) else payload
    val decoders = contentEncoding.toSeq.flatMap(_.split(',').map(_.trim).flatMap(DecoderRegistry.get))
    for (decoder <- decoders) decoded = decoder(decoded)
    new BufferedInputStream(decoded)
  }.getOrElse(IOUtil.emptyStream)

  lazy val bodyString: String = StringUtil.fromInputStream(body, charset.toSeq ++ BodyCharsets)
}

object HttpMessage {
  val Charset: String = "UTF-8"
  val HttpMessageStart = "HTTP/"
  val BodyCharsets: Seq[String] = Seq("UTF-8", "ISO-8859-1", "WINDOWS-1252")

  // see org.apache.http.client.protocol.ResponseContentEncoding
  val DecoderRegistry: Map[String, InputStream => InputStream] = Map(
    "gzip" -> ((in: InputStream) => new GZIPInputStream(in)),
    "x-gzip" -> ((in: InputStream) => new GZIPInputStream(in)),
    "deflate" -> ((in: InputStream) => new DeflateInputStream(in))
  )

  def get(in: InputStream): Option[HttpMessage] = {
    var line = StringUtil.readLine(in, Charset)
    while (line != null && !{
      if (line.startsWith(HttpMessageStart)) {
        val statusLine = line
        val headers = collection.mutable.Buffer.empty[(String, String)]
        line = StringUtil.readLine(in, Charset)
        while (line != null && line.trim.nonEmpty) {
          val split = line.split(":", 2)
          if (split.length == 2) headers += ((split(0).trim, split(1).trim))
          line = StringUtil.readLine(in, Charset)
        }
        return Some(new HttpMessage(statusLine, ListMap(headers: _*), in))
      }
      false
    }) line = StringUtil.readLine(in, Charset)
    None
  }
} 
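
A usage sketch that parses a raw, uncompressed HTTP response held in memory: HttpMessage.get scans the stream for the status line, collects the headers, and leaves the stream positioned at the body.

import java.io.ByteArrayInputStream

import org.archive.archivespark.sparkling.http.HttpMessage

object HttpMessageSketch {
  def main(args: Array[String]): Unit = {
    val raw =
      "HTTP/1.1 200 OK\r\n" +
      "Content-Type: text/plain; charset=utf-8\r\n" +
      "\r\n" +
      "hello body"
    val in = new ByteArrayInputStream(raw.getBytes("UTF-8"))
    HttpMessage.get(in).foreach { message =>
      println(message.status)     // 200
      println(message.mime)       // Some(text/plain)
      println(message.bodyString) // hello body
    }
  }
}
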
Example 24
Source File: DigestUtil.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.sparkling.util

import java.io.InputStream

import org.apache.commons.codec.binary.Base32
import org.apache.commons.codec.digest.DigestUtils

object DigestUtil {
  def sha1Base32(in: InputStream): String = {
    val digest = DigestUtils.sha1(in)
    new Base32().encodeAsString(digest).toUpperCase
  }

  def sha1Base32(bytes: Array[Byte]): String = {
    val digest = DigestUtils.sha1(bytes)
    new Base32().encodeAsString(digest).toUpperCase
  }

  def sha1Base32(str: String): String = {
    val digest = DigestUtils.sha1(str)
    new Base32().encodeAsString(digest).toUpperCase
  }
} 
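
A usage sketch: all three overloads produce the same Base32-encoded SHA-1 digest when given the same content, whether it arrives as a stream, a byte array, or a string.

import java.io.ByteArrayInputStream

import org.archive.archivespark.sparkling.util.DigestUtil

object DigestSketch {
  def main(args: Array[String]): Unit = {
    val content = "hello"
    val fromStream = DigestUtil.sha1Base32(new ByteArrayInputStream(content.getBytes("UTF-8")))
    val fromBytes = DigestUtil.sha1Base32(content.getBytes("UTF-8"))
    val fromString = DigestUtil.sha1Base32(content)
    assert(fromStream == fromBytes && fromBytes == fromString)
    println(fromString)
  }
}
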
Example 25
Source File: TypedInOut.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.sparkling.io

import java.io.{InputStream, OutputStream}

trait TypedInOut[A] extends Serializable {
  trait TypedInOutWriter {
    def stream: OutputStream
    def write(record: A)
    def flush(): Unit
    def close(): Unit
  }

  def out(stream: OutputStream): TypedInOutWriter
  def in(stream: InputStream): Iterator[A]
}

object TypedInOut {
  def apply[A, O](writer: OutputStream => O, reader: InputStream => Iterator[A])(writeRecord: (A, O) => Unit, flushOut: O => Unit, closeOut: O => Unit): TypedInOut[A] = new TypedInOut[A] {
    override def out(outStream: OutputStream): TypedInOutWriter = new TypedInOutWriter {
      override val stream: OutputStream = outStream
      private val out = writer(stream)
      override def write(record: A): Unit = writeRecord(record, out)
      override def flush(): Unit = flushOut(out)
      override def close(): Unit = closeOut(out)
    }

    override def in(inStream: InputStream): Iterator[A] = reader(inStream)
  }

  implicit val stringInOut: TypedInOut[String] = TypedInOut(IOUtil.print(_), IOUtil.lines(_))(
    (r, o) => o.println(r),
    _.flush(),
    _.close()
  )

  def toStringInOut[A](toString: A => String, fromString: String => A): TypedInOut[A] = TypedInOut(IOUtil.print(_), IOUtil.lines(_).map(fromString))(
    (r, o) => o.println(toString(r)),
    _.flush(),
    _.close()
  )
} 
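
A usage sketch of toStringInOut, round-tripping integers through an in-memory stream. This assumes only what the example shows: out produces a line-oriented writer and in parses the lines back.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import org.archive.archivespark.sparkling.io.TypedInOut

object TypedInOutSketch {
  def main(args: Array[String]): Unit = {
    val intInOut = TypedInOut.toStringInOut[Int](_.toString, _.toInt)

    val bytes = new ByteArrayOutputStream()
    val writer = intInOut.out(bytes)
    Seq(1, 2, 3).foreach(writer.write)
    writer.flush()
    writer.close()

    val back = intInOut.in(new ByteArrayInputStream(bytes.toByteArray)).toList
    println(back) // List(1, 2, 3)
  }
}
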
Example 26
Source File: NonClosingInputStream.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.sparkling.io

import java.io.InputStream

class NonClosingInputStream(in: InputStream) extends InputStream {
  override def read(): Int = in.read()
  override def read(b: Array[Byte]): Int = in.read(b)
  override def read(b: Array[Byte], off: Int, len: Int): Int = in.read(b, off, len)
  override def skip(n: Long): Long = in.skip(n)
  override def available(): Int = in.available()
  override def close(): Unit = {}
  override def mark(readlimit: Int): Unit = in.mark(readlimit)
  override def reset(): Unit = in.reset()
  override def markSupported(): Boolean = in.markSupported()
} 
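
A usage sketch showing what the wrapper is for: closing the wrapper is a no-op, so the underlying stream stays open and the caller can keep reading from it.

import java.io.ByteArrayInputStream

import org.archive.archivespark.sparkling.io.NonClosingInputStream

object NonClosingSketch {
  def main(args: Array[String]): Unit = {
    val underlying = new ByteArrayInputStream("first|second".getBytes("UTF-8"))

    val wrapped = new NonClosingInputStream(underlying)
    val first = new Array[Byte](6)
    wrapped.read(first)
    wrapped.close() // no-op: the underlying stream is not closed

    println(new String(first, "UTF-8")) // first|
    println(underlying.available())     // 6 bytes of "second" still readable
  }
}
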
Example 27
Source File: GzipUtil.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.sparkling.io

import java.io.{BufferedInputStream, InputStream}

import com.google.common.io.CountingInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.archive.archivespark.sparkling.Sparkling
import org.archive.archivespark.sparkling.util.IteratorUtil

import scala.util.Try

object GzipUtil {
  import Sparkling._

  val Magic0 = 31
  val Magic1 = 139

  def isCompressed(in: InputStream): Boolean = {
    in.mark(2)
    val (b0, b1) = (in.read, in.read)
    in.reset()
    b0 == Magic0 && b1 == Magic1
  }

  def decompressConcatenated(in: InputStream): Iterator[InputStream] = decompressConcatenatedWithPosition(in).map{case (pos, s) => s}

  def decompressConcatenatedWithPosition(in: InputStream): Iterator[(Long, InputStream)] = {
    val stream = new CountingInputStream(IOUtil.supportMark(new NonClosingInputStream(in)))
    var last: Option[InputStream] = None
    IteratorUtil.whileDefined {
      if (last.isDefined) IOUtil.readToEnd(last.get, close = true)
      if (IOUtil.eof(stream)) {
        stream.close()
        None
      } else Try {
        val pos = stream.getCount
        last = Some(new GzipCompressorInputStream(new NonClosingInputStream(stream), false))
        last.map((pos, _))
      }.getOrElse(None)
    }
  }

  def estimateCompressionFactor(in: InputStream, readUncompressedBytes: Long): Double = {
    val stream = new CountingInputStream(new BufferedInputStream(new NonClosingInputStream(in)))
    val uncompressed = new GzipCompressorInputStream(stream, true)
    var read = IOUtil.skip(uncompressed, readUncompressedBytes)
    val decompressed = stream.getCount
    while (decompressed == stream.getCount && !IOUtil.eof(uncompressed, markReset = false)) read += 1
    val factor = read.toDouble / decompressed
    uncompressed.close()
    factor
  }

  def decompress(in: InputStream, filename: Option[String] = None, checkFile: Boolean = false): InputStream = {
    val buffered = if (in.markSupported()) in else new BufferedInputStream(in)
    if (!IOUtil.eof(buffered) && ((filename.isEmpty && !checkFile) || (filename.isDefined && filename.get.toLowerCase.endsWith(GzipExt)))) {
      new GzipCompressorInputStream(buffered, true)
    } else buffered
  }
} 
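
A usage sketch for isCompressed: it peeks at the two gzip magic bytes via mark/reset, so the stream passed in must support mark (hence the BufferedInputStream wrappers).

import java.io.{BufferedInputStream, ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.GZIPOutputStream

import org.archive.archivespark.sparkling.io.GzipUtil

object GzipSketch {
  def main(args: Array[String]): Unit = {
    // Produce some gzip-compressed bytes in memory.
    val raw = new ByteArrayOutputStream()
    val gz = new GZIPOutputStream(raw)
    gz.write("hello".getBytes("UTF-8"))
    gz.close()

    val compressed = new BufferedInputStream(new ByteArrayInputStream(raw.toByteArray))
    val plain = new BufferedInputStream(new ByteArrayInputStream("hello".getBytes("UTF-8")))

    println(GzipUtil.isCompressed(compressed)) // true
    println(GzipUtil.isCompressed(plain))      // false
  }
}
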
Example 28
Source File: HdfsBlockStream.scala    From ArchiveSpark   with MIT License    5 votes
package org.archive.archivespark.sparkling.io

import java.io.{ByteArrayInputStream, InputStream}

import org.apache.hadoop.fs.{FileSystem, Path}
import org.archive.archivespark.sparkling.logging.LogContext
import org.archive.archivespark.sparkling.util.Common

import scala.util.Try

class HdfsBlockStream (fs: FileSystem, file: String, offset: Long = 0, length: Long = -1, retries: Int = 60, sleepMillis: Int = 1000 * 60) extends InputStream {
  implicit val logContext: LogContext = LogContext(this)

  val path = new Path(file)
  val (blockSize: Int, fileSize: Long) = {
    val status = fs.getFileStatus(path)
    (status.getBlockSize.min(Int.MaxValue).toInt, status.getLen)
  }

  private var pos: Long = offset.max(0)
  private val max: Long = if (length > 0) fileSize.min(pos + length) else fileSize

  private val buffer = new Array[Byte](blockSize)
  private val emptyBlock = new ByteArrayInputStream(Array.emptyByteArray)
  private var block: ByteArrayInputStream = emptyBlock

  def ensureNextBlock(): InputStream = {
    if (block.available() == 0 && pos < max) {
      val end = pos + blockSize
      val blockLength = ((end - (end % blockSize)).min(max) - pos).toInt
      Common.retry(retries, sleepMillis, (retry, e) => {
        "File access failed (" + retry + "/" + retries + "): " + path + " (Offset: " + pos + ") - " + e.getMessage
      }) { retry =>
        val in = fs.open(path, blockLength)
        if (retry > 0) Try(in.seekToNewSource(pos))
        else if (pos > 0) in.seek(pos)
        var read = 0
        while (read < blockLength) read += in.read(buffer, read, blockLength - read)
        Try(in.close())
      }
      pos += blockLength
      block = new ByteArrayInputStream(buffer, 0, blockLength)
    }
    block
  }

  override def read(): Int = ensureNextBlock().read()

  override def read(b: Array[Byte]): Int = ensureNextBlock().read(b)

  override def read(b: Array[Byte], off: Int, len: Int): Int = ensureNextBlock().read(b, off, len)

  override def skip(n: Long): Long = {
    val available = block.available()
    if (n <= available) block.skip(n)
    else {
      block = emptyBlock
      val currentPos = pos - available
      val skip = n.min(max - currentPos)
      pos += skip - available
      skip
    }
  }

  override def available(): Int = block.available()

  override def close(): Unit = {}
  override def markSupported(): Boolean = false
} 
Example 29
Source File: Licence.scala    From slide-desktop   with GNU General Public License v2.0    5 votes
package gui

import java.awt.{BorderLayout, Insets}
import java.io.InputStream
import java.util.Scanner
import javax.swing.{JFrame, JScrollPane, JTextArea, ScrollPaneConstants}

object Licence extends JFrame {
    val istream: InputStream = getClass.getResourceAsStream("res/licence-gpl.txt")
    val licenseText: String = new Scanner(istream, "UTF-8").useDelimiter("\\A").next

    this.setTitle("Licence")
    this.setBounds(100, 100, 640, 800)

    val textField: JTextArea = new JTextArea
    textField.setEditable(false)
    textField.setMargin(new Insets(10, 10, 10, 10))
    textField.setAlignmentX(0)
    textField.setText(licenseText)
    textField.setCaretPosition(0)

    this.add(textField, BorderLayout.CENTER)

    val scroll: JScrollPane = new JScrollPane(textField,
        ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS, ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER)
    this.getContentPane.add(scroll)

    def showLicense(): Unit = this.setVisible(true)
} 
Example 30
Source File: Hashes.scala    From matcher   with MIT License    5 votes
import java.io.InputStream

object Hashes {
  def mk(algorithm: String, stream: InputStream): Array[Byte] = {
    import java.security.{DigestInputStream, MessageDigest}
    val digest = MessageDigest.getInstance(algorithm)
    try {
      val dis    = new DigestInputStream(stream, digest)
      val buffer = new Array[Byte](8192)
      while (dis.read(buffer) >= 0) {}
      dis.close()
      digest.digest
    } finally { stream.close() }
  }
} 
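
A usage sketch: compute a SHA-256 digest over an in-memory stream and hex-encode the result. Algorithm names follow java.security.MessageDigest, e.g. "SHA-1", "SHA-256", "MD5".

import java.io.ByteArrayInputStream

object HashesSketch {
  def main(args: Array[String]): Unit = {
    val stream = new ByteArrayInputStream("hello".getBytes("UTF-8"))
    val digest = Hashes.mk("SHA-256", stream) // mk also closes the stream
    println(digest.map("%02x".format(_)).mkString)
  }
}
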
Example 31
Source File: ServerIpUtil.scala    From piflow   with BSD 2-Clause "Simplified" License    5 votes
package cn.piflow.util

import java.io.{File, FileInputStream, InputStream}
import java.net.InetAddress
import java.util.Properties

object ServerIpUtil {
  private val prop: Properties = new Properties()
  var fis: InputStream = null
  var path :String = ""

    try{

    val userDir = System.getProperty("user.dir")
    path = userDir + "/server.ip"
    val file = new File(path)
    if(!file.exists()){
      file.createNewFile()
    }
    prop.load(new FileInputStream(path))
  } catch{
    case ex: Exception => ex.printStackTrace()
  }

  def getServerIpFile() : String = {
    path
  }


  def getServerIp(): String ={
    val obj = prop.get("server.ip")
    if(obj != null){
      return obj.toString
    }
    null
  }

  def main(args: Array[String]): Unit = {

    val ip = InetAddress.getLocalHost.getHostAddress
    //write ip to server.ip file
    FileUtil.writeFile("server.ip=" + ip, ServerIpUtil.getServerIpFile())
    println(ServerIpUtil.getServerIp())
  }
} 
Example 32
Source File: PropertyUtil.scala    From piflow   with BSD 2-Clause "Simplified" License    5 votes
package cn.piflow.util

import java.io.{FileInputStream, InputStream}
import java.util.Properties

object PropertyUtil {
  private val prop: Properties = new Properties()
  var fis: InputStream = null
  var path :String = ""
  var classPath:String = ""
  var scalaPath:String = ""
    try{
    //val path = Thread.currentThread().getContextClassLoader.getResource("config.properties").getPath
    //fis = this.getClass.getResourceAsStream("")
    val userDir = System.getProperty("user.dir")
    path = userDir + "/config.properties"
    classPath = userDir + "/classpath/"
    scalaPath = userDir + "/scala"
    prop.load(new FileInputStream(path))
  } catch{
    case ex: Exception => ex.printStackTrace()
  }

  def getConfigureFile() : String = {
    path
  }

  def getClassPath():String = {
    classPath
  }

  def getScalaPath():String = {
    scalaPath
  }

  def getPropertyValue(propertyKey: String): String ={
    val obj = prop.get(propertyKey)
    if(obj != null){
      return obj.toString
    }
    null
  }

  def getIntPropertyValue(propertyKey: String): Int ={
    val obj = prop.getProperty(propertyKey)
    if(obj != null){
      return obj.toInt
    }
    throw new NullPointerException
  }

} 
Example 33
Source File: ClasspathResources.scala    From intro-to-akka-streams   with Apache License 2.0    5 votes
package com.github.dnvriend.streams.util

import java.io.InputStream

import akka.NotUsed
import akka.stream.IOResult
import akka.stream.scaladsl.{ Source, StreamConverters }
import akka.util.ByteString

import scala.concurrent.Future
import scala.io.{ Source ⇒ ScalaIOSource }
import scala.util.Try
import scala.xml.pull.{ XMLEvent, XMLEventReader }

trait ClasspathResources {
  def withInputStream[T](fileName: String)(f: InputStream ⇒ T): T = {
    val is = fromClasspathAsStream(fileName)
    try {
      f(is)
    } finally {
      Try(is.close())
    }
  }

  def withXMLEventReader[T](fileName: String)(f: XMLEventReader ⇒ T): T =
    withInputStream(fileName) { is ⇒
      f(new XMLEventReader(ScalaIOSource.fromInputStream(is)))
    }

  def withXMLEventSource[T](fileName: String)(f: Source[XMLEvent, NotUsed] ⇒ T): T =
    withXMLEventReader(fileName) { reader ⇒
      f(Source.fromIterator(() ⇒ reader))
    }

  def withByteStringSource[T](fileName: String)(f: Source[ByteString, Future[IOResult]] ⇒ T): T =
    withInputStream(fileName) { inputStream ⇒
      f(StreamConverters.fromInputStream(() ⇒ inputStream))
    }

  def streamToString(is: InputStream): String =
    ScalaIOSource.fromInputStream(is).mkString

  def fromClasspathAsString(fileName: String): String =
    streamToString(fromClasspathAsStream(fileName))

  def fromClasspathAsStream(fileName: String): InputStream =
    getClass.getClassLoader.getResourceAsStream(fileName)

} 
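
A usage sketch: mix the trait into any object and hand withInputStream a resource name that exists on the classpath ("logback.xml" here is purely illustrative).

import com.github.dnvriend.streams.util.ClasspathResources

object ClasspathSketch extends ClasspathResources {
  def main(args: Array[String]): Unit = {
    val firstLine = withInputStream("logback.xml") { is =>
      scala.io.Source.fromInputStream(is).getLines().take(1).mkString
    }
    println(firstLine)
  }
}
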
Example 34
Source File: MergeStrategySpec.scala    From daf   with BSD 3-Clause "New" or "Revised" License    5 votes
package daf.filesystem

import java.io.{ Closeable, InputStream }
import java.util.Scanner

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{ FSDataInputStream, FSDataOutputStream, FileSystem, Path }
import org.scalatest.{ BeforeAndAfterAll, Matchers, WordSpec }

import scala.collection.convert.decorateAsScala._
import scala.util.{ Random, Try }

class MergeStrategySpec extends WordSpec with Matchers with BeforeAndAfterAll {

  private implicit val fileSystem = FileSystem.getLocal(new Configuration)

  private val numFiles = 10

  private val baseDir = "test-dir".asHadoop

  private val workingDir = baseDir / f"merge-strategy-spec-${Random.nextInt(10000)}%05d"

  private def safely[A <: Closeable, U](f: A => U) = { stream: A =>
    val attempt = Try { f(stream) }
    stream.close()
    attempt
  }

  private def readFile(path: Path) = safely[FSDataInputStream, Seq[String]] { _.scanner.asScala.toSeq } apply fileSystem.open(path)

  private def readFiles = Try {
    fileSystem.listStatus(workingDir).toSeq.flatMap { status => readFile(status.getPath).get }
  }

  private def openFiles = Try {
    fileSystem.listStatus(workingDir).toSeq.map { status => fileSystem.open(status.getPath) }
  }

  private def createFile(fileName: String) = safely[FSDataOutputStream, Unit] { stream =>
    Random.alphanumeric.grouped(200).take(10).map { randomSplits(_) }.foreach { row =>
      stream.writeUTF { row.mkString("", ",", "\n") }
    }
  } apply fileSystem.create { workingDir / fileName }

  private def randomSplits(chars: Stream[Char], strings: Seq[String] = Seq.empty): Seq[String] = chars.splitAt { Random.nextInt(10) + 5 } match {
    case (head, tail) if tail.isEmpty => head.drop(1).mkString +: strings
    case (head, tail)                 => randomSplits(tail, head.mkString +: strings)
  }

  private def createWorkingDir = Try { fileSystem.mkdirs(workingDir) }

  private def createFiles = Try {
    0 until numFiles foreach { index => createFile(s"test-file-$index").get } // this is relatively nasty, and should be handled in a `traverse`
  }

  private def prepareData = for {
    _ <- createWorkingDir
    _ <- createFiles
  } yield ()

  private def purgeData = Try { fileSystem.delete(workingDir, true) }

  override def beforeAll() = prepareData.get

  override def afterAll() = purgeData.get

  "MergeStrategies info" when {

    "given compressed format files" must {

      "throw an exception" in {
        an[IllegalArgumentException] must be thrownBy MergeStrategies.find { FileInfo(workingDir / "test-file-0", 0, FileDataFormats.raw, FileCompressionFormats.gzip) }
      }
    }

    "given data as csv" must {

      "drop one line and merge the rest" in {
        safely[InputStream, Seq[String]] { new Scanner(_).asScala.toList }.andThen { attempt =>
          for {
            merged   <- attempt
            expected <- readFiles
          } merged.size should be { expected.size - numFiles + 1 }
        } apply MergeStrategies.csv.merge { openFiles.get }
      }
    }

    "given data as json" must {

      "just merge the files into one" in {
        safely[InputStream, Seq[String]] { new Scanner(_).asScala.toList }.andThen { attempt =>
          for {
            merged   <- attempt
            expected <- readFiles
          } merged.size should be { expected.size }
        } apply MergeStrategies.json.merge { openFiles.get }
      }

    }
  }
} 
Example 35
Source File: ScalastyleInspectionsGenerator.scala    From sonar-scala   with GNU Lesser General Public License v3.0 5 votes vote down vote up
package com.mwz.sonar.scala.metadata.scalastyle

import java.io.InputStream
import java.nio.file.Paths

import com.mwz.sonar.scala.metadata.scalastyle._
import com.typesafe.config.{Config, ConfigFactory}
import org.scalastyle.{Level, _}
import sbt.Keys._
import sbt._

import scala.meta._
import scala.xml.{Node, NodeSeq, XML}


  def transform(source: Tree, inspections: Seq[ScalastyleInspection]): Tree = {
    val stringified: Seq[String] = inspections.collect {
      case inspection =>
        // Is there a better way of embedding multi-line text?
        val extraDescription = inspection.extraDescription.map(s => "\"\"\"" + s + "\"\"\"")
        val justification = inspection.justification.map(s => "\"\"\"" + s + "\"\"\"")
        val params = inspection.params.map { p =>
          s"""
             |ScalastyleParam(
             |  name = "${p.name}",
             |  typ = ${p.typ},
             |  label = "${p.label}",
             |  description = \"\"\"${p.description}\"\"\",
             |  default = \"\"\"${p.default}\"\"\"
             |)
           """.stripMargin
        }

        // It doesn't seem to be straightforward to automatically convert a collection
        // into a tree using scalameta, so I'm turning it into a String so it can be parsed,
        // which is easier than constructing the tree manually.
        // Totally doable with shapeless though, but it would be a bit of an overkill in this case.
        s"""
           |ScalastyleInspection(
           |  clazz = "${inspection.clazz}",
           |  id = "${inspection.id}",
           |  label = "${inspection.label}",
           |  description = "${inspection.description}",
           |  extraDescription = $extraDescription,
           |  justification = $justification,
           |  defaultLevel = ${inspection.defaultLevel},
           |  params = ${params.toString.parse[Term].get.syntax}
           |)
         """.stripMargin
    }

    // Transform the template file.
    val term: Term = stringified.toString.parse[Term].get
    source.transform {
      case q"val AllInspections: $tpe = $expr" =>
        q"val AllInspections: $tpe = $term"
    }
  }
} 
Example 36
Source File: WikipediaToDBpediaClosure.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.db

import org.semanticweb.yars.nx.parser.NxParser
import java.io.InputStream
import org.dbpedia.spotlight.log.SpotlightLog
import collection.immutable.ListSet
import scala.Predef._
import org.dbpedia.spotlight.exceptions.NotADBpediaResourceException
import java.net.URLDecoder
import org.dbpedia.spotlight.model.SpotlightConfiguration
import org.dbpedia.extraction.util.WikiUtil
import scala.collection.mutable.ListBuffer


  def wikipediaToDBpediaURI(url: String): String = {

    val uri = if(url.startsWith("http:")) {
      getEndOfChainURI(decodedNameFromURL(url))
    } else {
      getEndOfChainURI(decodeURL(url))
    }

    if (disambiguationsSet.contains(uri) || uri == null)
      throw new NotADBpediaResourceException("Resource is a disambiguation page.")
    else
      uri
  }

  def getEndOfChainURI(uri: String): String = getEndOfChainURI(uri, Set(uri))

  private def getEndOfChainURI(uri: String, alreadyTraversed:Set[String]): String = linkMap.get(uri) match {
    case Some(s: String) => if (alreadyTraversed.contains(s)) uri else getEndOfChainURI(s, alreadyTraversed + s)
    case None => uri
  }

} 
Example 37
Source File: TokenOccurrenceSource.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.db.io

import java.io.{InputStream, FileInputStream, File}
import io.Source
import org.dbpedia.spotlight.db.WikipediaToDBpediaClosure
import org.dbpedia.spotlight.db.model.{ResourceStore, TokenTypeStore}
import org.dbpedia.spotlight.log.SpotlightLog
import scala.Predef._
import scala.Array
import org.dbpedia.spotlight.exceptions.{DBpediaResourceNotFoundException, NotADBpediaResourceException}
import org.dbpedia.spotlight.model.{TokenType, DBpediaResource}
import util.TokenOccurrenceParser




object TokenOccurrenceSource {

  def fromPigInputStream(tokenInputStream: InputStream, tokenTypeStore: TokenTypeStore, wikipediaToDBpediaClosure: WikipediaToDBpediaClosure, resStore: ResourceStore): Iterator[Triple[DBpediaResource, Array[TokenType], Array[Int]]] = {

    var i = 0
    plainTokenOccurrenceSource(tokenInputStream, 0) map {
      case (wikiurl: String, tokens: Array[String], counts: Array[Int]) => {
        i += 1
        if (i % 10000 == 0)
          SpotlightLog.info(this.getClass, "Read context for %d resources...", i)
        try {
          Triple(
            resStore.getResourceByName(wikipediaToDBpediaClosure.wikipediaToDBpediaURI(wikiurl)),
            tokens.map{ token => tokenTypeStore.getTokenType(token) },
            counts
          )
        } catch {
          case e: DBpediaResourceNotFoundException => Triple(null, null, null)
          case e: NotADBpediaResourceException     => Triple(null, null, null)
        }
      }
    }

  }

  def fromPigFile(tokenFile: File, tokenStore: TokenTypeStore, wikipediaToDBpediaClosure: WikipediaToDBpediaClosure, resStore: ResourceStore, minimumCount: Int) = fromPigInputStream(new FileInputStream(tokenFile), tokenStore, wikipediaToDBpediaClosure, resStore)

  val tokensParser = TokenOccurrenceParser.createDefault

  def plainTokenOccurrenceSource(tokenInputStream: InputStream, minimumCount: Int): Iterator[Triple[String, Array[String], Array[Int]]] = {
    Source.fromInputStream(tokenInputStream) getLines() filter(!_.equals("")) map {
      line: String => {
        val Array(wikiurl, tokens) = line.trim().split('\t')
        val Pair(tokensA, countsA) = tokensParser.parse(tokens, minimumCount)
        Triple(wikiurl, tokensA, countsA)
      }
    }
  }
} 
Example 38
Source File: TokenSource.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.db.io

import org.dbpedia.spotlight.io.OccurrenceSource
import org.dbpedia.spotlight.db.model.{StringTokenizer, SurfaceFormStore}
import collection.mutable.HashMap
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import java.io.{InputStream, FileInputStream, File}
import org.dbpedia.spotlight.log.SpotlightLog
import org.dbpedia.spotlight.model._




object TokenSource {

  private val ADDITIONAL_TOKEN_COUNT = 1

  def fromSFStore(sfStore: SurfaceFormStore, tokenizer: StringTokenizer): Seq[String] = {
    SpotlightLog.info(this.getClass, "Adding all surface form tokens to the TokenStore...")
    sfStore.iterateSurfaceForms.grouped(100000).toList.par.flatMap(_.map{
      sf: SurfaceForm =>
        //Tokenize all SFs first
        tokenizer.tokenize(sf.name)
    }).seq.flatten
  }

  def fromPigFile(tokenFile: File, additionalTokens: Option[Seq[String]] = None, minimumCount: Int) = fromPigInputStream(new FileInputStream(tokenFile), additionalTokens, minimumCount)
  def fromPigInputStream(tokenFile: InputStream, additionalTokens: Option[Seq[String]] = None, minimumCount: Int) = {

    val tokenMap = HashMap[String, Int]()

    var i = 0
    TokenOccurrenceSource.plainTokenOccurrenceSource(tokenFile, minimumCount) foreach {
      p: Triple[String, Array[String], Array[Int]] => {
        i += 1
        if (i % 10000 == 0)
          SpotlightLog.info(this.getClass, "Read context for %d resources...", i)

        (0 to p._2.size -1).foreach {
          i: Int => tokenMap.put(p._2(i), tokenMap.getOrElse(p._2(i), 0) + p._3(i))
        }
      }
    }

    additionalTokens match {
      case Some(tokens) => {
        SpotlightLog.info(this.getClass, "Read %d additional tokens...", tokens.size)
        tokens.foreach { token: String =>
          tokenMap.put(token, tokenMap.getOrElse(token, 0) + ADDITIONAL_TOKEN_COUNT)
        }
      }
      case None =>
    }

    var id = -1
    tokenMap.map{
      case(token, count) => {
        id += 1
        (new TokenType(id, token, count), count)
      }
    }.toMap.asJava
  }

} 
Example 39
Source File: TypesLoader.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.util

import java.io.{File, InputStream}
import java.util.LinkedHashSet

import org.dbpedia.spotlight.log.SpotlightLog
import org.dbpedia.spotlight.model._
import org.semanticweb.yars.nx.parser.NxParser

import scala.collection.JavaConversions._
import scala.io.Source



object TypesLoader
{
    def getTypesMap(typeDictFile : File) : Map[String, List[OntologyType]] = {
        SpotlightLog.info(this.getClass, "Loading types map...")
        if (!(typeDictFile.getName.toLowerCase endsWith ".tsv"))
            throw new IllegalArgumentException("types mapping only accepted in tsv format so far! can't parse "+typeDictFile)
        // CAUTION: this assumes that the most specific type is listed last
        var typesMap = Map[String,List[OntologyType]]()
        for (line <- Source.fromFile(typeDictFile, "UTF-8").getLines) {
            val elements = line.split("\t")
            val uri = new DBpediaResource(elements(0)).uri
            val t = Factory.OntologyType.fromURI(elements(1))
            val typesList : List[OntologyType] = typesMap.get(uri).getOrElse(List[OntologyType]()) ::: List(t)
            typesMap = typesMap.updated(uri, typesList)
        }
        SpotlightLog.info(this.getClass, "Done.")
        typesMap
    }

    def getTypesMapFromTSV_java(input: InputStream) : java.util.Map[String,java.util.LinkedHashSet[OntologyType]] = {
        SpotlightLog.info(this.getClass, "Loading types map...")
        var typesMap = Map[String,java.util.LinkedHashSet[OntologyType]]()
        var i = 0;
        for (line <- Source.fromInputStream(input, "UTF-8").getLines) {
            val elements = line.split("\t")
            val uri = new DBpediaResource(elements(0)).uri
            val typeUri = elements(1)
            if (!typeUri.equalsIgnoreCase("http://www.w3.org/2002/07/owl#Thing")) {
                val t = Factory.OntologyType.fromURI(typeUri)
                i = i + 1;
                val typesList : java.util.LinkedHashSet[OntologyType] = typesMap.getOrElse(uri,new LinkedHashSet[OntologyType]())
                typesList.add(t)
                t match {
                    case ft: FreebaseType => typesList.add(Factory.OntologyType.fromQName("Freebase:/"+ft.domain)) //Add supertype as well to mimic inference
                    case _ => //nothing
                }
                typesMap = typesMap.updated(uri, typesList)
            }
        }
        SpotlightLog.info(this.getClass, "Done. Loaded %d types for %d resources.", i,typesMap.size)
        typesMap
    }


    def getTypesMap_java(instanceTypesStream : InputStream) : java.util.Map[String,java.util.LinkedHashSet[OntologyType]] = {
        SpotlightLog.info(this.getClass, "Loading types map...")
        var typesMap = Map[String,java.util.LinkedHashSet[OntologyType]]()
        var i = 0;
        // CAUTION: this assumes that the most specific type is listed last
        val parser = new NxParser(instanceTypesStream)
        while (parser.hasNext) {
            val triple = parser.next
            if(!triple(2).toString.endsWith("owl#Thing")) {
                i = i + 1;
                val resource = new DBpediaResource(triple(0).toString)
                val t = Factory.OntologyType.fromURI(triple(2).toString)
                val typesList : java.util.LinkedHashSet[OntologyType] = typesMap.get(resource.uri).getOrElse(new LinkedHashSet[OntologyType]())
                typesList.add(t)
                typesMap = typesMap.updated(resource.uri, typesList)
            }
        }
        SpotlightLog.info(this.getClass, "Done. Loaded %d types.", i)
        typesMap
    }
    
} 
Example 40
Source File: BMLHelper.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.filesystem.bml

import java.io.{ByteArrayInputStream, InputStream}
import java.util
import java.util.UUID

import com.webank.wedatasphere.linkis.bml.client.{BmlClient, BmlClientFactory}
import com.webank.wedatasphere.linkis.bml.protocol.{BmlDownloadResponse, BmlUpdateResponse, BmlUploadResponse}
import com.webank.wedatasphere.linkis.filesystem.exception.WorkspaceExceptionManager
import org.springframework.stereotype.Component

import scala.collection.JavaConversions._


@Component
class BMLHelper {

  def upload(userName: String, content: String, fileName: String): util.Map[String, Object] = {
    val inputStream = new ByteArrayInputStream(content.getBytes("utf-8"))
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def upload(userName: String, inputStream: InputStream, fileName: String, projectName: String): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }


  def upload(userName: String, inputStream: InputStream, fileName: String): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def update(userName: String, resourceId: String, inputStream: InputStream): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, "", inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def update(userName: String, resourceId: String, content: String): util.Map[String, Object] = {
    val inputStream = new ByteArrayInputStream(content.getBytes("utf-8"))
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, UUID.randomUUID().toString + ".json", inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def query(userName: String, resourceId: String, version: String): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    var resource: BmlDownloadResponse = null
    if (version == null) resource = client.downloadResource(userName, resourceId, null)
    else resource = client.downloadResource(userName, resourceId, version)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80023)
    val map = new util.HashMap[String, Object]
    map += "path" -> resource.fullFilePath
    map += "stream" -> resource.inputStream
  }

  private def inputstremToString(inputStream: InputStream): String = scala.io.Source.fromInputStream(inputStream).mkString

  private def createBMLClient(userName: String): BmlClient = if (userName == null)
    BmlClientFactory.createBmlClient()
  else
    BmlClientFactory.createBmlClient(userName)
} 
Example 41
Source File: QueryUtils.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.jobhistory.util

import java.io.{InputStream, OutputStream}
import java.util.Date

import com.webank.wedatasphere.linkis.common.conf.CommonVars
import com.webank.wedatasphere.linkis.common.io.FsPath
import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils}
import com.webank.wedatasphere.linkis.jobhistory.entity.QueryTask
import com.webank.wedatasphere.linkis.protocol.query.RequestInsertTask
import com.webank.wedatasphere.linkis.storage.FSFactory
import com.webank.wedatasphere.linkis.storage.fs.FileSystem
import com.webank.wedatasphere.linkis.storage.utils.{FileSystemUtils, StorageUtils}
import org.apache.commons.io.IOUtils
import org.apache.commons.lang.time.DateFormatUtils


object QueryUtils extends Logging {

  private val CODE_STORE_PREFIX = CommonVars("bdp.dataworkcloud.query.store.prefix", "hdfs:///tmp/bdp-ide/")
  private val CODE_STORE_SUFFIX = CommonVars("bdp.dataworkcloud.query.store.suffix", "")
  private val CHARSET = "utf-8"
  private val CODE_SPLIT = ";"
  private val LENGTH_SPLIT = "#"

  def storeExecutionCode(requestInsertTask: RequestInsertTask): Unit = {
    if (requestInsertTask.getExecutionCode.length < 60000) return
    val user: String = requestInsertTask.getUmUser
    val path: String = getCodeStorePath(user)
    val fsPath: FsPath = new FsPath(path)
    val fileSystem = FSFactory.getFsByProxyUser(fsPath, user).asInstanceOf[FileSystem]
    fileSystem.init(null)
    var os: OutputStream = null
    var position = 0L
    val codeBytes = requestInsertTask.getExecutionCode.getBytes(CHARSET)
    path.intern() synchronized {
      Utils.tryFinally {
        if (!fileSystem.exists(fsPath)) FileSystemUtils.createNewFile(fsPath, user, true)
        os = fileSystem.write(fsPath, false)
        position = fileSystem.get(path).getLength
        IOUtils.write(codeBytes, os)
      } {
        IOUtils.closeQuietly(os)
        if (fileSystem != null) fileSystem.close()
      }
    }
    val length = codeBytes.length
    requestInsertTask.setExecutionCode(path + CODE_SPLIT + position + LENGTH_SPLIT + length)
  }

  def exchangeExecutionCode(queryTask: QueryTask): Unit = {
    import scala.util.control.Breaks._
    if (queryTask.getExecutionCode == null || !queryTask.getExecutionCode.startsWith(StorageUtils.HDFS_SCHEMA)) return
    val codePath = queryTask.getExecutionCode
    val path = codePath.substring(0, codePath.lastIndexOf(CODE_SPLIT))
    val codeInfo = codePath.substring(codePath.lastIndexOf(CODE_SPLIT) + 1)
    val infos: Array[String] = codeInfo.split(LENGTH_SPLIT)
    val position = infos(0).toInt
    var lengthLeft = infos(1).toInt
    val tub = new Array[Byte](1024)
    val executionCode: StringBuilder = new StringBuilder
    val fsPath: FsPath = new FsPath(path)
    val fileSystem = FSFactory.getFsByProxyUser(fsPath, queryTask.getUmUser).asInstanceOf[FileSystem]
    fileSystem.init(null)
    var is: InputStream = null
    if (!fileSystem.exists(fsPath)) return
    Utils.tryFinally {
      is = fileSystem.read(fsPath)
      if (position > 0) is.skip(position)
      breakable {
        while (lengthLeft > 0) {
          val readed = is.read(tub)
          val useful = Math.min(readed, lengthLeft)
          if (useful < 0) break()
          lengthLeft -= useful
          executionCode.append(new String(tub, 0, useful, CHARSET))
        }
      }
    } {
      if (fileSystem != null) fileSystem.close()
      IOUtils.closeQuietly(is)
    }
    queryTask.setExecutionCode(executionCode.toString())
  }

  private def getCodeStorePath(user: String): String = {
    val date: String = DateFormatUtils.format(new Date, "yyyyMMdd")
    s"${CODE_STORE_PREFIX.getValue}${user}${CODE_STORE_SUFFIX.getValue}/executionCode/${date}/_scripts"
  }
} 
Example 42
Source File: StorageScriptFsWriter.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.storage.script.writer

import java.io.{ByteArrayInputStream, IOException, InputStream, OutputStream}
import java.util

import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record}
import com.webank.wedatasphere.linkis.storage.LineRecord
import com.webank.wedatasphere.linkis.storage.script.{Compaction, ScriptFsWriter, ScriptMetaData}
import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils}
import org.apache.commons.io.IOUtils


class StorageScriptFsWriter(val path: FsPath, val charset: String, outputStream: OutputStream = null) extends ScriptFsWriter {

  private val stringBuilder = new StringBuilder

  @scala.throws[IOException]
  override def addMetaData(metaData: MetaData): Unit = {
    val compactions = Compaction.listCompactions().filter(p => p.belongTo(StorageUtils.pathToSuffix(path.getPath)))
    val metadataLine = new util.ArrayList[String]()
    if (compactions.length > 0) {
      metaData.asInstanceOf[ScriptMetaData].getMetaData.map(compactions(0).compact).foreach(metadataLine.add)
      if (outputStream != null) {
        IOUtils.writeLines(metadataLine, "\n", outputStream, charset)
      } else {
        import scala.collection.JavaConversions._
        metadataLine.foreach(m => stringBuilder.append(s"$m\n"))
      }
    }
  }

  @scala.throws[IOException]
  override def addRecord(record: Record): Unit = {
    //Convert to LineRecord rather than TableRecord so that non-table result sets can also be written through this class
    val scriptRecord = record.asInstanceOf[LineRecord]
    if (outputStream != null) {
      IOUtils.write(scriptRecord.getLine, outputStream, charset)
    } else {
      stringBuilder.append(scriptRecord.getLine)
    }
  }

  override def close(): Unit = {
    IOUtils.closeQuietly(outputStream)
  }

  override def flush(): Unit = if (outputStream != null) outputStream.flush()

  def getInputStream(): InputStream = {
    new ByteArrayInputStream(stringBuilder.toString().getBytes(StorageConfiguration.STORAGE_RS_FILE_TYPE.getValue))
  }

} 
Example 43
Source File: StorageResultSetReader.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.storage.resultset

import java.io.{ByteArrayInputStream, IOException, InputStream}

import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader}
import com.webank.wedatasphere.linkis.common.io.{MetaData, Record}
import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.storage.domain.Dolphin
import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException
import com.webank.wedatasphere.linkis.storage.utils.StorageUtils

import scala.collection.mutable.ArrayBuffer



  def readLine(): Array[Byte] = {

    var rowLen = 0
    try rowLen = Dolphin.readInt(inputStream)
    catch {
      case t:StorageWarnException => info(s"Read finished(读取完毕)") ; return null
      case t: Throwable => throw t
    }

    val rowBuffer = ArrayBuffer[Byte]()
    var len = 0

    //Read the entire row, except for the row-length prefix that was already consumed above
    while (rowLen > 0 && len >= 0) {
      if (rowLen > READ_CACHE)
        len = StorageUtils.readBytes(inputStream,bytes, READ_CACHE)
      else
        len = StorageUtils.readBytes(inputStream,bytes, rowLen)

      if (len > 0) {
        rowLen -= len
        rowBuffer ++= bytes.slice(0, len)
      }
    }
    rowCount = rowCount + 1
    rowBuffer.toArray
  }

  @scala.throws[IOException]
  override def getRecord: Record = {
    if (metaData == null) throw new IOException("Must read metadata first(必须先读取metadata)")
    if (row ==  null) throw new IOException("Can't get the value of the field, maybe the IO stream has been read or has been closed!(拿不到字段的值,也许IO流已读取完毕或已被关闭!)")
    row
  }

  @scala.throws[IOException]
  override def getMetaData: MetaData = {
    if(metaData == null) init()
    metaData = deserializer.createMetaData(readLine())
    metaData
  }

  @scala.throws[IOException]
  override def skip(recordNum: Int): Int = {
    if(recordNum < 0 ) return -1

    if(metaData == null) getMetaData
    for(i <- recordNum until (0, -1)){
      try inputStream.skip(Dolphin.readInt(inputStream)) catch { case t: Throwable => return -1}
    }
    recordNum
  }

  @scala.throws[IOException]
  override def getPosition: Long = rowCount

  @scala.throws[IOException]
  override def hasNext: Boolean = {
    if(metaData == null) getMetaData
    val line = readLine()
    if(line == null) return  false
    row = deserializer.createRecord(line)
    if(row == null) return  false
    true
  }

  @scala.throws[IOException]
  override def available: Long = inputStream.available()

  override def close(): Unit = inputStream.close()
} 
Example 44
Source File: ResultSetReader.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.storage.resultset

import java.io.InputStream

import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader}
import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record}
import com.webank.wedatasphere.linkis.storage.FSFactory
import com.webank.wedatasphere.linkis.storage.exception.StorageErrorException
import com.webank.wedatasphere.linkis.storage.resultset.table.{TableMetaData, TableRecord, TableResultSet}


object ResultSetReader {

  def getResultSetReader[K <: MetaData, V <: Record](resultSet: ResultSet[K, V], inputStream: InputStream): ResultSetReader[K, V] = {
    new StorageResultSetReader[K, V](resultSet, inputStream)
  }

  def getResultSetReader[K <: MetaData, V <: Record](resultSet: ResultSet[K, V], value: String): ResultSetReader[K, V] = {
    new StorageResultSetReader[K, V](resultSet, value)
  }

  def getResultSetReader(res: String):ResultSetReader[_ <: MetaData, _ <: Record]= {
    val rsFactory = ResultSetFactory.getInstance
    if (rsFactory.isResultSet(res)) {
      val resultSet = rsFactory.getResultSet(res)
      ResultSetReader.getResultSetReader(resultSet, res)
    }else {
      val resPath = new FsPath(res)
      val resultSet = rsFactory.getResultSetByPath(resPath)
      val fs = FSFactory.getFs(resPath)
      fs.init(null)
      ResultSetReader.getResultSetReader(resultSet, fs.read(resPath))
    }
  }

  def getTableResultReader(res: String):ResultSetReader[TableMetaData,TableRecord] = {
    val rsFactory = ResultSetFactory.getInstance
    if (rsFactory.isResultSet(res)) {
      val resultSet = rsFactory.getResultSet(res)
      if (ResultSetFactory.TABLE_TYPE != resultSet.resultSetType()) {
        throw new StorageErrorException(52002, "Result sets that are not tables are not supported(不支持不是表格的结果集)")
      }
      ResultSetReader.getResultSetReader(resultSet.asInstanceOf[TableResultSet], res)
    }else {
      val resPath = new FsPath(res)
      val resultSet = rsFactory.getResultSetByPath(resPath)
      if (ResultSetFactory.TABLE_TYPE != resultSet.resultSetType()) {
        throw new StorageErrorException(52002, "Result sets that are not tables are not supported(不支持不是表格的结果集)")
      }
      val fs = FSFactory.getFs(resPath)
      fs.init(null)
      ResultSetReader.getResultSetReader(resultSet.asInstanceOf[TableResultSet], fs.read(resPath))
    }
  }

} 
Example 45
Source File: Dolphin.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.storage.domain

import java.io.{IOException, InputStream}

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException
import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils}


  def getIntBytes(value: Int): Array[Byte] = {
    val str = value.toString
    val res = "0" * (INT_LEN - str.length) + str
    Dolphin.getBytes(res)
  }


  def getType(inputStream:InputStream):String = {
    val bytes = new Array[Byte](100)
    val len = StorageUtils.readBytes(inputStream,bytes, Dolphin.MAGIC_LEN + INT_LEN)
    if(len == -1) return null
    getType(Dolphin.getString(bytes, 0, len))
  }

  def getType(content: String): String = {
    if(content.length < MAGIC.length || content.substring(0, MAGIC.length) != MAGIC) throw new IOException(s"File header type must be dolphin,content:$content is not")
    content.substring(MAGIC.length, MAGIC.length + INT_LEN ).toInt.toString
  }

} 
Example 46
Source File: Main.scala    From seals   with Apache License 2.0 5 votes vote down vote up
package com.example.streaming

import java.io.{ InputStream, OutputStream, FileInputStream, FileOutputStream }

import cats.implicits._
import cats.effect.{ IO, IOApp, Blocker, ExitCode }

import fs2.{ Stream, Chunk, Pure }

import dev.tauri.seals.scodec.StreamCodecs._

object Main extends IOApp {

  sealed trait Color
  final case object Brown extends Color
  final case object Grey extends Color

  sealed trait Animal
  final case class Elephant(name: String, tuskLength: Float) extends Animal
  final case class Quokka(name: String, color: Color = Brown) extends Animal
  final case class Quagga(name: String, speed: Double) extends Animal

  def transform(from: InputStream, to: OutputStream)(f: Animal => Stream[Pure, Animal]): IO[Unit] = {
    Blocker[IO].use { blocker =>
      val input = fs2.io.readInputStream(
        IO.pure(from),
        chunkSize = 1024,
        blocker = blocker
      )
      val sIn: Stream[IO, Animal] = input.through(streamDecoderFromReified[Animal].toPipeByte[IO]).flatMap(f)
      val sOut: Stream[IO, Unit] = streamEncoderFromReified[Animal].encode(sIn).flatMap { bv =>
        Stream.chunk(Chunk.bytes(bv.bytes.toArray))
      }.through(fs2.io.writeOutputStream(
        IO.pure(to),
        blocker = blocker,
        closeAfterUse = true
      ))
      sOut.compile.drain
    }
  }

  val transformer: Animal => Stream[Pure, Animal] = {
    case Elephant(n, tl) => Stream(Elephant(n, tl + 17))
    case Quokka(n, Brown) => Stream(Quokka(n, Grey))
    case q @ Quokka(_, _) => Stream(q)
    case Quagga(_, _) => Stream.empty
  }

  override def run(args: List[String]): IO[ExitCode] = {
    val (from, to) = args match {
      case List(from, to, _*) =>
        (from, to)
      case List(from) =>
        (from, "out.bin")
      case _ =>
        ("in.bin", "out.bin")
    }

    val task = transform(new FileInputStream(from), new FileOutputStream(to))(transformer)
    task.as(ExitCode.Success)
  }
} 
Example 47
Source File: SparkTest.scala    From Spark-Scala-Maven-Example   with MIT License 5 votes vote down vote up
package net.martinprobson.spark

import java.io.InputStream

import grizzled.slf4j.Logging
import org.apache.spark.sql.SparkSession
import org.scalatest.{Outcome, fixture}

class SparkTest extends fixture.FunSuite with Logging {

  type FixtureParam = SparkSession

  def withFixture(test: OneArgTest): Outcome = {
    val sparkSession = SparkSession.builder
      .appName("Test-Spark-Local")
      .master("local[2]")
      .getOrCreate()
    try {
      withFixture(test.toNoArgTest(sparkSession))
    } finally sparkSession.stop
  }

  test("empsRDD rowcount") { spark =>
    val empsRDD = spark.sparkContext.parallelize(getInputData("/data/employees.json"), 5)
    assert(empsRDD.count === 1000)
  }

  test("titlesRDD rowcount") { spark =>
    val titlesRDD = spark.sparkContext.parallelize(getInputData("/data/titles.json"), 5)
    assert(titlesRDD.count === 1470)
  }

  private def getInputData(name: String): Seq[String] = {
    val is: InputStream = getClass.getResourceAsStream(name)
    scala.io.Source.fromInputStream(is).getLines.toSeq
  }
} 
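A standalone sketch of the same classpath-reading pattern used by getInputData, materialising the lines eagerly and closing the stream explicitly; the resource name /data/sample.json is hypothetical.

import java.io.InputStream

object ResourceLines {
  // Reads a classpath resource into a fully materialised List and closes the stream.
  def readLines(name: String): List[String] = {
    val is: InputStream = getClass.getResourceAsStream(name)
    require(is != null, s"resource $name not found on the classpath")
    try scala.io.Source.fromInputStream(is).getLines().toList
    finally is.close()
  }
}

// Usage with the hypothetical resource: ResourceLines.readLines("/data/sample.json").size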
Example 48
Source File: FileStreamRecordReader.scala    From geotrellis-pointcloud   with Apache License 2.0 5 votes vote down vote up
package geotrellis.pointcloud.spark.store.hadoop.formats

import org.apache.hadoop.fs._
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.lib.input._

import java.io.InputStream

class FileStreamRecordReader[K, V](read: InputStream => (K, V)) extends RecordReader[K, V] {
  private var tup: (K, V) = null
  private var hasNext: Boolean = true

  def initialize(split: InputSplit, context: TaskAttemptContext) = {
    val path = split.asInstanceOf[FileSplit].getPath()
    val conf = context.getConfiguration()
    val fs = path.getFileSystem(conf)

    val is: InputStream = {
      val factory = new CompressionCodecFactory(conf)
      val codec = factory.getCodec(path)

      if (codec == null) fs.open(path)
      else codec.createInputStream(fs.open(path))
    }

    tup = read(is)
  }

  def close = {}
  def getCurrentKey = tup._1
  def getCurrentValue = { hasNext = false ; tup._2 }
  def getProgress = 1
  def nextKeyValue = hasNext
}

trait FileStreamInputFormat[K, V] extends FileInputFormat[K, V] {
  def read(is: InputStream, context: TaskAttemptContext): (K, V)

  override def isSplitable(context: JobContext, fileName: Path) = false

  override def createRecordReader(split: InputSplit, context: TaskAttemptContext): RecordReader[K, V] =
    new FileStreamRecordReader({ is => read(is, context) })
} 
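As a sketch of how the trait might be used (not taken from geotrellis-pointcloud), a concrete format only needs to supply the read function; here the whole non-splittable file is returned as a single String keyed by its character count.

import java.io.InputStream

import org.apache.hadoop.mapreduce.TaskAttemptContext

class WholeFileInputFormat extends FileStreamInputFormat[Long, String] {
  // Reads the complete file from the (possibly decompressed) stream and keys it by length.
  def read(is: InputStream, context: TaskAttemptContext): (Long, String) = {
    val text = scala.io.Source.fromInputStream(is).mkString
    (text.length.toLong, text)
  }
}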
Example 49
Source File: S3PointCloudInputFormat.scala    From geotrellis-pointcloud   with Apache License 2.0 5 votes vote down vote up
package geotrellis.pointcloud.spark.store.s3

import geotrellis.spark.store.s3._
import geotrellis.pointcloud.spark.store.hadoop.formats._
import geotrellis.pointcloud.util.Filesystem

import io.pdal._
import io.circe.Json
import io.circe.syntax._
import cats.syntax.either._
import org.apache.hadoop.mapreduce.{InputSplit, TaskAttemptContext}
import org.apache.commons.io.FileUtils

import java.io.{File, InputStream}
import java.net.URI

import scala.collection.JavaConverters._


    mode match {
      case "s3" =>
        new S3URIRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
          def read(key: String, uri: URI): (S3PointCloudHeader, List[PointCloud]) = {
            val s3Pipeline =
              pipeline
                .hcursor
                .downField("pipeline").downArray
                .downField("filename").withFocus(_ => uri.toString.asJson)
                .top.fold(pipeline)(identity)

            executePipeline(context)(key, s3Pipeline)
          }
        }

      case _ =>
        val tmpDir = {
          val dir = PointCloudInputFormat.getTmpDir(context)
          if (dir == null) Filesystem.createDirectory()
          else Filesystem.createDirectory(dir)
        }

        new S3StreamRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
          def read(key: String, is: InputStream): (S3PointCloudHeader, List[PointCloud]) = {
            // copy remote file into local tmp dir
            tmpDir.mkdirs() // to be sure that dirs created
            val localPath = new File(tmpDir, key.replace("/", "_"))
            FileUtils.copyInputStreamToFile(is, localPath)
            is.close()

            // use local filename path if it's present in json
            val localPipeline =
              pipeline
                .hcursor
                .downField("pipeline").downArray
                .downField("filename").withFocus(_ => localPath.getAbsolutePath.asJson)
                .top.fold(pipeline)(identity)

            try executePipeline(context)(key, localPipeline) finally {
              localPath.delete()
              tmpDir.delete()
            }
          }
        }
    }
  }
} 
Example 50
Source File: CsvPublisher.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.csv

import java.io.InputStream
import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong}

import com.sksamuel.exts.Logging
import com.sksamuel.exts.io.Using
import com.univocity.parsers.csv.CsvParser
import io.eels.Row
import io.eels.datastream.{DataStream, Publisher, Subscriber, Subscription}
import io.eels.schema.StructType

class CsvPublisher(createParser: () => CsvParser,
                   inputFn: () => InputStream,
                   header: Header,
                   schema: StructType) extends Publisher[Seq[Row]] with Logging with Using {

  val rowsToSkip: Int = header match {
    case Header.FirstRow => 1
    case _ => 0
  }

  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {

    val input = inputFn()
    val parser = createParser()

    try {
      parser.beginParsing(input)

      val running = new AtomicBoolean(true)
      subscriber.subscribed(Subscription.fromRunning(running))

      logger.debug(s"CSV Source will skip $rowsToSkip rows")

      val count = new AtomicLong(0)

      Iterator.continually(parser.parseNext)
        .takeWhile(_ != null)
        .takeWhile(_ => running.get)
        .drop(rowsToSkip)
        .map { record => Row(schema, record.toVector) }
        .grouped(DataStream.DefaultBatchSize)
        .foreach { ts =>
          count.addAndGet(ts.size)
          subscriber.next(ts)
        }

      logger.debug(s"All ${count.get} rows read, notifying subscriber")
      subscriber.completed()

    } catch {
      case t: Throwable =>
        logger.error(s"Error in CSV Source, subscriber will be notified", t)
        subscriber.error(t)

    } finally {
      logger.debug("Closing CSV source resources")
      parser.stopParsing()
    }
  }
} 
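The publisher above wires the univocity parser into eel's DataStream machinery; stripped of that machinery, the underlying InputStream parsing loop is a short sketch like the following (the inline CSV literal is illustrative).

import java.io.{ByteArrayInputStream, InputStream}

import com.univocity.parsers.csv.{CsvParser, CsvParserSettings}

object CsvInputStreamExample extends App {
  val is: InputStream = new ByteArrayInputStream("a,b\n1,2\n3,4\n".getBytes("UTF-8"))
  val parser = new CsvParser(new CsvParserSettings)
  parser.beginParsing(is)
  // parseNext returns null once the stream is exhausted.
  Iterator.continually(parser.parseNext())
    .takeWhile(_ != null)
    .foreach(record => println(record.mkString("|")))
  parser.stopParsing()
}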
Example 51
Source File: ZincAnalysisParserTest.scala    From exodus   with MIT License 5 votes vote down vote up
package com.wix.build.zinc.analysis

import java.io.InputStream
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}
import java.util.UUID

import com.github.marschall.memoryfilesystem.MemoryFileSystemBuilder
import com.wixpress.build.maven.Coordinates
import org.specs2.mutable.SpecificationWithJUnit
import org.specs2.specification.Scope

class ZincAnalysisParserTest extends SpecificationWithJUnit {
  "ZincAnalysisParser" should {
    "parse repo with zinc analysis" in new baseCtx {
      private val parser = new ZincAnalysisParser(repoRoot)
      private val coordinatesToAnalyses: Map[Coordinates, List[ZincModuleAnalysis]] = parser.readModules()
      coordinatesToAnalyses must haveLength(greaterThan(0))
      private val analysisList: List[ZincModuleAnalysis] = coordinatesToAnalyses.head._2
      analysisList must haveLength(greaterThan(0))
    }
  }

  abstract class baseCtx extends Scope {
    val fileSystem = MemoryFileSystemBuilder.newLinux().build()
    val repoRoot = fileSystem.getPath("repoRoot")
    Files.createDirectories(repoRoot)
    writeResourceAsFileToPath("/pom.xml", "pom.xml", "java-junit-sample/")
    writeResourceAsFileToPath("/aggregate-pom.xml", "pom.xml", "")
    writeResourceAsFileToPath("/compile.relations", "compile.relations","java-junit-sample/target/analysis/")
    writeResourceAsFileToPath("/test-compile.relations", "test-compile.relations","java-junit-sample/target/analysis/")

    private def writeResourceAsFileToPath(resource: String, fileName: String, path: String) = {
      if (path.nonEmpty)
        Files.createDirectories(repoRoot.resolve(path))
      val stream: InputStream = getClass.getResourceAsStream(s"$resource")
      val compileRelations = scala.io.Source.fromInputStream(stream).mkString
      Files.write(repoRoot.resolve(s"$path$fileName"), compileRelations.getBytes(StandardCharsets.UTF_8))
    }

    def path(withName: String) = repoRoot.resolve(withName)
    def random = UUID.randomUUID().toString
  }
} 
Example 52
Source File: PredefinedTag.scala    From smui   with Apache License 2.0 5 votes vote down vote up
package models

import java.io.InputStream
import java.sql.Connection

import play.api.Logger
import play.api.libs.json.{Json, OFormat}

case class PredefinedTag(property: Option[String],
                         value: String,
                         solrIndexName: Option[String],
                         exported: Option[Boolean]) {

}

object PredefinedTag {

  val logger = Logger(getClass)

  implicit val jsonFormat: OFormat[PredefinedTag] = Json.format[PredefinedTag]

  def fromStream(stream: InputStream): Seq[PredefinedTag] = {
    try {
      Json.parse(stream).as[Seq[PredefinedTag]]
    } finally {
      stream.close()
    }
  }

  def updateInDB(predefinedTags: Seq[PredefinedTag])(implicit connection: Connection): (Seq[InputTagId], Seq[InputTag]) = {
    val indexIdsByName = SolrIndex.listAll.map(i => i.name -> i.id).toMap
    val tagsInDBByContent = InputTag.loadAll().map(t => t.tagContent -> t).toMap

    val newTags = predefinedTags.map { tag =>
      TagContent(tag.solrIndexName.flatMap(indexIdsByName.get), tag.property, tag.value) -> tag
    }.toMap

    val toDelete = tagsInDBByContent.filter { case (content, tag) => tag.predefined && !newTags.contains(content) }.map(_._2.id).toSeq
    val toInsert = newTags.filter(t => !tagsInDBByContent.contains(t._1)).map { case (tc, t) =>
      InputTag.create(tc.solrIndexId, t.property, t.value, t.exported.getOrElse(true), predefined = true)
    }.toSeq

    InputTag.insert(toInsert: _*)
    InputTag.deleteByIds(toDelete)
    if (toDelete.nonEmpty || toInsert.nonEmpty) {
      logger.info(s"Inserted ${toInsert.size} new predefined tags into the DB and deleted ${toDelete.size} no longer existing predefined tags.")
    }

    (toDelete, toInsert)
  }

} 
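A minimal sketch of feeding fromStream above from a classpath resource; the file name predefined-tags.json and its contents are assumptions, not part of smui.

object PredefinedTagExample extends App {
  // Hypothetical resource: a JSON array such as
  // [{"property": "filter", "value": "color", "solrIndexName": null, "exported": true}]
  val stream = getClass.getResourceAsStream("/predefined-tags.json")
  val tags: Seq[PredefinedTag] = PredefinedTag.fromStream(stream)
  tags.foreach(tag => println(s"${tag.property.getOrElse("-")} = ${tag.value}"))
}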
Example 53
Source File: SchemaReader.scala    From darwin   with Apache License 2.0 5 votes vote down vote up
package it.agilelab.darwin.app.mock

import java.io.{File, InputStream}

import org.apache.avro.Schema

object SchemaReader {

  def readFromResources(p: String): Schema = {
    read(getClass.getClassLoader.getResourceAsStream(p))
  }

  def read(f: File): Schema = {
    val parser = new Schema.Parser()
    parser.parse(f)
  }

  def read(s: String): Schema = {
    val parser = new Schema.Parser()
    parser.parse(s)
  }

  def read(is: InputStream): Schema = {
    val parser = new Schema.Parser()
    parser.parse(is)
  }
} 
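A usage sketch for the reader above; the resource name user.avsc and its record definition are assumptions.

object SchemaReaderExample extends App {
  // Assumes src/main/resources/user.avsc contains an Avro record definition.
  val schema = SchemaReader.readFromResources("user.avsc")
  println(s"${schema.getFullName}: ${schema.getFields.size} fields")
}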
Example 54
Source File: JsonProtocol.scala    From darwin   with Apache License 2.0 5 votes vote down vote up
package it.agilelab.darwin.connector.rest

import java.io.InputStream

import org.apache.avro.Schema
import org.codehaus.jackson.map.ObjectMapper
import org.codehaus.jackson.node.JsonNodeFactory
import it.agilelab.darwin.common.compat._

trait JsonProtocol {
  val objectMapper = new ObjectMapper()

  def toJson(schemas : Seq[(Long,Schema)]): String = {

    val data = schemas.map {
      case (_, schema) =>
        objectMapper.readTree(schema.toString)
    }.foldLeft(JsonNodeFactory.instance.arrayNode()) {
      case (array, node) =>
        array.add(node)
        array
    }

    objectMapper.writeValueAsString(data)
  }

  def toSeqOfIdSchema(in: InputStream): Seq[(Long, Schema)] = {
    val node = objectMapper.readTree(in)

    node.getElements.toScala.map { node =>
      val id = node.get("id").asText().toLong
      val schemaNode = node.get("schema")

      val schemaToString = objectMapper.writeValueAsString(schemaNode)

      val parser = new Schema.Parser()

      val schema = parser.parse(schemaToString)

      (id, schema)
    }.toVector
  }


  def toSchema(in: InputStream): Schema = {
    val parser = new Schema.Parser()
    parser.parse(in)
  }
} 
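A small usage sketch for toSchema, parsing an in-memory Avro schema instead of an HTTP response body; the record definition is illustrative.

object JsonProtocolExample extends JsonProtocol with App {
  val json = """{"type":"record","name":"User","fields":[{"name":"id","type":"long"}]}"""
  val is = new java.io.ByteArrayInputStream(json.getBytes("UTF-8"))
  val schema = toSchema(is)
  println(schema.getFullName)
}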
Example 55
Source File: ImageLoaderUtils.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.loaders

import java.awt.image.BufferedImage
import java.io.{InputStream, ByteArrayInputStream}
import java.net.URI
import java.util.zip.GZIPInputStream
import javax.imageio.ImageIO

import keystoneml.loaders.VOCLoader._
import org.apache.commons.compress.archivers.ArchiveStreamFactory
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import keystoneml.pipelines.Logging
import keystoneml.utils._

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

object ImageLoaderUtils extends Logging {
  
  def loadFiles[L, I <: AbstractLabeledImage[L] : ClassTag](
      filePathsRDD: RDD[URI],
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I, // TODO(etrain): We can probably do this with implicits.
      namePrefix: Option[String] = None): RDD[I] = {
    filePathsRDD.flatMap(fileUri => loadFile(fileUri, labelsMap, imageBuilder, namePrefix))
  }

  private def loadFile[L, I <: AbstractLabeledImage[L]](
      fileUri: URI,
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I,
      namePrefix: Option[String]): Iterator[I] = {
    val filePath = new Path(fileUri)
    val conf = new Configuration(true)
    val fs = FileSystem.get(filePath.toUri(), conf)
    val fStream = fs.open(filePath)

    val tarStream = new ArchiveStreamFactory().createArchiveInputStream(
      "tar", fStream).asInstanceOf[TarArchiveInputStream]

    var entry = tarStream.getNextTarEntry()
    val imgs = new ArrayBuffer[I]
    while (entry != null) {
      if (!entry.isDirectory && (namePrefix.isEmpty || entry.getName.startsWith(namePrefix.get))) {
        var offset = 0
        var ret = 0
        val content = new Array[Byte](entry.getSize().toInt)
        while (ret >= 0 && offset != entry.getSize()) {
          ret = tarStream.read(content, offset, content.length - offset)
          if (ret >= 0) {
            offset += ret
          }
        }

        val bais = new ByteArrayInputStream(content)

        val image = ImageUtils.loadImage(bais).map { img =>
          imageBuilder(img, labelsMap(entry.getName), Some(entry.getName))
        }

        imgs ++= image
      }
      entry = tarStream.getNextTarEntry()
    }

    imgs.iterator
  }
} 
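The tar-walking loop above can be exercised on its own with commons-compress; the archive path images.tar below is hypothetical.

import java.io.{FileInputStream, InputStream}

import org.apache.commons.compress.archivers.ArchiveStreamFactory
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream

object TarEntriesExample extends App {
  val fis: InputStream = new FileInputStream("images.tar")
  val tar = new ArchiveStreamFactory()
    .createArchiveInputStream("tar", fis)
    .asInstanceOf[TarArchiveInputStream]
  // getNextTarEntry returns null once the archive is exhausted.
  Iterator.continually(tar.getNextTarEntry)
    .takeWhile(_ != null)
    .foreach(entry => println(s"${entry.getName} (${entry.getSize} bytes)"))
  tar.close()
}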
Example 56
Source File: JsonConverters.scala    From mango   with Apache License 2.0 5 votes vote down vote up
package com.kakao.mango.json

import java.io.InputStream

import com.kakao.shaded.jackson.core.{JsonParser, JsonToken}
import com.kakao.shaded.jackson.databind.{DeserializationFeature, ObjectMapper, ObjectWriter}
import com.kakao.shaded.jackson.module.afterburner.AfterburnerModule
import com.kakao.shaded.jackson.module.scala.DefaultScalaModule

import scala.language.implicitConversions
import scala.reflect._


  def streamJson(parser: JsonParser): JsonIterator = {
    val accessor = JsonTokenAccessor(parser)

    new Iterator[(JsonToken, JsonTokenAccessor)] {
      override def hasNext: Boolean = !parser.isClosed
      override def next(): (JsonToken, JsonTokenAccessor) = {
        val token = parser.nextToken()
        if (token == null) parser.close()
        (token, accessor)
      }
    }
  }

} 
Example 57
Source File: CreateJacksonParser.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.json

import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}
import java.nio.channels.Channels
import java.nio.charset.Charset

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text
import sun.nio.cs.StreamDecoder

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.unsafe.types.UTF8String

private[sql] object CreateJacksonParser extends Serializable {
  def string(jsonFactory: JsonFactory, record: String): JsonParser = {
    jsonFactory.createParser(record)
  }

  def utf8String(jsonFactory: JsonFactory, record: UTF8String): JsonParser = {
    val bb = record.getByteBuffer
    assert(bb.hasArray)

    val bain = new ByteArrayInputStream(
      bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())

    jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
  }

  def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
    jsonFactory.createParser(record.getBytes, 0, record.getLength)
  }

  // Jackson parsers can be ranked according to their performance:
  // 1. Array based with actual encoding UTF-8 in the array. This is the fastest parser
  //    but it doesn't allow to set encoding explicitly. Actual encoding is detected automatically
  //    by checking leading bytes of the array.
  // 2. InputStream based with actual encoding UTF-8 in the stream. Encoding is detected
  //    automatically by analyzing first bytes of the input stream.
  // 3. Reader based parser. This is the slowest parser used here but it allows to create
  //    a reader with specific encoding.
  // The method creates a reader for an array with given encoding and sets size of internal
  // decoding buffer according to size of input array.
  private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = {
    val bais = new ByteArrayInputStream(in, 0, length)
    val byteChannel = Channels.newChannel(bais)
    val decodingBufferSize = Math.min(length, 8192)
    val decoder = Charset.forName(enc).newDecoder()

    StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize)
  }

  def text(enc: String, jsonFactory: JsonFactory, record: Text): JsonParser = {
    val sd = getStreamDecoder(enc, record.getBytes, record.getLength)
    jsonFactory.createParser(sd)
  }

  def inputStream(jsonFactory: JsonFactory, is: InputStream): JsonParser = {
    jsonFactory.createParser(is)
  }

  def inputStream(enc: String, jsonFactory: JsonFactory, is: InputStream): JsonParser = {
    jsonFactory.createParser(new InputStreamReader(is, enc))
  }

  def internalRow(jsonFactory: JsonFactory, row: InternalRow): JsonParser = {
    val ba = row.getBinary(0)

    jsonFactory.createParser(ba, 0, ba.length)
  }

  def internalRow(enc: String, jsonFactory: JsonFactory, row: InternalRow): JsonParser = {
    val binary = row.getBinary(0)
    val sd = getStreamDecoder(enc, binary, binary.length)

    jsonFactory.createParser(sd)
  }
} 
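CreateJacksonParser is private[sql], so it is not callable from user code, but its InputStream entry points boil down to handing Jackson a stream; a standalone sketch of that core pattern:

import java.io.ByteArrayInputStream

import com.fasterxml.jackson.core.{JsonFactory, JsonToken}

object JacksonInputStreamExample extends App {
  val factory = new JsonFactory()
  val is = new ByteArrayInputStream("""{"a":1,"b":[true,false]}""".getBytes("UTF-8"))
  val parser = factory.createParser(is)
  // Walk the token stream until Jackson reports the end of input.
  var token: JsonToken = parser.nextToken()
  while (token != null) {
    println(token)
    token = parser.nextToken()
  }
  parser.close()
}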
Example 58
Source File: PythonSQLUtils.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.api.python

import java.io.InputStream
import java.nio.channels.Channels

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.python.PythonRDDServer
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.execution.arrow.ArrowConverters
import org.apache.spark.sql.types.DataType

private[sql] object PythonSQLUtils {
  def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText)

  // This is needed when generating SQL documentation for built-in functions.
  def listBuiltinFunctionInfos(): Array[ExpressionInfo] = {
    FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
  }

  
private[sql] class ArrowRDDServer(sqlContext: SQLContext) extends PythonRDDServer {

  override protected def streamToRDD(input: InputStream): RDD[Array[Byte]] = {
    // Create array to consume iterator so that we can safely close the inputStream
    val batches = ArrowConverters.getBatchesFromStream(Channels.newChannel(input)).toArray
    // Parallelize the record batches to create an RDD
    JavaRDD.fromRDD(sqlContext.sparkContext.parallelize(batches, batches.length))
  }

} 
Example 59
Source File: CodecStreams.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import java.io.{InputStream, OutputStream, OutputStreamWriter}
import java.nio.charset.{Charset, StandardCharsets}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.compress._
import org.apache.hadoop.mapreduce.JobContext
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.util.ReflectionUtils

import org.apache.spark.TaskContext

object CodecStreams {
  private def getDecompressionCodec(config: Configuration, file: Path): Option[CompressionCodec] = {
    val compressionCodecs = new CompressionCodecFactory(config)
    Option(compressionCodecs.getCodec(file))
  }

  def createInputStream(config: Configuration, file: Path): InputStream = {
    val fs = file.getFileSystem(config)
    val inputStream: InputStream = fs.open(file)

    getDecompressionCodec(config, file)
      .map(codec => codec.createInputStream(inputStream))
      .getOrElse(inputStream)
  }

  
  def getCompressionExtension(context: JobContext): String = {
    getCompressionCodec(context)
      .map(_.getDefaultExtension)
      .getOrElse("")
  }
} 
Example 60
Source File: OffsetSeqLog.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming


import java.io.{InputStream, OutputStream}
import java.nio.charset.StandardCharsets._

import scala.io.{Source => IOSource}

import org.apache.spark.sql.SparkSession


class OffsetSeqLog(sparkSession: SparkSession, path: String)
  extends HDFSMetadataLog[OffsetSeq](sparkSession, path) {

  override protected def deserialize(in: InputStream): OffsetSeq = {
    // called inside a try-finally where the underlying stream is closed in the caller
    def parseOffset(value: String): Offset = value match {
      case OffsetSeqLog.SERIALIZED_VOID_OFFSET => null
      case json => SerializedOffset(json)
    }
    val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines()
    if (!lines.hasNext) {
      throw new IllegalStateException("Incomplete log file")
    }

    val version = parseVersion(lines.next(), OffsetSeqLog.VERSION)

    // read metadata
    val metadata = lines.next().trim match {
      case "" => None
      case md => Some(md)
    }
    OffsetSeq.fill(metadata, lines.map(parseOffset).toArray: _*)
  }

  override protected def serialize(offsetSeq: OffsetSeq, out: OutputStream): Unit = {
    // called inside a try-finally where the underlying stream is closed in the caller
    out.write(("v" + OffsetSeqLog.VERSION).getBytes(UTF_8))

    // write metadata
    out.write('\n')
    out.write(offsetSeq.metadata.map(_.json).getOrElse("").getBytes(UTF_8))

    // write offsets, one per line
    offsetSeq.offsets.map(_.map(_.json)).foreach { offset =>
      out.write('\n')
      offset match {
        case Some(json: String) => out.write(json.getBytes(UTF_8))
        case None => out.write(OffsetSeqLog.SERIALIZED_VOID_OFFSET.getBytes(UTF_8))
      }
    }
  }
}

object OffsetSeqLog {
  private[streaming] val VERSION = 1
  private val SERIALIZED_VOID_OFFSET = "-"
} 
Example 61
Source File: CommitLog.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming

import java.io.{InputStream, OutputStream}
import java.nio.charset.StandardCharsets._

import scala.io.{Source => IOSource}

import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import org.apache.spark.sql.SparkSession


class CommitLog(sparkSession: SparkSession, path: String)
  extends HDFSMetadataLog[CommitMetadata](sparkSession, path) {

  import CommitLog._

  override protected def deserialize(in: InputStream): CommitMetadata = {
    // called inside a try-finally where the underlying stream is closed in the caller
    val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines()
    if (!lines.hasNext) {
      throw new IllegalStateException("Incomplete log file in the offset commit log")
    }
    parseVersion(lines.next.trim, VERSION)
    val metadataJson = if (lines.hasNext) lines.next else EMPTY_JSON
    CommitMetadata(metadataJson)
  }

  override protected def serialize(metadata: CommitMetadata, out: OutputStream): Unit = {
    // called inside a try-finally where the underlying stream is closed in the caller
    out.write(s"v${VERSION}".getBytes(UTF_8))
    out.write('\n')

    // write metadata
    out.write(metadata.json.getBytes(UTF_8))
  }
}

object CommitLog {
  private val VERSION = 1
  private val EMPTY_JSON = "{}"
}


case class CommitMetadata(nextBatchWatermarkMs: Long = 0) {
  def json: String = Serialization.write(this)(CommitMetadata.format)
}

object CommitMetadata {
  implicit val format = Serialization.formats(NoTypeHints)

  def apply(json: String): CommitMetadata = Serialization.read[CommitMetadata](json)
} 
Example 62
Source File: ProcessTestUtils.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.test

import java.io.{InputStream, IOException}

import scala.sys.process.BasicIO

object ProcessTestUtils {
  class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread {
    this.setDaemon(true)

    override def run(): Unit = {
      try {
        BasicIO.processFully(capture)(stream)
      } catch { case _: IOException =>
        // Ignores the IOException thrown when the process termination, which closes the input
        // stream abruptly.
      }
    }
  }
} 
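A small usage sketch (not part of the file above), assuming a local `echo` binary; the daemon thread drains the process's stdout so the child cannot block on a full pipe:

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

object CapturerExample {
  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "hello").start()
    // Each chunk of output is handed to the capture callback.
    val capturer = new ProcessOutputCapturer(process.getInputStream, line => println(s"captured: $line"))
    capturer.start()
    process.waitFor()
    capturer.join()
  }
}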
Example 63
Source File: SQLRunnerSuite.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package com.sap.spark.cli

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

import org.apache.spark.SparkContext
import org.apache.spark.sql.{GlobalSapSQLContext, SQLContext}
import org.scalatest.{BeforeAndAfterEach, FunSuite, ShouldMatchers}


// NOTE: the suite's class declaration and the opening of this test were cut off in the scraped
// listing; the header below is a reconstruction (based on the imports above) so the example compiles.
class SQLRunnerSuite extends FunSuite with GlobalSapSQLContext with BeforeAndAfterEach with ShouldMatchers {

  test("parseOpts handles good, bad and ugly argument lists") {
    // good call
    val goodOpts =
      SQLRunner.parseOpts(List("a.sql", "b.sql", "-o", "output.csv"))

    goodOpts.sqlFiles should be(List("a.sql", "b.sql"))
    goodOpts.output should be(Some("output.csv"))

    // bad call
    val badOpts = SQLRunner.parseOpts(List())

    badOpts.sqlFiles should be(List())
    badOpts.output should be(None)

    // ugly call
    val uglyOpts =
      SQLRunner.parseOpts(List("a.sql", "-o", "output.csv", "b.sql"))

    uglyOpts.sqlFiles should be(List("a.sql", "b.sql"))
    uglyOpts.output should be(Some("output.csv"))
  }

  def runSQLTest(input: String, expectedOutput: String): Unit = {
    val inputStream: InputStream = new ByteArrayInputStream(input.getBytes())
    val outputStream = new ByteArrayOutputStream()

    SQLRunner.sql(inputStream, outputStream)

    val output = outputStream.toString
    output should be(expectedOutput)
  }

  test("can run dummy query") {
    val input = "SELECT 1;"
    val output = "1\n"

    runSQLTest(input, output)
  }

  test("can run multiple dummy queries") {
    val input = """
        |SELECT 1;SELECT 2;
        |SELECT 3;
      """.stripMargin

    val output = "1\n2\n3\n"

    runSQLTest(input, output)
  }

  test("can run a basic example with tables") {
    val input = """
                  |SELECT * FROM DEMO_TABLE;
                  |SELECT * FROM DEMO_TABLE LIMIT 1;
                  |DROP TABLE DEMO_TABLE;
                """.stripMargin

    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }

  test("can run an example with comments") {
    val input = """
                  |SELECT * FROM DEMO_TABLE; -- this is the first query
                  |SELECT * FROM DEMO_TABLE LIMIT 1;
                  |-- now let's drop a table
                  |DROP TABLE DEMO_TABLE;
                """.stripMargin

    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }
} 
Example 64
Source File: S3Client.scala    From akka-persistence-s3   with MIT License 5 votes vote down vote up
package akka.persistence.s3

import java.io.InputStream

import com.amazonaws.auth.{ BasicAWSCredentials, DefaultAWSCredentialsProviderChain }
import com.amazonaws.services.s3.{ S3ClientOptions, AmazonS3Client }
import com.amazonaws.services.s3.model._

import scala.concurrent.{ Future, ExecutionContext }

trait S3Client {
  val s3ClientConfig: S3ClientConfig

  lazy val client: AmazonS3Client = {
    val client =
      if (s3ClientConfig.awsUseDefaultCredentialsProviderChain)
        new AmazonS3Client(new DefaultAWSCredentialsProviderChain).withRegion(s3ClientConfig.region)
      else
        new AmazonS3Client(new BasicAWSCredentials(s3ClientConfig.awsKey, s3ClientConfig.awsSecret))

    s3ClientConfig.endpoint.foreach { endpoint =>
      client.withEndpoint(endpoint)
      ()
    }
    client.setS3ClientOptions(new S3ClientOptions()
      .withPathStyleAccess(s3ClientConfig.options.pathStyleAccess)
      .withChunkedEncodingDisabled(s3ClientConfig.options.chunkedEncodingDisabled))
    client
  }

  def createBucket(bucketName: String)(implicit ec: ExecutionContext): Future[Bucket] = Future {
    client.createBucket(bucketName)
  }

  def deleteBucket(bucketName: String)(implicit ec: ExecutionContext): Future[Unit] = Future {
    client.deleteBucket(bucketName)
  }

  def putObject(bucketName: String, key: String, input: InputStream, metadata: ObjectMetadata)(implicit ec: ExecutionContext): Future[PutObjectResult] = Future {
    client.putObject(new PutObjectRequest(bucketName, key, input, metadata))
  }

  def getObject(bucketName: String, key: String)(implicit ec: ExecutionContext): Future[S3Object] = Future {
    client.getObject(new GetObjectRequest(bucketName, key))
  }

  def listObjects(request: ListObjectsRequest)(implicit ec: ExecutionContext): Future[ObjectListing] = Future {
    client.listObjects(request)
  }

  def deleteObject(bucketName: String, key: String)(implicit ec: ExecutionContext): Future[Unit] = Future {
    client.deleteObject(bucketName, key)
  }

  def deleteObjects(request: DeleteObjectsRequest)(implicit ec: ExecutionContext): Future[Unit] = Future {
    client.deleteObjects(request)
  }
} 
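A brief usage sketch (not part of the file above); it assumes an S3Client instance, and therefore an S3ClientConfig, is already wired up elsewhere, and the bucket/key names are hypothetical:

import java.io.ByteArrayInputStream

import akka.persistence.s3.S3Client
import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectResult}

import scala.concurrent.{ExecutionContext, Future}

object S3ClientExample {
  // Uploads an in-memory payload; putObject wraps the blocking AWS SDK call in a Future.
  def uploadGreeting(s3: S3Client)(implicit ec: ExecutionContext): Future[PutObjectResult] = {
    val bytes = "hello".getBytes("UTF-8")
    val metadata = new ObjectMetadata()
    metadata.setContentLength(bytes.length.toLong)
    s3.putObject("my-bucket", "greetings/hello.txt", new ByteArrayInputStream(bytes), metadata)
  }
}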
Example 65
Source File: EnrichTruckData.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package com.orendainx.trucking.nifi.processors

import java.io.{InputStream, OutputStream}
import java.nio.charset.StandardCharsets
import java.util.concurrent.atomic.AtomicReference
import java.util.Scanner

import com.orendainx.trucking.commons.models.{EnrichedTruckData, TruckData}
import com.orendainx.trucking.enrichment.WeatherAPI
import org.apache.nifi.annotation.behavior._
import org.apache.nifi.annotation.documentation.{CapabilityDescription, Tags}
import org.apache.nifi.components.PropertyDescriptor
import org.apache.nifi.logging.ComponentLog
import org.apache.nifi.processor.io.InputStreamCallback
import org.apache.nifi.processor.io.OutputStreamCallback
import org.apache.nifi.processor._

import scala.collection.JavaConverters._


@Tags(Array("trucking", "data", "event", "enrich", "iot"))
@CapabilityDescription("Enriches simulated truck sensor data. Find the master project and its code, documentation and corresponding tutorials at: https://github.com/orendain/trucking-iot")
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@TriggerSerially
@WritesAttributes(Array(
  new WritesAttribute(attribute = "dataType", description = "The class name of the resulting enriched data type.")
))
class EnrichTruckData extends AbstractProcessor {

  private var log: ComponentLog = _
  private val RelSuccess = new Relationship.Builder().name("success").description("All generated data is routed to this relationship.").build

  override def init(context: ProcessorInitializationContext): Unit = {
    log = context.getLogger
  }

  override def onTrigger(context: ProcessContext, session: ProcessSession): Unit = {

    var flowFile = session.get
    log.debug(s"Flowfile received: $flowFile")

    // Convert the entire stream of bytes from the flow file into a string
    val content = new AtomicReference[String]
    session.read(flowFile, new InputStreamCallback {
      override def process(inputStream: InputStream) = {
        val scanner = new Scanner(inputStream).useDelimiter("\\A")
        val result = if (scanner.hasNext()) scanner.next() else ""
        log.debug(s"Parsed content: $result")
        content.set(result)
      }
    })

    // Form a TruckData object from content, then creating an EnrichedTruckData object by making the appropriate
    // calls to WeatherAPI
    val truckData = TruckData.fromCSV(content.get())
    val enrichedTruckData = EnrichedTruckData(truckData, WeatherAPI.default.getFog(truckData.eventType),
      WeatherAPI.default.getRain(truckData.eventType), WeatherAPI.default.getWind(truckData.eventType))

    log.debug(s"EnrichedData generated: $enrichedTruckData")

    // Add the new data type as a flow file attribute
    flowFile = session.putAttribute(flowFile, "dataType", enrichedTruckData.getClass.getSimpleName)

    // Replace the flow file, writing in the new content
    flowFile = session.write(flowFile, new OutputStreamCallback {
      override def process(outputStream: OutputStream) =
        outputStream.write(enrichedTruckData.toCSV.getBytes(StandardCharsets.UTF_8))
    })

    // TODO: document what this does
    session.getProvenanceReporter.route(flowFile, RelSuccess)
    session.transfer(flowFile, RelSuccess)
    session.commit()
  }

  // Define properties and relationships
  override def getSupportedPropertyDescriptors: java.util.List[PropertyDescriptor] = List.empty[PropertyDescriptor].asJava

  override def getRelationships: java.util.Set[Relationship] = Set(RelSuccess).asJava
} 
Example 66
Source File: FileUploadServlet.scala    From udash-core   with Apache License 2.0 5 votes vote down vote up
package io.udash.rpc.utils

import java.io.InputStream
import java.nio.file.Paths
import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse}
import com.avsystem.commons._


// NOTE: the abstract class header was dropped by the scraper; it is reconstructed here, assuming
// the servlet is parameterized by the multipart field names that should be treated as file uploads.
abstract class FileUploadServlet(fileFields: Set[String]) extends HttpServlet {

  protected def handleFile(name: String, content: InputStream): Unit

  override protected def doPost(request: HttpServletRequest, response: HttpServletResponse): Unit = {
    request.getParts.asScala
      .filter(part => fileFields.contains(part.getName))
      .foreach(filePart => {
        val fileName = Paths.get(filePart.getSubmittedFileName).getFileName.toString
        val fileContent = filePart.getInputStream
        handleFile(fileName, fileContent)
        fileContent.close()
      })
  }
} 
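A sketch of a concrete subclass (not part of the file above), assuming the reconstructed constructor shown earlier; it merely counts the bytes of each uploaded part:

import java.io.InputStream

import io.udash.rpc.utils.FileUploadServlet

class ByteCountingUploadServlet extends FileUploadServlet(Set("file")) {
  override protected def handleFile(name: String, content: InputStream): Unit = {
    // Drain the stream; a real implementation would persist it somewhere.
    val size = Iterator.continually(content.read()).takeWhile(_ != -1).length
    println(s"received $name ($size bytes)")
  }
}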
Example 67
Source File: StreamExtensions.scala    From random-projections-at-berlinbuzzwords   with Apache License 2.0 5 votes vote down vote up
package com.stefansavev.core.serialization

import java.io.{InputStream, OutputStream}


object StreamExtensions{

  implicit class IntSerializerExt(outputStream: OutputStream) {
    def writeInt(value: Int): Unit = {
      IntSerializer.write(outputStream, value)
    }
  }

  implicit class IntDeSerializerExt(inputStream: InputStream) {
    def readInt(): Int = {
      IntSerializer.read(inputStream)
    }
  }

  implicit class DoubleArraySerializerExt(outputStream: OutputStream) {
    def writeDoubleArray(values: Array[Double]): Unit = {
      DoubleArraySerializer.write(outputStream, values)
    }
  }

  implicit class DoubleArrayDeSerializerExt(inputStream: InputStream) {
    def readDoubleArray(): Array[Double] = {
      DoubleArraySerializer.read(inputStream)
    }
  }

} 
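A round-trip sketch (not part of the file above) using the implicit extensions with in-memory streams:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.stefansavev.core.serialization.StreamExtensions._

object StreamExtensionsExample {
  def main(args: Array[String]): Unit = {
    val out = new ByteArrayOutputStream()
    out.writeInt(42)                      // via IntSerializerExt
    out.writeDoubleArray(Array(1.0, 2.5)) // via DoubleArraySerializerExt

    val in = new ByteArrayInputStream(out.toByteArray)
    println(in.readInt())                 // 42
    println(in.readDoubleArray().toList)  // List(1.0, 2.5)
  }
}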
Example 68
Source File: GenericArraySerializer.scala    From random-projections-at-berlinbuzzwords   with Apache License 2.0 5 votes vote down vote up
package com.stefansavev.core.serialization

import java.io.{InputStream, OutputStream}

import com.stefansavev.core.serialization.PrimitiveTypeSerializers.TypedIntSerializer

import scala.reflect.ClassTag

object GenericArraySerializer {

  class GenericArraySerializer[T: ClassTag](ev: T <:< AnyRef, nestedSerializer: TypedSerializer[T]) extends TypedSerializer[Array[T]] {
    val clazz = scala.reflect.classTag[T].runtimeClass
    if (clazz.equals(Int.getClass) || clazz.equals(Double.getClass)) {
      throw new IllegalStateException("GenericArraySerializer should not be applied to primitive types")
    }

    def toBinary(outputStream: OutputStream, input: Array[T]): Unit = {
      TypedIntSerializer.toBinary(outputStream, input.length)
      var i = 0
      while (i < input.length) {
        nestedSerializer.toBinary(outputStream, input(i))
        i += 1
      }
    }

    def fromBinary(inputStream: InputStream): Array[T] = {
      val len = TypedIntSerializer.fromBinary(inputStream)
      val output = Array.ofDim[T](len)
      var i = 0
      while (i < len) {
        output(i) = nestedSerializer.fromBinary(inputStream)
        i += 1
      }
      output
    }

    def name: String = s"GenericArraySerializer(${nestedSerializer.name})"
  }

  implicit def genericArraySerializer[T: ClassTag](implicit ev: T <:< AnyRef, nestedSerializer: TypedSerializer[T]): GenericArraySerializer[T] = {
    new GenericArraySerializer[T](ev, nestedSerializer)
  }
} 
Example 69
Source File: IsoSerializers.scala    From random-projections-at-berlinbuzzwords   with Apache License 2.0 5 votes vote down vote up
package com.stefansavev.core.serialization

import java.io.{InputStream, OutputStream}

object IsoSerializers {
  class IsoSerializer[A, B](iso: Iso[A, B], serB: TypedSerializer[B]) extends TypedSerializer[A]{
    def toBinary(outputStream: OutputStream, input: A): Unit = {
      serB.toBinary(outputStream, iso.from(input))
    }

    def fromBinary(inputStream: InputStream): A = {
      iso.to(serB.fromBinary(inputStream))
    }

    def name: String = s"IsoSerializer(via = ${serB.name})"
  }

  implicit def isoSerializer[A, B](implicit iso: Iso[A, B], serB: TypedSerializer[B]): IsoSerializer[A, B] = {
    new IsoSerializer[A, B](iso, serB)
  }
} 
Example 70
Source File: String2IdHasherSerializer.scala    From random-projections-at-berlinbuzzwords   with Apache License 2.0 5 votes vote down vote up
package com.stefansavev.core.string2id

import java.io.{InputStream, OutputStream}

import com.stefansavev.core.serialization.{IntSerializer, StringSerializer, TypedSerializer}

object String2IdHasherSerialization {

  implicit object String2IdHasherSerializer extends TypedSerializer[String2IdHasher] {
    def toBinary(outputStream: OutputStream, string2Id: String2IdHasher): Unit = {
      if (string2Id == null){
        throw new IllegalStateException("string2IdHasher cannot be null")
      }
      val settings = string2Id.getSettings()
      IntSerializer.write(outputStream, settings.maxValues)
      IntSerializer.write(outputStream, settings.avgStringLen)
      IntSerializer.write(outputStream, settings.toleratedNumberOfCollisions)
      var id = 0
      val numStrings = string2Id.numberOfUniqueStrings()
      IntSerializer.write(outputStream, numStrings)
      while(id < numStrings){
        val str = string2Id.getStringAtInternalIndex(id).get
        StringSerializer.write(outputStream, str)
        id += 1
      }
    }

    def fromBinary(inputStream: InputStream): String2IdHasher = {
      
      val maxValues = IntSerializer.read(inputStream)
      val avgStringLen = IntSerializer.read(inputStream)
      val numCollisions = IntSerializer.read(inputStream)
      val settings = new StringIdHasherSettings(maxValues, avgStringLen, numCollisions)
      val string2IdHasher = new String2IdHasher(settings)
      val numStrings = IntSerializer.read(inputStream)
      var i = 0
      while(i < numStrings){
        val str = StringSerializer.read(inputStream)
        val handle = string2IdHasher.add(str)
        val index = string2IdHasher.getInternalId(handle)
        if (index != i){
          throw new IllegalStateException("Internal error while reading hashed strings")
        }
        i += 1
      }
      string2IdHasher
    }

    def name: String = "String2IdHasherSerializer"
  }


} 
Example 71
Source File: utils.scala    From spark-http-stream   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming.http

import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.TimestampType
import org.apache.spark.SparkConf
import org.apache.commons.io.IOUtils
import org.apache.spark.serializer.KryoSerializer
import java.io.InputStream
import com.esotericsoftware.kryo.io.Input
import java.io.ByteArrayOutputStream

class WrongArgumentException(name: String, value: Any)
		extends RuntimeException(s"wrong argument: $name=$value") {
}

class MissingRequiredArgumentException(map: Map[String, String], paramName: String)
		extends RuntimeException(s"missing required argument: $paramName, all parameters=$map") {
}

class InvalidSerializerNameException(serializerName: String)
		extends RuntimeException(s"invalid serializer name: $serializerName") {
}

object SchemaUtils {
	def buildSchema(schema: StructType, includesTimestamp: Boolean, timestampColumnName: String = "_TIMESTAMP_"): StructType = {
		if (!includesTimestamp)
			schema;
		else
			StructType(schema.fields.toSeq :+ StructField(timestampColumnName, TimestampType, false));
	}
}

object Params {
	// NOTE: the scraped listing dropped the serializer definition that deserialize() relies on;
	// a standard Spark KryoSerializer instance like this is assumed.
	val kryoSerializer = new KryoSerializer(new SparkConf())

	def deserialize(bytes: Array[Byte]): Any = {
		val kryo = kryoSerializer.newKryo();
		val input = new Input();
		input.setBuffer(bytes);
		kryo.readClassAndObject(input);
	}
} 
Example 72
Source File: SerializerFactory.scala    From spark-http-stream   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming.http

import java.nio.ByteBuffer
import org.apache.spark.serializer.SerializerInstance
import org.apache.spark.serializer.DeserializationStream
import org.apache.spark.serializer.SerializationStream
import java.io.OutputStream
import java.io.InputStream
import scala.reflect.ClassTag
import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.spark.SparkConf
import org.apache.spark.serializer.JavaSerializer
import org.apache.spark.serializer.KryoSerializer


object SerializerFactory {
	val DEFAULT = new SerializerFactory {
		override def getSerializerInstance(serializerName: String): SerializerInstance = {
			serializerName.toLowerCase() match {
				case "kryo" ⇒
					new KryoSerializer(new SparkConf()).newInstance();
				case "java" ⇒
					new JavaSerializer(new SparkConf()).newInstance();
				case _ ⇒ throw new InvalidSerializerNameException(serializerName);
			}
		}
	}
}

trait SerializerFactory {
	def getSerializerInstance(serializerName: String): SerializerInstance;
} 
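A quick round-trip through the factory above (not part of the file); "kryo" and "java" are the only accepted names, anything else throws InvalidSerializerNameException:

import org.apache.spark.sql.execution.streaming.http.SerializerFactory

object SerializerFactoryExample {
  def main(args: Array[String]): Unit = {
    val serializer = SerializerFactory.DEFAULT.getSerializerInstance("kryo")
    // SerializerInstance.serialize returns a ByteBuffer that deserialize reads back.
    val bytes = serializer.serialize("hello")
    val restored = serializer.deserialize[String](bytes)
    println(restored)
  }
}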
Example 73
Source File: UtilsTest.scala    From spark-http-stream   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
import java.sql.Date

import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.SparkSession
import org.junit.Assert
import org.junit.Test
import java.io.ByteArrayOutputStream
import java.io.InputStream
import org.apache.commons.io.IOUtils
import com.esotericsoftware.kryo.io.Input
import org.apache.spark.sql.execution.streaming.http.KryoSerializerUtils

class UtilsTest {
	@Test
	def testKryoSerDe() {
		val d1 = new Date(30000);
		val bytes = KryoSerializerUtils.serialize(d1);
		val d2 = KryoSerializerUtils.deserialize(bytes);
		Assert.assertEquals(d1, d2);

		val d3 = Map('x' -> Array("aaa", "bbb"), 'y' -> Array("ccc", "ddd"));
		println(d3);
		val bytes2 = KryoSerializerUtils.serialize(d3);
		val d4 = KryoSerializerUtils.deserialize(bytes2).asInstanceOf[Map[String, Any]];
		println(d4);
	}

	@Test
	def testEncoderSchema() {
		val spark = SparkSession.builder.master("local[4]")
			.getOrCreate();
		val sqlContext = spark.sqlContext;
		import sqlContext.implicits._
		import org.apache.spark.sql.catalyst.encoders.encoderFor
		val schema1 = encoderFor[String].schema;
		val schema2 = encoderFor[(String)].schema;
		val schema3 = encoderFor[((String))].schema;

		Assert.assertEquals(schema1, schema2);
		Assert.assertEquals(schema1, schema3);
	}

	@Test
	def testDateInTuple() {
		val spark = SparkSession.builder.master("local[4]")
			.getOrCreate();
		val sqlContext = spark.sqlContext;
		import sqlContext.implicits._

		val d1 = new Date(30000);
		val ds = sqlContext.createDataset(Seq[(Int, Date)]((1, d1)));
		val d2 = ds.collect()(0)._2;

		//NOTE: d1!=d2, maybe a bug
		println(d1.equals(d2));
	}
} 
Example 74
Source File: FileUtils.scala    From skeuomorph   with Apache License 2.0 5 votes vote down vote up
package higherkindness.skeuomorph

import java.io.{File, FileOutputStream, InputStream}
import java.nio.file.{Files, Paths, StandardOpenOption}

import cats.effect.{Resource, Sync}

object FileUtils {
  def fileHandle[F[_]: Sync](name: String): Resource[F, File] =
    Resource.make(
      Sync[F].delay(new File(name))
    )(file => Sync[F].delay(file.deleteOnExit()))

  def fileOutputStream[F[_]: Sync](file: File): Resource[F, FileOutputStream] =
    Resource.make(
      Sync[F].delay(new FileOutputStream(file))
    )(fos => Sync[F].delay(fos.close()))

  def fileInputStream[F[_]: Sync](name: String): Resource[F, InputStream] =
    Resource.make(
      Sync[F].delay(Files.newInputStream(Paths.get(name), StandardOpenOption.DELETE_ON_CLOSE))
    )(is => Sync[F].delay(is.close()))
} 
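A small sketch (not part of the file above) assuming cats-effect 2 style IO; note that fileInputStream opens the file with DELETE_ON_CLOSE, so point it at a disposable file:

import cats.effect.IO
import higherkindness.skeuomorph.FileUtils

object FileUtilsExample {
  def main(args: Array[String]): Unit = {
    val program: IO[String] =
      FileUtils.fileInputStream[IO]("/tmp/example-payload.avsc").use { is =>
        // The Resource closes (and, because of DELETE_ON_CLOSE, deletes) the file afterwards.
        IO(scala.io.Source.fromInputStream(is).mkString)
      }
    println(program.unsafeRunSync())
  }
}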
Example 75
Source File: BrokerResources.scala    From reactive-activemq   with Apache License 2.0 5 votes vote down vote up
package akka.stream.integration

import java.io.InputStream
import java.net.URL

import akka.stream.integration.BrokerResources.{ QueueStat, TopicStat }
import org.scalatest.BeforeAndAfterEach

import scala.xml.NodeSeq

trait BrokerResources extends BeforeAndAfterEach { _: TestSpec =>

  def enableClearQueus: Boolean

  private def callBroker(path: String): InputStream = {
    val amqHost = system.settings.config.getString("amq.host")
    val url = new URL(s"http://$amqHost:8161" + path)
    val urlConnection = url.openConnection()
    val basicAuth = "Basic " + new String(java.util.Base64.getUrlEncoder.encode("admin:admin".getBytes()))
    urlConnection.addRequestProperty("Authorization", basicAuth)
    urlConnection.getInputStream
  }

  // communicate with the broker //
  private def getQueueXmlFromBroker: NodeSeq = {
    import scala.xml.XML
    XML.load(callBroker("/admin/xml/queues.jsp"))
  }

  def getTopicXmlFromBroker: NodeSeq = {
    import scala.xml.XML
    XML.load(callBroker("/admin/xml/topics.jsp"))
  }

  def getQueueStats: List[QueueStat] = (for {
    e ← getQueueXmlFromBroker \\ "queue"
    stat ← e \ "stats"
  } yield QueueStat(
    (e \ "@name").text,
    (stat \ "@size").text.toInt,
    (stat \ "@consumerCount").text.toInt,
    (stat \ "@enqueueCount").text.toInt,
    (stat \ "@dequeueCount").text.toInt
  )).toList

  def getTopicStats: List[TopicStat] = (for {
    e ← getTopicXmlFromBroker \\ "topic"
    stat ← e \ "stats"
  } yield TopicStat(
    (e \ "@name").text,
    (stat \ "@size").text.toInt,
    (stat \ "@consumerCount").text.toInt,
    (stat \ "@enqueueCount").text.toInt,
    (stat \ "@dequeueCount").text.toInt
  )).toList

  def purgeQueues(): Unit = {
    def purgeQueue(destinationName: String): InputStream = {
      val path = s"/api/jolokia/exec/org.apache.activemq:brokerName=localhost,destinationName=$destinationName,destinationType=Queue,type=Broker/purge"
      callBroker(path)
    }
    getQueueList.foreach(purgeQueue)
  }

  def getQueueList: List[String] = (for {
    e ← getQueueXmlFromBroker \\ "queue"
  } yield (e \ "@name").text).toList

  def getQueueStatFor(topic: String): Option[QueueStat] =
    getQueueStats.find(_.name contains topic)

  def getQueueMessageCount(topic: String): Option[Int] = for {
    stat ← getQueueStatFor(topic)
  } yield stat.enqueueCount - stat.dequeueCount

  override protected def beforeEach(): Unit = {
    if (enableClearQueus)
      purgeQueues()
    super.beforeEach()
  }
}

object BrokerResources {
  case class QueueStat(name: String, size: Int, consumerCount: Int, enqueueCount: Int, dequeueCount: Int)
  case class TopicStat(name: String, size: Int, consumerCount: Int, enqueueCount: Int, dequeueCount: Int)
} 
Example 76
Source File: ClasspathResources.scala    From reactive-activemq   with Apache License 2.0 5 votes vote down vote up
package akka.stream.integration

import java.io.InputStream

import akka.stream.IOResult
import akka.stream.scaladsl.{ Source, StreamConverters }
import akka.util.ByteString

import scala.concurrent.Future
import scala.io.{ Source => ScalaIOSource }

trait ClasspathResources {
  def withInputStream[T](fileName: String)(f: InputStream => T): T = {
    val is: InputStream = fromClasspathAsStream(fileName)
    try f(is) finally is.close()
  }

  def withInputStreamAsText[T](fileName: String)(f: String => T): T =
    f(fromClasspathAsString(fileName))

  def withByteStringSource[T](fileName: String)(f: Source[ByteString, Future[IOResult]] => T): T =
    withInputStream(fileName) { inputStream =>
      f(StreamConverters.fromInputStream(() => inputStream))
    }

  def streamToString(is: InputStream): String =
    ScalaIOSource.fromInputStream(is).mkString

  def fromClasspathAsString(fileName: String): String =
    streamToString(fromClasspathAsStream(fileName))

  def fromClasspathAsStream(fileName: String): InputStream =
    getClass.getClassLoader.getResourceAsStream(fileName)
} 
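Usage sketch (not part of the file above), mixing the trait into an object and assuming a hypothetical people.csv resource on the classpath:

import akka.stream.integration.ClasspathResources

object ClasspathResourcesExample extends ClasspathResources {
  def main(args: Array[String]): Unit = {
    // Read the whole resource as a String.
    println(fromClasspathAsString("people.csv"))

    // Or work with the raw InputStream and let the helper close it.
    val firstLine = withInputStream("people.csv") { is =>
      scala.io.Source.fromInputStream(is).getLines().next()
    }
    println(firstLine)
  }
}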
Example 77
Source File: Https.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.common

import java.io.{FileInputStream, InputStream}
import java.security.{KeyStore, SecureRandom}
import javax.net.ssl.{KeyManagerFactory, SSLContext, TrustManagerFactory}

import akka.http.scaladsl.ConnectionContext
import akka.stream.TLSClientAuth
import com.typesafe.sslconfig.akka.AkkaSSLConfig

object Https {
  case class HttpsConfig(keystorePassword: String, keystoreFlavor: String, keystorePath: String, clientAuth: String)

  def getCertStore(password: Array[Char], flavor: String, path: String): KeyStore = {
    val cs: KeyStore = KeyStore.getInstance(flavor)
    val certStore: InputStream = new FileInputStream(path)
    cs.load(certStore, password)
    cs
  }

  def connectionContext(httpsConfig: HttpsConfig, sslConfig: Option[AkkaSSLConfig] = None) = {

    val keyFactoryType = "SunX509"
    val clientAuth = {
      if (httpsConfig.clientAuth.toBoolean)
        Some(TLSClientAuth.need)
      else
        Some(TLSClientAuth.none)
    }

    val keystorePassword = httpsConfig.keystorePassword.toCharArray

    val keyStore: KeyStore = KeyStore.getInstance(httpsConfig.keystoreFlavor)
    val keyStoreStream: InputStream = new FileInputStream(httpsConfig.keystorePath)
    keyStore.load(keyStoreStream, keystorePassword)

    val keyManagerFactory: KeyManagerFactory = KeyManagerFactory.getInstance(keyFactoryType)
    keyManagerFactory.init(keyStore, keystorePassword)

    // Currently, we are using the keystore as truststore as well, because the clients use the same keys as the
    // server for client authentication (if enabled).
    // So this code is guided by https://doc.akka.io/docs/akka-http/10.0.9/scala/http/server-side-https-support.html
    // This needs to be reworked, when we fix the keys and certificates.
    val trustManagerFactory: TrustManagerFactory = TrustManagerFactory.getInstance(keyFactoryType)
    trustManagerFactory.init(keyStore)

    val sslContext: SSLContext = SSLContext.getInstance("TLS")
    sslContext.init(keyManagerFactory.getKeyManagers, trustManagerFactory.getTrustManagers, new SecureRandom)

    ConnectionContext.https(sslContext, sslConfig, clientAuth = clientAuth)
  }
} 
Example 78
Source File: Unzip.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.standalone

import java.io.{File, FileOutputStream, InputStream}
import java.util.zip.ZipInputStream

object Unzip {

  def apply(is: InputStream, dir: File): Unit = {
    //Based on https://stackoverflow.com/a/40547896/1035417
    val zis = new ZipInputStream((is))
    val dest = dir.toPath
    Stream.continually(zis.getNextEntry).takeWhile(_ != null).foreach { zipEntry =>
      if (!zipEntry.isDirectory) {
        val outPath = dest.resolve(zipEntry.getName)
        val outPathParent = outPath.getParent
        if (!outPathParent.toFile.exists()) {
          outPathParent.toFile.mkdirs()
        }

        val outFile = outPath.toFile
        val out = new FileOutputStream(outFile)
        val buffer = new Array[Byte](4096)
        Stream.continually(zis.read(buffer)).takeWhile(_ != -1).foreach(out.write(buffer, 0, _))
        out.close()
      }
    }
    zis.close()
  }

} 
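Usage sketch (not part of the file above); the archive path and target directory are hypothetical:

import java.io.{File, FileInputStream}

import org.apache.openwhisk.standalone.Unzip

object UnzipExample {
  def main(args: Array[String]): Unit = {
    val target = new File("/tmp/unzipped")
    target.mkdirs()
    val is = new FileInputStream("/tmp/bundle.zip")
    try Unzip(is, target)
    finally is.close()
  }
}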
Example 79
Source File: SwaggerAPI.scala    From swagger-check   with MIT License 5 votes vote down vote up
package de.leanovate.swaggercheck.schema

import java.io.InputStream

import com.fasterxml.jackson.annotation.{JsonCreator, JsonProperty}
import com.fasterxml.jackson.core.JsonFactory
import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.fasterxml.jackson.databind.{DeserializationFeature, JsonNode, MappingJsonFactory, ObjectMapper}
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import de.leanovate.swaggercheck.schema.jackson.JsonSchemaModule
import de.leanovate.swaggercheck.schema.model.{Definition, Parameter}

import scala.collection.JavaConverters._
import scala.io.Source

@JsonDeserialize(builder = classOf[SwaggerAPIBuilder])
case class SwaggerAPI(
                       basePath: Option[String],
                       paths: Map[String, Map[String, Operation]],
                       definitions: Map[String, Definition]
                     )

object SwaggerAPI {
  val jsonMapper = objectMapper(new MappingJsonFactory())
  val yamlMapper = objectMapper(new YAMLFactory())

  def parse(jsonOrYaml: String): SwaggerAPI = {
    val mapper = if (jsonOrYaml.trim().startsWith("{")) jsonMapper else yamlMapper
    mapper.readValue(jsonOrYaml, classOf[SwaggerAPI])
  }

  def parse(swaggerInput: InputStream): SwaggerAPI = {
    parse(Source.fromInputStream(swaggerInput).mkString)
  }

  def objectMapper(jsonFactory: JsonFactory): ObjectMapper = {
    val mapper = new ObjectMapper(jsonFactory)
    mapper.registerModule(DefaultScalaModule)
    mapper.registerModule(JsonSchemaModule)
    mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
    mapper
  }
}

class SwaggerAPIBuilder @JsonCreator()(
                                        @JsonProperty("basePath") basePath: Option[String],
                                        @JsonProperty("consumes") consumes: Option[Seq[String]],
                                        @JsonProperty("produces") produces: Option[Seq[String]],
                                        @JsonProperty("paths") paths: Option[Map[String, JsonNode]],
                                        @JsonProperty("definitions") definitions: Option[Map[String, Definition]],
                                        @JsonProperty("parameters") globalParameters: Option[Map[String, Parameter]]
                                      ) {
  def build(): SwaggerAPI = {
    val defaultConsumes = consumes.map(_.toSet).getOrElse(Set.empty)
    val defaultProduces = produces.map(_.toSet).getOrElse(Set.empty)
    SwaggerAPI(basePath,
      paths.getOrElse(Map.empty).map {
        case (path, pathDefinition) =>
          val defaultParameters = Option(pathDefinition.get("parameters")).map {
             node =>
               node.iterator().asScala.map {
                 element => SwaggerAPI.jsonMapper.treeToValue(element, classOf[OperationParameter])
               }.toSeq
          }.getOrElse(Seq.empty)

          basePath.map(_ + path).getOrElse(path) -> pathDefinition.fields().asScala.filter(_.getKey != "parameters").map {
            entry =>
              val operation = SwaggerAPI.jsonMapper.treeToValue(entry.getValue, classOf[Operation])
              entry.getKey.toUpperCase -> operation.withDefaults(defaultParameters, defaultConsumes, defaultProduces).resolveGlobalParameters(globalParameters.getOrElse(Map()))
          }.toMap
      },
      definitions.getOrElse(Map.empty))
  }
} 
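Parsing a specification with the API above (not part of the file); swagger.yaml is a hypothetical classpath resource:

import de.leanovate.swaggercheck.schema.SwaggerAPI

object SwaggerAPIExample {
  def main(args: Array[String]): Unit = {
    val is = getClass.getClassLoader.getResourceAsStream("swagger.yaml")
    val api = SwaggerAPI.parse(is)
    println(api.basePath)
    api.paths.keys.foreach(println)
  }
}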
Example 80
Source File: MessagePack.scala    From airframe   with Apache License 2.0 5 votes vote down vote up
package wvlet.airframe.msgpack.spi
import java.io.{InputStream, OutputStream}

import wvlet.airframe.json.{JSON, JSONScanner, JSONSource}
import wvlet.airframe.msgpack.json.{NestedMessagePackBuilder, StreamMessagePackBuilder}


object MessagePack {
  def newBufferPacker: BufferPacker                                      = Compat.newBufferPacker
  def newPacker(out: OutputStream): Packer                               = Compat.newPacker(out)
  def newUnpacker(in: InputStream): Unpacker                             = Compat.newUnpacker(in)
  def newUnpacker(msgpack: Array[Byte]): Unpacker                        = Compat.newUnpacker(msgpack)
  def newUnpacker(msgpack: Array[Byte], offset: Int, len: Int): Unpacker = Compat.newUnpacker(msgpack, offset, len)

  def fromJSON(json: String): MsgPack           = fromJSON(JSONSource.fromString(json))
  def fromJSON(jsonBytes: Array[Byte]): MsgPack = fromJSON(JSONSource.fromBytes(jsonBytes))
  def fromJSON(json: JSONSource): MsgPack = {
    val context = new StreamMessagePackBuilder()
    JSONScanner.scanAny(json, context)
    context.result
  }
} 
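A JSON round-trip sketch (not part of the file above); fromJSON produces msgpack bytes and newUnpacker reads them back (unpackValue is assumed to be available on the spi Unpacker):

import wvlet.airframe.msgpack.spi.MessagePack

object MessagePackExample {
  def main(args: Array[String]): Unit = {
    val msgpack: Array[Byte] = MessagePack.fromJSON("""{"name":"wvlet","stars":100}""")
    println(s"packed ${msgpack.length} bytes")
    val unpacker = MessagePack.newUnpacker(msgpack)
    println(unpacker.unpackValue) // assumed accessor returning the decoded msgpack Value
  }
}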
Example 81
Source File: Compat.scala    From airframe   with Apache License 2.0 5 votes vote down vote up
package wvlet.airframe.msgpack.spi
import java.io.{InputStream, OutputStream}

import org.msgpack.{core => mj}
import wvlet.airframe.msgpack.impl.{
  BufferPackerImpl,
  PackerImpl,
  PureScalaBufferPacker,
  PureScalaBufferUnpacker,
  UnpackerImpl
}
import wvlet.airframe.msgpack.io.ByteArrayBuffer


object Compat {
  def isScalaJS = false

  def floatToIntBits(v: Float): Int     = java.lang.Float.floatToRawIntBits(v)
  def doubleToLongBits(v: Double): Long = java.lang.Double.doubleToRawLongBits(v)

  def newBufferPacker: BufferPacker = {
    new PureScalaBufferPacker
    //new BufferPackerImpl(mj.MessagePack.newDefaultBufferPacker())
  }

  def newPacker(out: OutputStream): Packer = {
    // TODO: Use pure-scala packer
    // new PureScalaBufferPacker
    new PackerImpl(mj.MessagePack.newDefaultPacker(out))
  }

  def newUnpacker(in: InputStream): Unpacker = {
    new UnpackerImpl(mj.MessagePack.newDefaultUnpacker(in))
  }

  def newUnpacker(msgpack: Array[Byte]): Unpacker = {
    newUnpacker(msgpack, 0, msgpack.length)
  }

  def newUnpacker(msgpack: Array[Byte], offset: Int, len: Int): Unpacker = {
    //new UnpackerImpl(mj.MessagePack.newDefaultUnpacker(msgpack, offset, len))
    // Use pure-scala unpacker
    new PureScalaBufferUnpacker(ByteArrayBuffer.fromArray(msgpack, offset, len))
  }
} 
Example 82
Source File: IO.scala    From airframe   with Apache License 2.0 5 votes vote down vote up
package wvlet.airframe.control
import java.io.{ByteArrayOutputStream, File, InputStream}
import java.net.URL
import java.nio.charset.StandardCharsets

import wvlet.airframe.control.Control.withResource


object IO {

  def readAsString(f: File): String = {
    readAsString(f.toURI.toURL)
  }

  def readAsString(url: URL): String = {
    withResource(url.openStream()) { in => readAsString(in) }
  }

  def readAsString(in: InputStream): String = {
    new String(readFully(in), StandardCharsets.UTF_8)
  }

  def readFully(in: InputStream): Array[Byte] = {
    val byteArray =
      if (in == null) {
        Array.emptyByteArray
      } else {
        withResource(new ByteArrayOutputStream) { b =>
          val buf = new Array[Byte](8192)
          withResource(in) { src =>
            var readBytes = 0
            while ({
              readBytes = src.read(buf);
              readBytes != -1
            }) {
              b.write(buf, 0, readBytes)
            }
          }
          b.toByteArray
        }
      }
    byteArray
  }

} 
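Simple usage of the helper above (not part of the file), reading a hypothetical classpath resource into a String; readAsString drains and closes the stream via readFully/withResource:

import wvlet.airframe.control.IO

object IOExample {
  def main(args: Array[String]): Unit = {
    val in = getClass.getClassLoader.getResourceAsStream("application.conf")
    println(IO.readAsString(in))
  }
}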
Example 83
Source File: ApplicationObjectInputStream.scala    From milan   with Apache License 2.0 5 votes vote down vote up
package com.amazon.milan.serialization

import java.io.{InputStream, ObjectInputStream, ObjectStreamClass}


// NOTE: the class header was cut off in the scraped listing; the declaration below is a
// reconstruction: an ObjectInputStream that resolves classes through the application class loader.
class ApplicationObjectInputStream(stream: InputStream) extends ObjectInputStream(stream) {

  private val classLoader = getClass.getClassLoader

  override def resolveClass(desc: ObjectStreamClass): Class[_] = {
    try {
      this.classLoader.loadClass(desc.getName)
    }
    catch {
      case _: ClassNotFoundException =>
        // FlinkUserCodeClassLoader will fail to load some basic java classes, such as java.lang.Integer, for some
        // reason. Falling back to the default resolveClass implementation should successfully load these classes.
        super.resolveClass(desc)
    }
  }
} 
Example 84
Source File: JsonDataInputFormat.scala    From milan   with Apache License 2.0 5 votes vote down vote up
package com.amazon.milan.dataformats

import java.io.InputStream

import com.amazon.milan.HashUtil
import com.amazon.milan.serialization.{DataFormatConfiguration, JavaTypeFactory, MilanObjectMapper}
import com.amazon.milan.typeutil.TypeDescriptor
import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize}

import scala.collection.JavaConverters._
import scala.language.experimental.macros



@JsonSerialize
@JsonDeserialize
class JsonDataInputFormat[T: TypeDescriptor](val config: DataFormatConfiguration)
  extends DataInputFormat[T] {

  @transient private lazy val objectMapper = new MilanObjectMapper(this.config)
  @transient private lazy val javaType = new JavaTypeFactory(this.objectMapper.getTypeFactory).makeJavaType(this.recordTypeDescriptor)
  @transient private lazy val hashCodeValue = HashUtil.combineHashCodes(this.recordTypeDescriptor.hashCode(), this.config.hashCode())

  private var recordTypeDescriptor = implicitly[TypeDescriptor[T]]

  def this() {
    this(DataFormatConfiguration.default)
  }

  override def getGenericArguments: List[TypeDescriptor[_]] =
    List(implicitly[TypeDescriptor[T]])

  override def setGenericArguments(genericArgs: List[TypeDescriptor[_]]): Unit = {
    this.recordTypeDescriptor = genericArgs.head.asInstanceOf[TypeDescriptor[T]]
  }

  override def readValue(bytes: Array[Byte], offset: Int, length: Int): Option[T] = {
    Some(this.objectMapper.readValue[T](bytes, offset, length, this.javaType))
  }

  override def readValues(stream: InputStream): TraversableOnce[T] = {
    this.objectMapper.readerFor(this.javaType).readValues[T](stream).asScala
  }

  override def hashCode(): Int = this.hashCodeValue

  override def equals(obj: Any): Boolean = {
    obj match {
      case o: JsonDataInputFormat[T] =>
        this.recordTypeDescriptor.equals(o.recordTypeDescriptor) &&
          this.config.equals(o.config)

      case _ =>
        false
    }
  }
} 
Example 85
Source File: TypeInformationDataInputFormat.scala    From milan   with Apache License 2.0 5 votes vote down vote up
package com.amazon.milan.compiler.flink.dataformats

import java.io.{ByteArrayInputStream, EOFException, InputStream}

import com.amazon.milan.dataformats.DataInputFormat
import com.amazon.milan.typeutil.TypeDescriptor
import org.apache.flink.api.common.ExecutionConfig
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.common.typeutils.TypeSerializer
import org.apache.flink.core.memory.DataInputViewStreamWrapper



class TypeInformationDataInputFormat[T](typeInfo: TypeInformation[T]) extends DataInputFormat[T] {
  @transient private lazy val serializer = this.createSerializer()

  override def getGenericArguments: List[TypeDescriptor[_]] = {
    // This class is not intended to be serialized by GenericTypedJsonSerializer, so this should not be called.
    throw new UnsupportedOperationException()
  }

  override def setGenericArguments(genericArgs: List[TypeDescriptor[_]]): Unit = {
    // This class is not intended to be deserialized by GenericTypedJsonDeserializer, so this should not be called.
    throw new UnsupportedOperationException()
  }

  override def readValue(bytes: Array[Byte], offset: Int, length: Int): Option[T] = {
    val input = new DataInputViewStreamWrapper(new ByteArrayInputStream(bytes, offset, length))
    Some(this.serializer.deserialize(input))
  }

  override def readValues(stream: InputStream): TraversableOnce[T] = {
    val input = new DataInputViewStreamWrapper(stream)
    Stream.continually(0)
      .map(_ =>
        try {
          Some(this.serializer.deserialize(input))
        }
        catch {
          case _: EOFException => None
        })
      .takeWhile(_.isDefined)
      .map(_.get)
  }

  private def createSerializer(): TypeSerializer[T] = {
    val config = new ExecutionConfig()
    this.typeInfo.createSerializer(config)
  }
} 
Example 86
Source File: History.scala    From sbt-flaky   with Apache License 2.0 5 votes vote down vote up
package flaky.history

import java.io.{File, FileFilter, InputStream}
import java.text.SimpleDateFormat
import java.util.Date

import flaky.{Flaky, FlakyTestReport, Io}
import org.apache.commons.vfs2.VFS

import scala.xml.XML

class History(project: String, historyDir: File, flakyReportDir: File, projectDir: File) {

  private val zipFileFilter = new FileFilter {
    override def accept(pathname: File): Boolean = pathname.getName.endsWith(".zip")
  }

  private def runFiles(historyDir: File): List[File] = historyDir.listFiles(zipFileFilter).toList

  def addCurrentToHistory(): Unit = {
    val timestamp = System.currentTimeMillis()

    val date = new SimpleDateFormat(History.dateFormat).format(new Date(timestamp))
    val gitCommit = Git(projectDir).currentId().toOption
    val historyReportDescription = HistoryReportDescription(timestamp, gitCommit)
    HistoryReportDescription.save(historyReportDescription, new File(flakyReportDir, History.descriptorFile))
    Zip.compressFolder(new File(historyDir, s"$date.zip"), flakyReportDir)
  }

  def removeToOldFromHistory(maxToKeep: Int): Unit = {
    runFiles(historyDir)
      .take(Math.max(runFiles(historyDir).size - maxToKeep, 0))
      .foreach(_.delete())
  }

  def createHistoryReport(): HistoryReport = {

    val historicalRuns: List[HistoricalRun] = runFiles(historyDir)
      .map(History.loadHistory)
    val date = new SimpleDateFormat("HH:mm dd-MM-YYYY").format(new Date())
    HistoryReport(project, date, historicalRuns)
  }


  def processHistory(): HistoryReport = {
    historyDir.mkdirs()
    addCurrentToHistory()
    removeToOldFromHistory(20)
    createHistoryReport()
  }
}


case class HistoryReportDescription(timestamp: Long, gitCommitHash: Option[String])

object HistoryReportDescription {

  def load(in: InputStream): HistoryReportDescription = {
    val descriptorXml = XML.load(in)
    val timestamp = (descriptorXml \ "timestamp").text.trim.toLong
    val gitHash = (descriptorXml \ "gitCommitHash").text.trim
    HistoryReportDescription(timestamp, Some(gitHash))
  }

  def save(historyReportDescription: HistoryReportDescription, file: File): Unit = {
    val xml =
      <HistoryReportDescription>
        <timestamp>
          {historyReportDescription.timestamp}
        </timestamp>
        <gitCommitHash>
          {historyReportDescription.gitCommitHash.getOrElse("")}
        </gitCommitHash>
      </HistoryReportDescription>
    val prettyXml = new scala.xml.PrettyPrinter(80, 2).format(xml)
    Io.writeToFile(file, prettyXml)
  }
}

object History {
  val descriptorFile = "descriptor.xml"
  val dateFormat = "yyyyMMdd-HHmmss"

  def loadHistory: (File) => HistoricalRun = {
    file => {
      val manager = VFS.getManager
      val uri = file.toURI.toString.replace("file:/", "zip:/")
      val fo = manager.resolveFile(uri)
      val report: FlakyTestReport = Flaky.createReportFromHistory(fo)
      val descriptorFile = Option(fo.getChild(History.descriptorFile))
      val dateFromFileName = file.getName.replace(".zip","")
      val hrd = descriptorFile
        .filter(_.exists())
        .map(f => HistoryReportDescription.load(f.getContent.getInputStream))
        .getOrElse(HistoryReportDescription(new SimpleDateFormat(dateFormat).parse(dateFromFileName).getTime, None))
      HistoricalRun(hrd, report)
    }
  }
} 
Example 87
Source File: PigTransformation.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package org.schedoscope.dsl.transformations

import java.io.{FileInputStream, InputStream}

import org.apache.commons.lang.StringUtils
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege
import org.apache.hadoop.hive.ql.udf.UDFLength
import org.apache.hive.hcatalog.data.schema.HCatSchema
import org.apache.hive.hcatalog.pig.HCatLoader
import org.apache.pig.builtin.ParquetStorer
import org.schedoscope.scheduler.service.ViewTransformationStatus



case class PigTransformation(latin: String, dirsToDelete: List[String] = List()) extends Transformation {

  def name = "pig"

  override def stringsToChecksum = List(latin)

  description = "[..]" + StringUtils.abbreviate(latin.replaceAll("\n", "").replaceAll("\t", "").replaceAll("\\s+", " "), 60)

  def defaultLibraries = {
    // FIXME: declare jars instead of any random class included in this jar
    val classes = List(
      // needed for usage of HCatalog table management
      classOf[HCatLoader], classOf[HCatSchema], classOf[HiveObjectPrivilege], classOf[UDFLength],
      // needed for usage of storage format Parquet with pig
      classOf[ParquetStorer])
    classes.map(cl => try {
      cl.getProtectionDomain().getCodeSource().getLocation().getFile
    } catch {
      case t: Throwable => null
    })
      .filter(cl => cl != null && !"".equals(cl.trim))
  }

  override def viewTransformationStatus = ViewTransformationStatus(
    name,
    Some(Map("latin" -> latin)))
}

object PigTransformation {

  def scriptFrom(inputStream: InputStream): String = scala.io.Source.fromInputStream(inputStream, "UTF-8").mkString

  def scriptFromResource(resourcePath: String): String = scriptFrom(getClass().getClassLoader().getResourceAsStream(resourcePath))

  def scriptFrom(filePath: String): String = scriptFrom(new FileInputStream(filePath))
} 
Example 88
Source File: OozieTransformation.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package org.schedoscope.dsl.transformations

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import org.apache.commons.lang.StringUtils
import org.schedoscope.Settings
import org.schedoscope.scheduler.service.ViewTransformationStatus

import scala.collection.JavaConversions._


case class OozieTransformation(bundle: String, workflow: String, var workflowAppPath: String) extends Transformation {
  def name = "oozie"

  override def fileResourcesToChecksum = List(workflowAppPath)

  description = StringUtils.abbreviate(s"${bundle}/${workflow}", 100)

  override def viewTransformationStatus = ViewTransformationStatus(
    name,
    Some(Map(
      "bundle" -> bundle,
      "workflow" -> workflow)))
}

object OozieTransformation {
  def oozieWFPath(bundle: String, workflow: String) = s"${Settings().getDriverSettings("oozie").location}/workflows/${bundle}/${workflow}/"

  def configurationFrom(inputStream: InputStream): Map[String, String] = {
    val props = new Properties()

    try {
      props.load(inputStream)
    } catch {
      case t: Throwable =>
    }

    Map() ++ props
  }

  def configurationFrom(filePath: String): Map[String, String] = try
    configurationFrom(new FileInputStream(filePath))
  catch {
    case t: Throwable => Map()
  }

  def configurationFromResource(resourcePath: String): Map[String, String] =
    try
      configurationFrom(getClass().getClassLoader().getResourceAsStream(resourcePath))
    catch {
      case t: Throwable => Map()
    }
} 
Example 89
Source File: WholeFileReader.scala    From magellan   with Apache License 2.0 5 votes vote down vote up
package magellan.mapreduce

import java.io.InputStream

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path}
import org.apache.hadoop.io.compress.{CodecPool, CompressionCodecFactory, Decompressor}
import org.apache.hadoop.io.{NullWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.FileSplit
import org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext}

class WholeFileReader extends RecordReader[NullWritable, Text] {

  private val key = NullWritable.get()
  private val value = new Text()
  private var split: FileSplit = _
  private var conf: Configuration = _
  private var path: Path = _
  private var done: Boolean = false

  override def getProgress: Float = ???

  override def nextKeyValue(): Boolean = {
    if (done){
      false
    } else {
      val fs = path.getFileSystem(conf)
      var is: FSDataInputStream = null
      var in: InputStream = null
      var decompressor: Decompressor = null
      try {
        is = fs.open(split.getPath)
        val codec = new CompressionCodecFactory(conf).getCodec(path)
        if (codec != null) {
          decompressor = CodecPool.getDecompressor(codec)
          in = codec.createInputStream(is, decompressor)
        } else {
          in = is
        }
        val result = IOUtils.toByteArray(in)
        value.clear()
        value.set(result)
        done = true
        true
      } finally {
        if (in != null) {
          IOUtils.closeQuietly(in)
        }
        if (decompressor != null) {
          CodecPool.returnDecompressor(decompressor)
        }
      }
    }
  }

  override def getCurrentValue: Text = value

  override def initialize(inputSplit: InputSplit,
    taskAttemptContext: TaskAttemptContext): Unit = {
    this.split = inputSplit.asInstanceOf[FileSplit]
    this.conf = MapReduceUtils.getConfigurationFromContext(taskAttemptContext)
    this.path = this.split.getPath
  }

  override def getCurrentKey: NullWritable = key

  override def close() {}
} 
Example 90
Source File: HDFSFileService.scala    From retail_analytics   with Apache License 2.0 5 votes vote down vote up
package models

import scalaz._
import Scalaz._
import scalaz.EitherT._
import scalaz.Validation
import scalaz.NonEmptyList._
import java.io.BufferedInputStream
import java.io.File
import java.io.FileInputStream
import java.io.InputStream
import org.apache.hadoop.conf._
import org.apache.hadoop.fs._

object HDFSFileService {
  private val conf = new Configuration()
  private val hdfsCoreSitePath = new Path("core-site.xml")
  private val hdfsHDFSSitePath = new Path("hdfs-site.xml")

  conf.addResource(hdfsCoreSitePath)
  conf.addResource(hdfsHDFSSitePath)

  private val fileSystem = FileSystem.get(conf)

  def saveFile(filepath: String): ValidationNel[Throwable, String] = {
    (Validation.fromTryCatch[String] {
      
      val file = new File(filepath)
      val out = fileSystem.create(new Path(file.getName))
      
      
      val in = new BufferedInputStream(new FileInputStream(file))
      var b = new Array[Byte](1024)
      var numBytes = in.read(b)
      while (numBytes > 0) {
        out.write(b, 0, numBytes)
        numBytes = in.read(b)
      }
      
      in.close()
      out.close()
      "File Uploaded"
    } leftMap { t: Throwable => nels(t) })

  }
 

  def removeFile(filename: String): Boolean = {
    val path = new Path(filename)
    fileSystem.delete(path, true)
  }

  def getFile(filename: String): InputStream = {
    val path = new Path(filename)
    fileSystem.open(path)
  }

  def createFolder(folderPath: String): Unit = {
    val path = new Path(folderPath)
    if (!fileSystem.exists(path)) {
      fileSystem.mkdirs(path)
    }
  }
} 
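Usage sketch (not part of the file above), assuming core-site.xml and hdfs-site.xml are resolvable as the object expects and the local file path is hypothetical:

import models.HDFSFileService

object HDFSFileServiceExample {
  def main(args: Array[String]): Unit = {
    HDFSFileService.createFolder("/data/incoming")

    // saveFile copies the local file into the HDFS working directory under its base name.
    HDFSFileService.saveFile("/tmp/report.csv") match {
      case scalaz.Success(msg)  => println(msg)
      case scalaz.Failure(errs) => println(s"upload failed: $errs")
    }

    val in = HDFSFileService.getFile("report.csv")
    try println(scala.io.Source.fromInputStream(in).mkString)
    finally in.close()
  }
}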
Example 91
Source File: BufferedSource.scala    From perf_tester   with Apache License 2.0 5 votes vote down vote up
package scala.io

import java.io.{ InputStream, BufferedReader, InputStreamReader, PushbackReader }
import Source.DefaultBufSize
import scala.collection.{ Iterator, AbstractIterator }


// NOTE: this listing is only an excerpt of scala.io.BufferedSource; the class header and the
// members referenced below (bufferSize, decachedReader) were cut off by the scraper.
  override def mkString = {
    // Speed up slurping of whole data set in the simplest cases.
    val allReader = decachedReader
    val sb = new StringBuilder
    val buf = new Array[Char](bufferSize)
    var n = 0
    while (n != -1) {
      n = allReader.read(buf)
      if (n>0) sb.appendAll(buf, 0, n)
    }
    sb.result
  }
} 
Example 92
Source File: BaseClassifier.scala    From project-matt   with MIT License 5 votes vote down vote up
package org.datafy.aws.app.matt.classifiers

import java.io.InputStream

import org.datafy.aws.app.matt.extras.{S3KeySummary, S3Manager, Utilities}
import org.datafy.aws.app.matt.models.{FullScanStats, ObjectScanStats, RiskStats, ScanObjectsModel}
import com.typesafe.scalalogging._
import org.slf4j.LoggerFactory



object BaseClassifier {

  val logger = LoggerFactory.getLogger("BaseClassifier")

  def setS3ScanInputPath(bucketName: String, s3Prefix: String) = {
    // get last object and cache in redis
    val redisReferenceKey = generateReferenceKey(bucketName, s3Prefix)
    val lastScannedKey = ScanObjectsModel.getLastScannedFromRedis(redisReferenceKey)
    logger.info(s"Last Scanned S3 Key: ${lastScannedKey.getOrElse("This is the first scan")}")

    val bucketObjects: List[S3KeySummary] = S3Manager.getBucketObjects(bucketName, s3Prefix, lastScannedKey)

    logger.info(s"Total Number of S3 Objects for scanning: ${bucketObjects.length}")
    if (!bucketObjects.isEmpty) {
      try {
        val totalSizeScanned: (String, Int) = S3Manager.computeTotalObjectSize(bucketObjects).head
        logger.info(s"Total size of scanned objects: ${totalSizeScanned._2}")
        // commence object scan here
        val payloadSummary = bucketObjects.map {
          s3Object =>
            val s3ObjectInputStream = S3Manager.getObjectContentAsStream(s3Object.bucketName, s3Object.key)
            val textContent = this.scanInputStream(s3ObjectInputStream, s3Object.key)
            val regexClassifier = RegexClassifier.scanTextContent(textContent)
            val objectStats: List[RiskStats] = regexClassifier.computeRiskStats()
            val riskLevel = regexClassifier.getDocumentRiskLevels()

            val objectStatsSummary = ObjectScanStats(s3Key = s3Object.key,
              objectSummaryStats = objectStats, classifier = "Regex")
            (textContent, objectStatsSummary)
        }
        // all objects
        val regexClassifier = RegexClassifier.scanTextContent(payloadSummary.map(_._1))
        val fullScanStats = regexClassifier.computeRiskStats()

        val objectScanStats = payloadSummary.map(_._2)
        // return to save results actor
        val scanStats = FullScanStats(
          s3Bucket = bucketName,
          lastScannedKey = "",
          summaryStats = fullScanStats,
          objectScanStats = objectScanStats,
          totalObjectsSize = Some(totalSizeScanned._2)
        )

        val savedKey = ScanObjectsModel.saveScannedResults(scanStats)
        val newLastScannedKey = ScanObjectsModel.saveLastScannedToRedis(redisReferenceKey, bucketObjects)
        scanStats
      } catch {
        case e: Throwable => e.printStackTrace()
      }
      true
    } else {
      logger.info("No files to scan at this time.")
      false
    }
  }

  private def scanInputStream(inputStream: InputStream, s3Key: String): String = {
    // check if input stream is compressed
    if (s3Key.contains("parquet"))
      return Utilities.getParseParquetStream(inputStream)

    val check = Utilities.checkIfStreamIsCompressed(inputStream)
    if(check)
      return Utilities.getParseCompressedStream(inputStream)

    Utilities.getParsePlainStream(inputStream)
  }

  private def generateReferenceKey(s3Bucket: String, s3Prefix: String) = {
    var referenceKey = s"s3Key_${s3Bucket}"
    if(!s3Prefix.isEmpty)
      referenceKey += s":s3Prefix_${s3Prefix}"
    referenceKey
  }

} 
Example 93
Source File: TikaParquetParser.scala    From project-matt   with MIT License 5 votes vote down vote up
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import scala.collection.JavaConverters._
import org.xml.sax.{ContentHandler, SAXException}
import org.apache.tika.metadata.Metadata
import org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.ParquetReader
import org.apache.parquet.format.converter.ParquetMetadataConverter
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.parquet.tools.json.JsonRecordFormatter
import org.apache.parquet.tools.read.{SimpleReadSupport, SimpleRecord}
import org.apache.tika.exception.TikaException
import org.apache.tika.sax.XHTMLContentHandler

import scala.util.Random


class TikaParquetParser extends AbstractParser {
  // make some stuff here
  final val PARQUET_RAW = MediaType.application("x-parquet")

  private val SUPPORTED_TYPES: Set[MediaType] = Set(PARQUET_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    val fileNamePrefix = Random.alphanumeric.take(5).mkString
    val tempFile = File.createTempFile(s"parquet-${fileNamePrefix}", ".parquet")
    val tempFileOut = new FileOutputStream(tempFile)
    try IOUtils.copy(stream, tempFileOut)
    finally tempFileOut.close() // ensure the copy is flushed before the Parquet footer is read

    val conf = new Configuration()
    val path = new Path(tempFile.getAbsolutePath)
    val parquetMetadata = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER)
    var defaultReader: ParquetReader[SimpleRecord] = null

    val columns = parquetMetadata.getFileMetaData.getSchema.getFields
    metadata.set(CONTENT_TYPE, PARQUET_RAW.toString)
    metadata.set("Total Number of Columns", columns.size.toString)
    metadata.set("Parquet Column Names", columns.toString)

    val xhtml = new XHTMLContentHandler(handler, metadata)
    xhtml.startDocument()
    xhtml.startElement("p")

    // ::TODO:: ensure parquet reader reads all files not only file row
    try {
      defaultReader = ParquetReader.builder(new SimpleReadSupport(), new Path(tempFile.getAbsolutePath)).build()
      // read a single record and emit its JSON form when one is present
      val values: SimpleRecord = defaultReader.read()
      if (values != null) {
        val jsonFormatter = JsonRecordFormatter.fromSchema(parquetMetadata.getFileMetaData.getSchema)

        val textContent: String = jsonFormatter.formatRecord(values)
        xhtml.characters(textContent)
      }
      // close the XHTML document even when the file yields no record
      xhtml.endElement("p")
      xhtml.endDocument()

    } catch {
      case e: Throwable => e.printStackTrace()
    } finally {
      // release the reader and the temporary file on both the success and the failure paths
      if (defaultReader != null) {
        try {
          defaultReader.close()
        } catch {
          case _: Throwable =>
        }
      }
      if (tempFile != null) tempFile.delete()
    }
  }

} 
Example 94
Source File: TikaHadoopOrcParser.scala    From project-matt   with MIT License 5 votes vote down vote up
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration

import scala.collection.JavaConverters._
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hive.serde2.objectinspector.StructField
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
import org.apache.orc.OrcFile
import org.apache.orc.OrcFile.ReaderOptions
import org.apache.orc.Reader
import org.apache.orc.RecordReader
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.xml.sax.{ContentHandler, SAXException}

import scala.util.Random


class TikaHadoopOrcParser extends AbstractParser  {
  final val ORC_RAW = MediaType.application("x-orc")

  private val SUPPORTED_TYPES: Set[MediaType] = Set(ORC_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    try {
      val fileNamePrefix = Random.alphanumeric.take(5).mkString
      val tempFile = File.createTempFile(s"orc-${fileNamePrefix}", ".orc")
      IOUtils.copy(stream, new FileOutputStream(tempFile))

      val path = new Path(tempFile.getAbsolutePath)
      val conf = new Configuration()
      val orcReader = OrcFile.createReader(path, new ReaderOptions(conf))
      val records: RecordReader = orcReader.rows()

      // The original example stops short here; the lines below are an assumed sketch (not the
      // project's code) showing how the ORC rows could be consumed batch by batch.
      val schema = orcReader.getSchema
      metadata.set("Total Number of Columns", schema.getFieldNames.size.toString)

      val batch = schema.createRowBatch()
      var rowCount = 0L
      while (records.nextBatch(batch)) {
        rowCount += batch.size
      }
      metadata.set("Total Number of Rows", rowCount.toString)

      records.close()
      tempFile.delete()

    } catch {
      case e: Throwable => e.printStackTrace()
    }



//    val fields =

  }
} 
Example 95
Source File: Utilities.scala    From project-matt   with MIT License 5 votes vote down vote up
package org.datafy.aws.app.matt.extras

import org.apache.tika.Tika
import org.apache.tika.metadata.Metadata
import java.io.{BufferedInputStream, IOException, InputStream, StringWriter}
import java.util.zip.GZIPInputStream

import org.xml.sax.SAXException
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.serialization.JsonMetadata
import org.apache.tika.parser.{AutoDetectParser, ParseContext}
import org.apache.tika.parser.pkg.CompressorParser
import org.apache.tika.sax.BodyContentHandler


object Utilities {

  private val MAX_STRING_LENGTH = 2147483647

  private val tika = new Tika()
  tika.setMaxStringLength(MAX_STRING_LENGTH)

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParsePlainStream(inputStream: InputStream): String = {

    val autoDetectParser = new AutoDetectParser()
    val bodyContentHandler = new BodyContentHandler(MAX_STRING_LENGTH)
    val fileMetadata = new Metadata()

    if (inputStream.read() == -1) {
      return "Could not scan inputStream less than 0 bytes"
    }
    autoDetectParser.parse(inputStream, bodyContentHandler, fileMetadata)
    bodyContentHandler.toString
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParseCompressedStream(inputStream: InputStream): String = {
    // Reconstructed sketch (the original body is not part of this excerpt): the imported
    // CompressorParser unwraps the compressed stream, and an AutoDetectParser is assumed to
    // handle the embedded content.
    val parseContext = new ParseContext()
    parseContext.set(classOf[org.apache.tika.parser.Parser], new AutoDetectParser())
    val bodyContentHandler = new BodyContentHandler(MAX_STRING_LENGTH)
    new CompressorParser().parse(inputStream, bodyContentHandler, new Metadata(), parseContext)
    bodyContentHandler.toString
  }

  def checkIfStreamIsCompressed(myStream: InputStream): Boolean = {
    // Peek at the first two bytes and push them back to detect the GZIP magic number.
    var inputStream = myStream
    if (!inputStream.markSupported()) {
      inputStream = new BufferedInputStream(inputStream)
    }
    inputStream.mark(2)
    var magicBytes = 0
    try {
      magicBytes = inputStream.read() & 0xff | ((inputStream.read() << 8) & 0xff00)
      inputStream.reset()
    } catch {
      case ioe: IOException => ioe.printStackTrace()
    }
    magicBytes == GZIPInputStream.GZIP_MAGIC
  }
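  // The callers shown elsewhere on this page (BaseClassifier, UtilitiesSpec) also rely on a
  // getParseParquetStream helper that is not part of this excerpt. The method below is an assumed
  // sketch rather than the original implementation: it simply delegates to the TikaParquetParser
  // defined in this package.
  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParseParquetStream(inputStream: InputStream): String = {
    val parquetParser = new TikaParquetParser()
    val bodyContentHandler = new BodyContentHandler(MAX_STRING_LENGTH)
    parquetParser.parse(inputStream, bodyContentHandler, new Metadata(), new ParseContext())
    bodyContentHandler.toString
  }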
}

case class And[A]( p1: A=>Boolean, p2: A=>Boolean ) extends (A=>Boolean) {
  def apply( a: A ) = p1(a) && p2(a)
}


case class Or[A]( p1: A=>Boolean, p2: A=>Boolean ) extends (A=>Boolean) {
  def apply( a: A ) = p1(a) || p2(a)
} 
Example 96
Source File: UtilitiesSpec.scala    From project-matt   with MIT License 5 votes vote down vote up
package org.datafy.aws.app.matt.extras

import java.io.InputStream

import org.scalatest.FlatSpec


class UtilitiesSpec extends FlatSpec {

  val utilities: Utilities.type = Utilities

  val someCompressedJson: InputStream = getClass.getResourceAsStream("/UtilitiesSpec/sample-data.json.gz")

  "getParsePlainStream" should "return empty string when parsing parquet" in {
    val someParquetFile = getClass.getResourceAsStream("/UtilitiesSpec/part-r-00004.gz.parquet")
    val fileContents = utilities.getParsePlainStream(someParquetFile)
    someParquetFile.close()
    assert(fileContents.length == 0)
  }

  "checkIfStreamIsCompressed" should "check if a json input stream is compressed" in {
    val check = utilities.checkIfStreamIsCompressed(someCompressedJson)
    assert(check)
  }

  "getParseCompressedStream" should "read content of compressed file" in {
    val textContent = utilities.getParseCompressedStream(someCompressedJson)
    assert(textContent.length != 0)
  }

  "getParseParquetStream" should "read content of parquet file" in {
    val someParquetFile = getClass.getResourceAsStream("/UtilitiesSpec/userdata1.parquet")
    val textContent = utilities.getParseParquetStream(someParquetFile)
    println(textContent)
    assert(textContent.length != 0)
  }

} 
Example 97
Source File: SplashUtils.scala    From splash   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.shuffle

import java.io.{InputStream, OutputStream}
import java.util.Comparator

import org.apache.spark.internal.Logging

import scala.util.control.NonFatal

object SplashUtils extends Logging {
  def withResources[T <: AutoCloseable, V](r: => T)(f: T => V): V = {
    val resource: T = r
    require(resource != null, "resource is null")
    var exception: Throwable = null
    try {
      f(resource)
    } catch {
      case NonFatal(e) =>
        exception = e
        throw e
      case e: Throwable =>
        logError("fatal error received.", e)
        throw e
    } finally {
      closeAndAddSuppressed(exception, resource)
    }
  }

  private def closeAndAddSuppressed(e: Throwable,
      resource: AutoCloseable): Unit = {
    if (e != null) {
      try {
        resource.close()
      } catch {
        case NonFatal(suppressed) =>
          e.addSuppressed(suppressed)
      }
    } else {
      resource.close()
    }
  }

  // Reconstructed stand-in (assumption): the hash helper used by SplashHashComparator is not part
  // of this excerpt; a null-safe hashCode is sketched here.
  def hash[T](obj: T): Int = if (obj == null) 0 else obj.hashCode()
}

class SplashHashComparator[K] extends Comparator[K] {
  def compare(key1: K, key2: K): Int = {
    val hash1 = SplashUtils.hash(key1)
    val hash2 = SplashUtils.hash(key2)
    if (hash1 < hash2) -1 else if (hash1 == hash2) 0 else 1
  }
}


class SplashSpillableIterator[T](var upstream: Iterator[T],
    val spillInMemoryIterator: Iterator[T] => SpilledFile,
    val getNextUpstream: SpilledFile => Iterator[T])
    extends Iterator[T] with Logging {
  private val spillLock = new Object
  private var spilledFileOpt: Option[SpilledFile] = None
  private var cur: T = readNext()

  def spill(): Option[SpilledFile] = spillLock.synchronized {
    spilledFileOpt match {
      case Some(_) =>
        // has spilled, return None
        None
      case None =>
        // never spilled, now spilling
        val spilledFile = spillInMemoryIterator(upstream)
        spilledFileOpt = Some(spilledFile)
        spilledFileOpt
    }
  }

  def readNext(): T = spillLock.synchronized {
    spilledFileOpt match {
      case Some(spilledFile) =>
        upstream = getNextUpstream(spilledFile)
        spilledFileOpt = None
      case None =>
      // do nothing
    }
    if (upstream.hasNext) {
      upstream.next()
    } else {
      null.asInstanceOf[T]
    }
  }

  override def hasNext: Boolean = cur != null

  override def next(): T = {
    val ret = cur
    cur = readNext()
    ret
  }
} 
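A short usage sketch (the file path is a placeholder): withResources implements the loan pattern, so the InputStream passed to it is closed whether or not the body throws, and a failure during close is attached to the original error as a suppressed exception.

import java.io.FileInputStream
import org.apache.spark.shuffle.SplashUtils

object WithResourcesUsage {
  def main(args: Array[String]): Unit = {
    // Read the first byte of a file; the stream is closed by withResources.
    val firstByte = SplashUtils.withResources(new FileInputStream("/tmp/example.bin")) { in =>
      in.read()
    }
    println(s"first byte: $firstByte")
  }
}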
Example 98
Source File: WorkbookReader.scala    From spark-excel   with Apache License 2.0 5 votes vote down vote up
package com.crealytics.spark.excel

import java.io.InputStream

import com.crealytics.spark.excel.Utils.MapIncluding
import com.github.pjfanning.xlsx.StreamingReader
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.poi.ss.usermodel.{Workbook, WorkbookFactory}

trait WorkbookReader {
  protected def openWorkbook(): Workbook
  def withWorkbook[T](f: Workbook => T): T = {
    val workbook = openWorkbook()
    val res = f(workbook)
    workbook.close()
    res
  }
  def sheetNames: Seq[String] = {
    withWorkbook(
      workbook =>
        for (sheetIx <- (0 until workbook.getNumberOfSheets())) yield {
          workbook.getSheetAt(sheetIx).getSheetName()
        }
    )
  }
}

object WorkbookReader {
  val WithLocationMaxRowsInMemoryAndPassword =
    MapIncluding(Seq("path"), optionally = Seq("maxRowsInMemory", "workbookPassword"))

  def apply(parameters: Map[String, String], hadoopConfiguration: Configuration): WorkbookReader = {
    def readFromHadoop(location: String) = {
      val path = new Path(location)
      FileSystem.get(path.toUri, hadoopConfiguration).open(path)
    }
    parameters match {
      case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(Some(maxRowsInMemory), passwordOption)) =>
        new StreamingWorkbookReader(readFromHadoop(location), passwordOption, maxRowsInMemory.toInt)
      case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(None, passwordOption)) =>
        new DefaultWorkbookReader(readFromHadoop(location), passwordOption)
    }
  }
}
class DefaultWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String])
    extends WorkbookReader {
  protected def openWorkbook(): Workbook =
    workbookPassword
      .fold(WorkbookFactory.create(inputStreamProvider))(
        password => WorkbookFactory.create(inputStreamProvider, password)
      )
}

class StreamingWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String], maxRowsInMem: Int)
    extends WorkbookReader {
  override protected def openWorkbook(): Workbook = {
    val builder = StreamingReader
      .builder()
      .rowCacheSize(maxRowsInMem)
      .bufferSize(4096)
    workbookPassword
      .fold(builder)(password => builder.password(password))
      .open(inputStreamProvider)
  }
} 
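A hedged usage sketch for the reader above: the option keys mirror those handled by WithLocationMaxRowsInMemoryAndPassword, while the path and row limit are placeholder values.

import org.apache.hadoop.conf.Configuration
import com.crealytics.spark.excel.WorkbookReader

object WorkbookReaderUsage {
  def main(args: Array[String]): Unit = {
    val params = Map(
      "path" -> "hdfs:///data/report.xlsx", // placeholder location
      "maxRowsInMemory" -> "100"            // selects the streaming reader
    )
    val reader = WorkbookReader(params, new Configuration())
    reader.sheetNames.foreach(println)
  }
}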
Example 99
Source File: AggregateFileRepository.scala    From lightbend-markdown   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.markdown.server

import java.io.{File, InputStream}

import com.lightbend.markdown.DocPath
import play.doc.FileHandle
import play.doc.FileRepository


class AggregateFileRepository(repos: Seq[FileRepository]) extends FileRepository {

  private def fromFirstRepo[A](load: FileRepository => Option[A]) = repos.collectFirst(Function.unlift(load))

  def loadFile[A](path: String)(loader: (InputStream) => A) = fromFirstRepo(_.loadFile(path)(loader))

  def handleFile[A](path: String)(handler: (FileHandle) => A) = fromFirstRepo(_.handleFile(path)(handler))

  def findFileWithName(name: String) = fromFirstRepo(_.findFileWithName(name))
}

class PrefixedRepository(prefix: String, repo: FileRepository) extends FileRepository {

  private def withPrefixStripped[T](path: String)(block: String => Option[T]): Option[T] = {
    if (path.startsWith(prefix)) {
      block(path.stripPrefix(prefix))
    } else None
  }

  override def loadFile[A](path: String)(loader: (InputStream) => A): Option[A] =
    withPrefixStripped(path)(repo.loadFile[A](_)(loader))

  override def handleFile[A](path: String)(handler: (FileHandle) => A): Option[A] =
    withPrefixStripped(path)(repo.handleFile[A](_)(handler))

  override def findFileWithName(name: String): Option[String] =
    repo.findFileWithName(name).map(prefix + _)
}

object SourceFinder {
  def findPathFor(rootDir: File, docPaths: Seq[DocPath], path: String): Option[String] = {
    docPaths.collect {
      case DocPath(file, prefix) if prefix == "." => new File(file, path)
      case DocPath(file, prefix) if path.startsWith(prefix) => new File(file, path.stripPrefix(prefix))
    }.collectFirst {
      case file if file.exists() => file.getCanonicalPath.stripPrefix(rootDir.getCanonicalPath).stripPrefix(File.separator)
    }
  }
} 
Example 100
Source File: InputOutput.scala    From lsp4s   with Apache License 2.0 5 votes vote down vote up
package scala.meta.jsonrpc

import java.io.InputStream
import java.io.OutputStream
import monix.execution.Cancelable


final class InputOutput(val in: InputStream, val out: OutputStream)
    extends Cancelable {
  override def cancel(): Unit = {
    Cancelable.cancelAll(
      List(
        Cancelable(() => in.close()),
        Cancelable(() => out.close())
      )
    )
  }
} 
Example 101
Source File: BaseProtocolMessage.scala    From lsp4s   with Apache License 2.0 5 votes vote down vote up
package scala.meta.jsonrpc

import java.io.InputStream
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import java.util
import io.circe.Json
import io.circe.syntax._
import monix.reactive.Observable
import scribe.LoggerSupport

final class BaseProtocolMessage(
    val header: Map[String, String],
    val content: Array[Byte]
) {

  override def equals(obj: scala.Any): Boolean =
    this.eq(obj.asInstanceOf[Object]) || {
      obj match {
        case m: BaseProtocolMessage =>
          header.equals(m.header) &&
            util.Arrays.equals(content, m.content)
        case _ => false // a non-message never compares equal
      }
    }

  override def toString: String = {
    val bytes = MessageWriter.write(this)
    StandardCharsets.UTF_8.decode(bytes).toString
  }
}

object BaseProtocolMessage {
  val ContentLen = "Content-Length"

  def apply(msg: Message): BaseProtocolMessage =
    fromJson(msg.asJson)
  def fromJson(json: Json): BaseProtocolMessage =
    fromBytes(json.noSpaces.getBytes(StandardCharsets.UTF_8))
  def fromBytes(bytes: Array[Byte]): BaseProtocolMessage =
    new BaseProtocolMessage(
      Map("Content-Length" -> bytes.length.toString),
      bytes
    )

  def fromInputStream(
      in: InputStream,
      logger: LoggerSupport
  ): Observable[BaseProtocolMessage] =
    fromBytes(Observable.fromInputStream(in), logger)

  def fromBytes(
      in: Observable[Array[Byte]],
      logger: LoggerSupport
  ): Observable[BaseProtocolMessage] =
    fromByteBuffers(in.map(ByteBuffer.wrap), logger)

  def fromByteBuffers(
      in: Observable[ByteBuffer],
      logger: LoggerSupport
  ): Observable[BaseProtocolMessage] =
    in.executeAsync.liftByOperator(new BaseProtocolMessageParser(logger))
} 
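A brief usage sketch, assuming the lsp4s artifacts are on the classpath: fromBytes derives the Content-Length header from the payload length, which is how an LSP message is framed before being written to an OutputStream.

import java.nio.charset.StandardCharsets
import scala.meta.jsonrpc.BaseProtocolMessage

object BaseProtocolMessageUsage {
  def main(args: Array[String]): Unit = {
    val payload = """{"jsonrpc":"2.0","method":"initialized","params":{}}"""
    val message = BaseProtocolMessage.fromBytes(payload.getBytes(StandardCharsets.UTF_8))
    // The header carries the byte length of the JSON content.
    println(message.header("Content-Length"))
  }
}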
Example 102
Source File: HdfsUtilsTest.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.serving.core.utils

import java.io.{FileNotFoundException, InputStream}

import org.apache.hadoop.fs.{FileSystem, _}
import org.junit.runner.RunWith
import org.mockito.Mockito._
import org.scalatest._
import org.scalatest.junit.JUnitRunner
import org.scalatest.mock.MockitoSugar

import scala.util.{Failure, Try}

@RunWith(classOf[JUnitRunner])
class HdfsUtilsTest extends FlatSpec with ShouldMatchers with MockitoSugar {

  val fileSystem: FileSystem = mock[FileSystem]

  val utils = new HdfsUtils(fileSystem, "stratio")

  "hdfs utils" should "getfiles from a path" in {
    val expected = Array(mock[FileStatus])
    when(fileSystem.listStatus(new Path("myTestPath"))).thenReturn(expected)
    val result = utils.getFiles("myTestPath")
    result should be(expected)
  }

  it should "return single file as inputStream" in {
    val expected: InputStream = mock[FSDataInputStream]
    when(fileSystem.open(new Path("testFile"))).thenReturn(expected.asInstanceOf[FSDataInputStream])
    val result: InputStream = utils.getFile("testFile")
    result should be(expected)
  }

  it should "write" in {
    val result = Try(utils.write("from", "to", true)) match {
      case Failure(ex: Throwable) => ex
    }
    result.isInstanceOf[FileNotFoundException] should be(true)
  }

  it should "write without override" in {
    val result = Try(utils.write("from", "to", false)) match {
      case Failure(ex: Throwable) => ex
    }
    result.isInstanceOf[FileNotFoundException] should be(true)
  }
} 
Example 103
Source File: JsonSource.scala    From play-json-schema-validator   with Apache License 2.0 5 votes vote down vote up
package com.eclipsesource.schema

import java.io.InputStream
import java.net.URL

import play.api.libs.json._

import scala.io.Source
import scala.util.{Failure, Success, Try}


// Reconstructed (assumption): the enclosing JsonSource object and its fromUrl helper are not part
// of this excerpt; a minimal fromUrl is sketched so that schemaFromUrl below resolves.
object JsonSource {

  def fromUrl(url: URL): Try[JsValue] =
    Try(Json.parse(Source.fromURL(url)(scala.io.Codec.UTF8).mkString))

  def schemaFromUrl(url: URL)(implicit reads: Reads[SchemaType]): JsResult[SchemaType] = {
    for {
      schemaJson <- JsonSource.fromUrl(url) match {
        case Success(json) => JsSuccess(json)
        case Failure(throwable) => JsError(throwable.getMessage)
      }
      schema <- Json.fromJson[SchemaType](schemaJson)
    } yield schema
  }
} 
Example 104
Source File: StreamCopier.scala    From scala-ssh   with Apache License 2.0 5 votes vote down vote up
package com.decodified.scalassh

import annotation.tailrec
import java.io.{ ByteArrayOutputStream, OutputStream, InputStream }

final class StreamCopier(bufferSize: Int = 4096) {
  private val buffer = new Array[Byte](bufferSize)

  @tailrec
  def copy(in: InputStream, out: OutputStream) {
    val bytes = in.read(buffer)
    if (bytes > 0) {
      out.write(buffer, 0, bytes)
      copy(in, out)
    } else {
      in.close()
      out.close()
    }
  }

  def emptyToString(inputStream: InputStream, charset: String = "UTF8") = {
    new String(emptyToByteArray(inputStream), charset)
  }

  def emptyToByteArray(inputStream: InputStream) = {
    val output = new ByteArrayOutputStream()
    copy(inputStream, output)
    output.toByteArray
  }
} 
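A minimal usage sketch: emptyToString drains and closes the given stream, so it suits one-shot reads such as collecting a remote command's output. The in-memory stream below stands in for a real SSH channel stream.

import java.io.ByteArrayInputStream
import com.decodified.scalassh.StreamCopier

object StreamCopierUsage {
  def main(args: Array[String]): Unit = {
    val in = new ByteArrayInputStream("remote command output".getBytes("UTF-8"))
    val text = new StreamCopier().emptyToString(in) // drains and closes the stream
    println(text)
  }
}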
Example 105
Source File: Command.scala    From scala-ssh   with Apache License 2.0 5 votes vote down vote up
package com.decodified.scalassh

import net.schmizz.sshj.connection.channel.direct.Session
import java.io.{ FileInputStream, File, ByteArrayInputStream, InputStream }

case class Command(command: String, input: CommandInput = CommandInput.NoInput, timeout: Option[Int] = None)

object Command {
  implicit def string2Command(cmd: String) = Command(cmd)
}

case class CommandInput(inputStream: Option[InputStream])

object CommandInput {
  lazy val NoInput = CommandInput(None)
  implicit def apply(input: String, charsetName: String = "UTF8"): CommandInput = apply(input.getBytes(charsetName))
  implicit def apply(input: Array[Byte]): CommandInput = apply(Some(new ByteArrayInputStream(input)))
  implicit def apply(input: InputStream): CommandInput = apply(Some(input))
  def fromFile(file: String): CommandInput = fromFile(new File(file))
  def fromFile(file: File): CommandInput = new FileInputStream(file)
  def fromResource(resource: String): CommandInput = getClass.getClassLoader.getResourceAsStream(resource)
}

class CommandResult(val channel: Session.Command) {
  def stdErrStream: InputStream = channel.getErrorStream
  def stdOutStream: InputStream = channel.getInputStream
  lazy val stdErrBytes = new StreamCopier().emptyToByteArray(stdErrStream)
  lazy val stdOutBytes = new StreamCopier().emptyToByteArray(stdOutStream)
  def stdErrAsString(charsetname: String = "utf8") = new String(stdErrBytes, charsetname)
  def stdOutAsString(charsetname: String = "utf8") = new String(stdOutBytes, charsetname)
  lazy val exitSignal: Option[String] = Option(channel.getExitSignal).map(_.toString)
  lazy val exitCode: Option[Int] = Option(channel.getExitStatus)
  lazy val exitErrorMessage: Option[String] = Option(channel.getExitErrorMessage)
} 
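A short usage sketch of the conversions above: a plain string becomes a Command, and a string (or byte array, file, or InputStream) becomes the CommandInput piped to it. The command strings are placeholders.

import com.decodified.scalassh.{Command, CommandInput}

object CommandUsage {
  def main(args: Array[String]): Unit = {
    val simple: Command = Command("ls -l")
    // Feed "piped text" to the remote cat via the string-to-CommandInput conversion.
    val withInput = Command("cat", input = CommandInput("piped text\n"))
    println(s"${simple.command} / has input: ${withInput.input.inputStream.isDefined}")
  }
}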
Example 106
Source File: DFSJarStore.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.jarstore.dfs

import java.io.{InputStream, OutputStream}
import org.apache.gearpump.util.Constants
import org.apache.gearpump.jarstore.JarStore
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import com.typesafe.config.Config
import org.apache.hadoop.fs.permission.{FsAction, FsPermission}


// Reconstructed header (assumption): the original class also implements init and createFile,
// which set up and write under rootPath; only getFile and createDirIfNotExists are shown here.
class DFSJarStore extends JarStore {

  private var rootPath: Path = _

  override def getFile(fileName: String): InputStream = {
    val filePath = new Path(rootPath, fileName)
    val fs = filePath.getFileSystem(new Configuration())
    fs.open(filePath)
  }

  private def createDirIfNotExists(path: Path): Unit = {
    val fs = path.getFileSystem(new Configuration())
    if (!fs.exists(path)) {
      fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL))
    }
  }
} 
Example 107
Source File: FileSystem.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.yarn.glue

import java.io.{InputStream, OutputStream}
import java.net.ConnectException

import org.apache.gearpump.util.LogUtil
import org.apache.hadoop.fs.Path

import scala.util.{Failure, Success, Try}

class FileSystem(yarnConfig: YarnConfig) {

  private val conf = yarnConfig.conf
  private val fs = org.apache.hadoop.fs.FileSystem.get(conf)

  private def LOG = LogUtil.getLogger(getClass)

  def open(file: String): InputStream = exceptionHandler {
    val path = new Path(file)
    fs.open(path)
  }

  def create(file: String): OutputStream = exceptionHandler {
    val path = new Path(file)
    fs.create(path)
  }

  def exists(file: String): Boolean = exceptionHandler {
    val path = new Path(file)
    fs.exists(path)
  }

  def name: String = {
    fs.getUri.toString
  }

  def getHomeDirectory: String = {
    fs.getHomeDirectory.toString
  }

  private def exceptionHandler[T](call: => T): T = {
    val callTry = Try(call)
    callTry match {
      case Success(v) => v
      case Failure(ex) =>
        if (ex.isInstanceOf[ConnectException]) {
          LOG.error("Please check whether we connect to the right HDFS file system, " +
            "current file system is $name." + "\n. Please copy all configs under " +
            "$HADOOP_HOME/etc/hadoop into conf/yarnconf directory of Gearpump package, " +
            "so that we can use the right File system.", ex)
        }
        throw ex
    }
  }
} 
Example 108
Source File: KernelInputStream.scala    From incubator-toree   with Apache License 2.0 5 votes vote down vote up
package org.apache.toree.kernel.protocol.v5.stream

import java.io.InputStream
import java.nio.charset.Charset

import akka.pattern.ask
import org.apache.toree.kernel.protocol.v5.content.InputRequest
import org.apache.toree.kernel.protocol.v5.kernel.ActorLoader
import org.apache.toree.kernel.protocol.v5.kernel.Utilities.timeout
import org.apache.toree.kernel.protocol.v5.{KMBuilder, MessageType}

import scala.collection.mutable.ListBuffer
import scala.concurrent.{Await, Future}

import KernelInputStream._

object KernelInputStream {
  val DefaultPrompt = ""
  val DefaultPassword = false
}


// Reconstructed header (assumption): the constructor parameters and the two private fields below
// are inferred from how the methods that follow use them.
class KernelInputStream(
  actorLoader: ActorLoader,
  kmBuilder: KMBuilder,
  prompt: String = DefaultPrompt,
  password: Boolean = DefaultPassword
) extends InputStream {

  private val EncodingType = Charset.forName("UTF-8")
  private var internalBytes: ListBuffer[Byte] = ListBuffer()

  override def read(): Int = {
    if (!this.hasByte) this.requestBytes()

    this.nextByte()
  }

  private def hasByte: Boolean = internalBytes.nonEmpty

  private def nextByte(): Int = {
    val byte = internalBytes.head

    internalBytes = internalBytes.tail

    byte
  }

  private def requestBytes(): Unit = {
    val inputRequest = InputRequest(prompt, password)
    // NOTE: Assuming already provided parent header and correct ids
    val kernelMessage = kmBuilder
      .withHeader(MessageType.Outgoing.InputRequest)
      .withContentString(inputRequest)
      .build

    // NOTE: The same handler is being used in both request and reply
    val responseFuture: Future[String] =
      (actorLoader.load(MessageType.Incoming.InputReply) ? kernelMessage)
      .mapTo[String]

    // Block until we get a response
    import scala.concurrent.duration._
    internalBytes ++=
      Await.result(responseFuture, Duration.Inf).getBytes(EncodingType)
  }
} 
Example 109
Source File: FactoryMethods.scala    From incubator-toree   with Apache License 2.0 5 votes vote down vote up
package org.apache.toree.kernel.api

import java.io.{InputStream, OutputStream}

import org.apache.toree.kernel.protocol.v5
import org.apache.toree.kernel.protocol.v5.{KMBuilder, KernelMessage}
import org.apache.toree.kernel.protocol.v5.kernel.ActorLoader
import org.apache.toree.kernel.protocol.v5.stream.{KernelOutputStream, KernelInputStream}
import com.typesafe.config.Config


// Reconstructed header (assumption): the constructor members and the FactoryMethodsLike parent
// are inferred from the usages and imports below; the original class also exposes a
// newKernelInputStream factory, omitted in this excerpt.
class FactoryMethods(
  private val config: Config,
  private val actorLoader: ActorLoader,
  private val parentMessage: KernelMessage,
  private val kmBuilder: KMBuilder
) extends FactoryMethodsLike {

  override def newKernelOutputStream(
    streamType: String = KernelOutputStream.DefaultStreamType,
    sendEmptyOutput: Boolean = config.getBoolean("send_empty_output")
  ): OutputStream = {
    new v5.stream.KernelOutputStream(
      actorLoader,
      kmBuilder,
      org.apache.toree.global.ScheduledTaskManager.instance,
      streamType = streamType,
      sendEmptyOutput = sendEmptyOutput
    )
  }
} 
Example 110
Source File: StreamState.scala    From incubator-toree   with Apache License 2.0 5 votes vote down vote up
package org.apache.toree.global

import java.io.{InputStream, OutputStream, PrintStream}


// Note: this example is an excerpt; the enclosing object with its tracked streams (_inputStream,
// _outputStream, _errorStream) and the init/reset helpers used below is reconstructed only as a
// shell here.
object StreamState {

  def withStreams[T](thunk: => T): T = {
    init(_inputStream, _outputStream, _errorStream)

    val returnValue = Console.withIn(_inputStream) {
      Console.withOut(_outputStream) {
        Console.withErr(_errorStream) {
          thunk
        }
      }
    }

    reset()

    returnValue
  }
} 
Example 111
Source File: StreamingParserSpec.scala    From cormorant   with MIT License 5 votes vote down vote up
package io.chrisdavenport.cormorant
package fs2

import cats.data.NonEmptyList
import cats.effect._
import cats.effect.testing.specs2.CatsIO
import _root_.fs2.Stream
import io.chrisdavenport.cormorant._
// import io.chrisdavenport.cormorant.implicits._
// import scala.concurrent.duration._
import java.io.ByteArrayInputStream
import java.io.InputStream

class StreamingParserSpec extends CormorantSpec with CatsIO {

  def ruinDelims(str: String) = augmentString(str).flatMap {
    case '\n' => "\r\n"
    case c => c.toString
  }

  "Streaming Parser" should {
    // https://github.com/ChristopherDavenport/cormorant/pull/84
    "parse a known value that did not work with streaming" in {
      val x = """First Name,Last Name,Email
Larry,Bordowitz,[email protected]
Anonymous,Hippopotamus,[email protected]"""
      val source = IO.pure(new ByteArrayInputStream(ruinDelims(x).getBytes): InputStream)
      Stream.resource(Blocker[IO]).flatMap{blocker => 
        _root_.fs2.io.readInputStream(
          source,
          chunkSize = 4,
          blocker
        )
      }
        .through(_root_.fs2.text.utf8Decode)
        .through(parseComplete[IO])
        .compile
        .toVector
        .map{ v => 
          val header = CSV.Headers(NonEmptyList.of(CSV.Header("First Name"), CSV.Header("Last Name"), CSV.Header("Email")))
          val row1 = CSV.Row(NonEmptyList.of(CSV.Field("Larry"), CSV.Field("Bordowitz"), CSV.Field("[email protected]")))
          val row2 = CSV.Row(NonEmptyList.of(CSV.Field("Anonymous"), CSV.Field("Hippopotamus"), CSV.Field("[email protected]")))
          Vector(
            (header, row1),
            (header, row2)
          ) must_=== v
        }
    }
  }

  

} 
Example 112
Source File: process.scala    From scala-steward   with Apache License 2.0 5 votes vote down vote up
package org.scalasteward.core.io

import cats.effect._
import cats.implicits._
import fs2.Stream
import java.io.{File, IOException, InputStream}
import org.scalasteward.core.util._
import scala.collection.mutable.ListBuffer
import scala.concurrent.TimeoutException
import scala.concurrent.duration.FiniteDuration

object process {
  def slurp[F[_]](
      cmd: Nel[String],
      cwd: Option[File],
      extraEnv: Map[String, String],
      timeout: FiniteDuration,
      log: String => F[Unit],
      blocker: Blocker
  )(implicit contextShift: ContextShift[F], timer: Timer[F], F: Concurrent[F]): F[List[String]] =
    createProcess(cmd, cwd, extraEnv).flatMap { process =>
      F.delay(new ListBuffer[String]).flatMap { buffer =>
        val readOut = {
          val out = readInputStream[F](process.getInputStream, blocker)
          out.evalMap(line => F.delay(appendBounded(buffer, line, 4096)) >> log(line)).compile.drain
        }

        val showCmd = (extraEnv.map { case (k, v) => s"$k=$v" }.toList ++ cmd.toList).mkString_(" ")
        val result = readOut >> F.delay(process.waitFor()) >>= { exitValue =>
          if (exitValue === 0) F.pure(buffer.toList)
          else {
            val msg = s"'$showCmd' exited with code $exitValue"
            F.raiseError[List[String]](new IOException(makeMessage(msg, buffer.toList)))
          }
        }

        val fallback = F.delay(process.destroyForcibly()) >> {
          val msg = s"'$showCmd' timed out after ${timeout.toString}"
          F.raiseError[List[String]](new TimeoutException(makeMessage(msg, buffer.toList)))
        }

        Concurrent.timeoutTo(result, timeout, fallback)
      }
    }

  private def createProcess[F[_]](
      cmd: Nel[String],
      cwd: Option[File],
      extraEnv: Map[String, String]
  )(implicit F: Sync[F]): F[Process] =
    F.delay {
      val pb = new ProcessBuilder(cmd.toList: _*)
      val env = pb.environment()
      cwd.foreach(pb.directory)
      extraEnv.foreach { case (key, value) => env.put(key, value) }
      pb.redirectErrorStream(true)
      pb.start()
    }

  private def readInputStream[F[_]](is: InputStream, blocker: Blocker)(implicit
      F: Sync[F],
      cs: ContextShift[F]
  ): Stream[F, String] =
    fs2.io
      .readInputStream(F.pure(is), chunkSize = 4096, blocker)
      .through(fs2.text.utf8Decode)
      .through(fs2.text.lines)

  private def makeMessage(prefix: String, output: List[String]): String =
    (prefix :: output).mkString("\n")
} 
Example 113
Source File: TestUtils.scala    From scavro   with Apache License 2.0 5 votes vote down vote up
package org.oedura.scavro.plugin

import java.io.{FileOutputStream, InputStream}

import sbt._

import scala.io.Source
import scala.util.Random

class TestUtils(workingDir: File) {
  (workingDir / "in").mkdir
  (workingDir / "out").mkdir

  def tmpDir = workingDir
  def tmpPath = workingDir.getAbsolutePath

  private def extractResource(resourceName: String): File = {
    val is: InputStream = getClass.getResourceAsStream(s"/$resourceName")
    val text = Source.fromInputStream(is).mkString
    val os: FileOutputStream = new FileOutputStream(workingDir / "in" / resourceName)
    os.write(text.getBytes)
    os.close()
    is.close()

    workingDir / "in" / resourceName
  }

  lazy val schemaFile: File = extractResource("Number.avsc")
  lazy val protocolFile: File = {
    schemaFile
    extractResource("NumberSystem.avdl")
  }

  def cleanup() = {
    def getRecursively(f: File): Seq[File] = f.listFiles.filter(_.isDirectory).flatMap(getRecursively) ++ f.listFiles

    getRecursively(workingDir).foreach { f =>
      if (!f.delete()) throw new RuntimeException("Failed to delete " + f.getAbsolutePath)
    }
    tmpDir.delete()
  }
}

object TestUtils {
  private val alphabet = ('a' to 'z') ++ ('A' to 'Z') ++ ('0' to '9')

  def randomFile(dir: File, prefix: String = "", suffix: String = "", maxTries: Int = 100, nameSize: Int = 10): File = {
    def randomFileImpl(triesLeft: Int): String = {
      val testName: String = (1 to nameSize).map(_ => alphabet(Random.nextInt(alphabet.size))).mkString
      if (!(dir / (prefix + testName + suffix)).exists) prefix + testName + suffix
      else if (triesLeft < 0) throw new Exception("Unable to find empty random file path.")
      else randomFileImpl(triesLeft - 1)
    }

    dir / randomFileImpl(maxTries)
  }

  def randomFileName(prefix: String, suffix: String = "", maxTries: Int = 100, nameSize: Int = 10): String = {
    def randomFileNameImpl(triesLeft: Int): String = {
      val testName: String = (1 to nameSize).map(_ => alphabet(Random.nextInt(alphabet.size))).mkString
      if (!file(prefix + testName + suffix).exists) prefix + testName + suffix
      else if (triesLeft < 0) throw new Exception("Unable to find empty random file path.")
      else randomFileNameImpl(triesLeft - 1)
    }

    randomFileNameImpl(maxTries)
  }

  def apply(workingDir: File) = {
    if (workingDir.exists && workingDir.isDirectory) new TestUtils(workingDir)
    else if (!workingDir.exists) {
      val success = workingDir.mkdirs
      if (success) new TestUtils(workingDir)
      else throw new Exception("Cannot initialize working directory")
    } else throw new Exception("Requested directory is occupied by ordinary file")
  }

} 
Example 114
Source File: LineBufferedStream.scala    From incubator-livy   with Apache License 2.0 5 votes vote down vote up
package org.apache.livy.utils

import java.io.InputStream
import java.util
import java.util.concurrent.locks.ReentrantLock

import scala.io.Source

import org.apache.livy.Logging

class CircularQueue[T](var capacity: Int) extends util.LinkedList[T] {
  override def add(t: T): Boolean = {
    if (size >= capacity) removeFirst
    super.add(t)
  }
}

class LineBufferedStream(inputStream: InputStream, logSize: Int) extends Logging {

  private[this] val _lines: CircularQueue[String] = new CircularQueue[String](logSize)

  private[this] val _lock = new ReentrantLock()
  private[this] val _condition = _lock.newCondition()
  private[this] var _finished = false

  private val thread = new Thread {
    override def run() = {
      val lines = Source.fromInputStream(inputStream).getLines()
      for (line <- lines) {
        info(line)
        _lock.lock()
        try {
          _lines.add(line)
          _condition.signalAll()
        } finally {
          _lock.unlock()
        }
      }

      _lock.lock()
      try {
        _finished = true
        _condition.signalAll()
      } finally {
        _lock.unlock()
      }
    }
  }
  thread.setDaemon(true)
  thread.start()

  def lines: IndexedSeq[String] = {
    _lock.lock()
    val lines = IndexedSeq.empty[String] ++ _lines.toArray(Array.empty[String])
    _lock.unlock()
    lines
  }

  def iterator: Iterator[String] = {
    new LinesIterator
  }

  def waitUntilClose(): Unit = thread.join()

  private class LinesIterator extends Iterator[String] {

    override def hasNext: Boolean = {
      if (_lines.size > 0) {
        true
      } else {
        // Otherwise we might still have more data.
        _lock.lock()
        try {
          if (_finished) {
            false
          } else {
            _condition.await()
            _lines.size > 0
          }
        } finally {
          _lock.unlock()
        }
      }
    }

    override def next(): String = {
      _lock.lock()
      val line = _lines.poll()
      _lock.unlock()
      line
    }
  }
} 
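A hedged usage sketch (the echoed command is a placeholder): LineBufferedStream is typically pointed at a child process's stdout, retaining only the last logSize lines.

import org.apache.livy.utils.LineBufferedStream

object LineBufferedStreamUsage {
  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "hello from livy").start()
    val buffered = new LineBufferedStream(process.getInputStream, 100)
    buffered.waitUntilClose()       // block until the stream is exhausted
    buffered.lines.foreach(println) // the retained, size-bounded lines
  }
}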
Example 115
Source File: TestHelper.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.io.{ByteArrayInputStream, InputStream}

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.protobuf.Message
import com.googlecode.protobuf.format.JsonFormat
import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.Seq
import scala.io.Source
import scala.reflect.ClassTag
import scala.reflect.classTag

object TestHelper {
  val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper")
  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
  }

  val jsonFormat: JsonFormat = new JsonFormat

  def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = {
    val fileString = Source.fromFile(file).mkString
    val parsed = mapper.readValue(fileString, classOf[Sceanario])
    parsed.input.map { data =>
      val json = mapper.writeValueAsString(data)
      convert[T](json)
    }
  }

  def convert[T<: Message : ClassTag](json: String): T = {
    val clazz = classTag[T].runtimeClass
    val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder]
    try {
      val input: InputStream = new ByteArrayInputStream(json.getBytes())
      jsonFormat.merge(input, builder)
      builder.build().asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw e
    }
  }

}

@SerialVersionUID(1L)
case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable

case class Sceanario(input: Seq[Any], expected: Option[Any] = None)

trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider {
  self: Suite =>

  @transient var _sparkSql: SparkSession = _
  @transient private var _sc: SparkContext = _

  override def sc: SparkContext = _sc

  def conf: SparkConf

  def sparkSql: SparkSession = _sparkSql

  override def beforeAll() {
    _sparkSql = SparkSession.builder().config(conf).getOrCreate()

    _sc = _sparkSql.sparkContext
    setup(_sc)
    super.beforeAll()
  }

  override def afterAll() {
    try {
      _sparkSql.close()
      _sparkSql = null
      LocalSparkContext.stop(_sc)
      _sc = null
    } finally {
      super.afterAll()
    }
  }

} 
Example 116
Source File: OffsetSeqLog.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming


import java.io.{InputStream, OutputStream}
import java.nio.charset.StandardCharsets._

import scala.io.{Source => IOSource}

import org.apache.spark.sql.SparkSession


class OffsetSeqLog(sparkSession: SparkSession, path: String)
  extends HDFSMetadataLog[OffsetSeq](sparkSession, path) {

  override protected def deserialize(in: InputStream): OffsetSeq = {
    // called inside a try-finally where the underlying stream is closed in the caller
    def parseOffset(value: String): Offset = value match {
      case OffsetSeqLog.SERIALIZED_VOID_OFFSET => null
      case json => SerializedOffset(json)
    }
    val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines()
    if (!lines.hasNext) {
      throw new IllegalStateException("Incomplete log file")
    }
    val version = lines.next()
    if (version != OffsetSeqLog.VERSION) {
      throw new IllegalStateException(s"Unknown log version: ${version}")
    }

    // read metadata
    val metadata = lines.next().trim match {
      case "" => None
      case md => Some(md)
    }
    OffsetSeq.fill(metadata, lines.map(parseOffset).toArray: _*)
  }

  override protected def serialize(offsetSeq: OffsetSeq, out: OutputStream): Unit = {
    // called inside a try-finally where the underlying stream is closed in the caller
    out.write(OffsetSeqLog.VERSION.getBytes(UTF_8))

    // write metadata
    out.write('\n')
    out.write(offsetSeq.metadata.map(_.json).getOrElse("").getBytes(UTF_8))

    // write offsets, one per line
    offsetSeq.offsets.map(_.map(_.json)).foreach { offset =>
      out.write('\n')
      offset match {
        case Some(json: String) => out.write(json.getBytes(UTF_8))
        case None => out.write(OffsetSeqLog.SERIALIZED_VOID_OFFSET.getBytes(UTF_8))
      }
    }
  }
}

object OffsetSeqLog {
  private val VERSION = "v1"
  private val SERIALIZED_VOID_OFFSET = "-"
} 
Example 117
Source File: ProcessTestUtils.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.test

import java.io.{InputStream, IOException}

import scala.sys.process.BasicIO

object ProcessTestUtils {
  class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread {
    this.setDaemon(true)

    override def run(): Unit = {
      try {
        BasicIO.processFully(capture)(stream)
      } catch { case _: IOException =>
        // Ignores the IOException thrown when the process termination, which closes the input
        // stream abruptly.
      }
    }
  }
} 
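A small usage sketch (the echoed command is a placeholder): the capturer runs as a daemon thread and hands each chunk of the process output to the callback.

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

object ProcessOutputCapturerUsage {
  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "spark test output").start()
    val capturer = new ProcessOutputCapturer(process.getInputStream, chunk => print(chunk))
    capturer.start()
    process.waitFor() // let the child finish; the daemon thread forwards its output
    capturer.join()
  }
}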
Example 118
Source File: CryptoStreamUtils.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.security

import java.io.{InputStream, OutputStream}
import java.util.Properties
import javax.crypto.KeyGenerator
import javax.crypto.spec.{IvParameterSpec, SecretKeySpec}

import org.apache.commons.crypto.random._
import org.apache.commons.crypto.stream._

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._


// Note: this example is an excerpt; the enclosing object is reconstructed here and its remaining
// members (key generation and the crypto stream wrappers) are omitted. The IV length of 16 bytes
// is an assumption matching the AES block size.
private[spark] object CryptoStreamUtils extends Logging {

  private[spark] val IV_LENGTH_IN_BYTES = 16

  private[this] def createInitializationVector(properties: Properties): Array[Byte] = {
    val iv = new Array[Byte](IV_LENGTH_IN_BYTES)
    val initialIVStart = System.currentTimeMillis()
    CryptoRandomFactory.getCryptoRandom(properties).nextBytes(iv)
    val initialIVFinish = System.currentTimeMillis()
    val initialIVTime = initialIVFinish - initialIVStart
    if (initialIVTime > 2000) {
      logWarning(s"It costs ${initialIVTime} milliseconds to create the Initialization Vector " +
        s"used by CryptoStream")
    }
    iv
  }
} 
Example 119
Source File: CommandUtils.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.worker

import java.io.{File, FileOutputStream, InputStream, IOException}

import scala.collection.JavaConverters._
import scala.collection.Map

import org.apache.spark.SecurityManager
import org.apache.spark.deploy.Command
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils


// Note: this example is an excerpt; the enclosing object is reconstructed here and its other
// helpers (such as the process-builder utilities) are omitted.
private[deploy]
object CommandUtils extends Logging {

  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)
    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    //       terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          Utils.copyStream(in, out, true)
        } catch {
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
} 
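A brief usage sketch: redirectStream spawns a thread that appends the child's stdout to the given file. Because the reconstructed object above is package-private to org.apache.spark.deploy, the sketch lives in that package; the command and log path are placeholders.

package org.apache.spark.deploy.worker

import java.io.File

object RedirectStreamUsage {
  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "worker output").start()
    CommandUtils.redirectStream(process.getInputStream, new File("/tmp/worker-stdout.log"))
    process.waitFor() // the redirect thread keeps copying until the stream closes
  }
}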
Example 120
Source File: HDFSExecutorMetricsReplayListenerBus.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import java.io.InputStream

import scala.collection.immutable
import scala.collection.mutable.ListBuffer
import scala.io.Source
import scala.util.parsing.json._

import org.apache.spark.internal.Logging

private[spark] class HDFSExecutorMetricsReplayListenerBus extends SparkListenerBus with Logging {

  
  def replay(
      logDataList: ListBuffer[(InputStream, String)],
      sourceName: String,
      maybeTruncated: Boolean = false): Unit = {

    logDataList.foreach(logData => {
      try {
        for (line <- Source.fromInputStream(logData._1).getLines()) {
          val hashMapParsed = JSON.parseFull(line)
          val hashMap = {
            hashMapParsed match {
              case Some(m: Map[String, Any]) => m
              case _ => new immutable.HashMap[String, Any]
            }
          }
          val hdfsExecutorMetrics = new HDFSExecutorMetrics(
            hashMap("values").asInstanceOf[Map[String, Any]],
            hashMap("host").asInstanceOf[String],
            hashMap("timestamp").asInstanceOf[Double].toLong)
          postToAll(hdfsExecutorMetrics)
        }
      } catch {
        case ex: Exception =>
          ex.printStackTrace();
          logError(ex.toString)
          logWarning(s"Got JsonParseException from log file $logData")
      }
    })
  }
} 
Example 121
Source File: exercise08.scala    From scala-for-the-Impatient   with MIT License 5 votes vote down vote up
import java.io.{InputStream, FileInputStream}


trait Buffering {
  this: InputStream =>

  val BUF_SIZE: Int = 5
  private val buf = new Array[Byte](BUF_SIZE)
  private var bufsize: Int = 0
  private var pos: Int = 0

  override def read(): Int = {
    if (pos >= bufsize) {
      bufsize = this.read(buf, 0, BUF_SIZE)
      if (bufsize <= 0) return -1 // end of stream: nothing left to buffer
      pos = 0
    }
    pos += 1
    buf(pos - 1)
  }
}

val f = new FileInputStream("exercise08.txt") with Buffering

for(i <- 1 to 10) println(f.read()) 
Example 122
Source File: exercise09.scala    From scala-for-the-Impatient   with MIT License 5 votes vote down vote up
import java.io.{InputStream, FileInputStream}

trait Logger {
  def log(msg: String)
}

trait NoneLogger extends Logger {
  def log(msg: String) = {}
}

trait PrintLogger extends Logger {
  def log(msg: String) = println(msg)
}


trait Buffering {
  this: InputStream with Logger =>

  val BUF_SIZE: Int = 5
  private val buf = new Array[Byte](BUF_SIZE)
  private var bufsize: Int = 0
  private var pos: Int = 0

  override def read(): Int = {
    if (pos >= bufsize) {
      bufsize = this.read(buf, 0, BUF_SIZE)
      log("buffered %d bytes: %s".format(bufsize, buf.mkString(", ")))
      if (bufsize <= 0) return -1 // end of stream: nothing left to buffer
      pos = 0
    }
    pos += 1
    buf(pos - 1)
  }
}

val f = new FileInputStream("exercise08.txt") with Buffering with PrintLogger

for(i <- 1 to 10) println(f.read()) 
Example 123
Source File: DemoFileUploadServlet.scala    From udash-demos   with GNU General Public License v3.0 5 votes vote down vote up
package io.udash.demos.files.jetty

import scala.concurrent.ExecutionContext.Implicits.global
import java.io.{File, InputStream}
import java.nio.file.Files
import java.util.UUID

import io.udash.demos.files.UploadedFile
import io.udash.demos.files.rpc.ClientRPC
import io.udash.demos.files.services.FilesStorage
import io.udash.rpc._

class DemoFileUploadServlet(uploadDir: String) extends FileUploadServlet(Set("file", "files")) {
  new File(uploadDir).mkdir()

  override protected def handleFile(name: String, content: InputStream): Unit = {
    val targetName: String = s"${UUID.randomUUID()}_${name.replaceAll("[^a-zA-Z0-9.-]", "_")}"
    val targetFile = new File(uploadDir, targetName)
    Files.copy(content, targetFile.toPath)
    FilesStorage.add(
      UploadedFile(name, targetName, targetFile.length())
    )

    // Notify clients
    ClientRPC(AllClients).fileStorageUpdated()
  }
} 
Example 124
Source File: MetricsConfig.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.Logging
import org.apache.spark.util.Utils

private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties) {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  def initialize() {
    // Add default properties in case there's no properties file
    setDefaultProperties(properties)

    // If spark.metrics.conf is not set, try to get file in class path
    val isOpt: Option[InputStream] = configFile.map(new FileInputStream(_)).orElse {
      try {
        Option(Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME))
      } catch {
        case e: Exception =>
          logError("Error loading default configuration file", e)
          None
      }
    }

    isOpt.foreach { is =>
      try {
        properties.load(is)
      } finally {
        is.close()
      }
    }

    propertyCategories = subProperties(properties, INSTANCE_REGEX)
    if (propertyCategories.contains(DEFAULT_PREFIX)) {
      import scala.collection.JavaConversions._

      val defaultProperty = propertyCategories(DEFAULT_PREFIX)
      for { (inst, prop) <- propertyCategories
            if (inst != DEFAULT_PREFIX)
            (k, v) <- defaultProperty
            if (prop.getProperty(k) == null) } {
        prop.setProperty(k, v)
      }
    }
  }

  def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = {
    val subProperties = new mutable.HashMap[String, Properties]
    import scala.collection.JavaConversions._
    prop.foreach { kv =>
      if (regex.findPrefixOf(kv._1).isDefined) {
        val regex(prefix, suffix) = kv._1
        subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2)
      }
    }
    subProperties
  }

  def getInstance(inst: String): Properties = {
    propertyCategories.get(inst) match {
      case Some(s) => s
      case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties)
    }
  }
} 
Example 125
Source File: ReplayListenerBus.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol


// Note: this example is an excerpt; the enclosing class declaration is reconstructed here.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(logData: InputStream, sourceName: String): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      lines.foreach { line =>
        currentLine = line
        postToAll(JsonProtocol.sparkEventFromJson(parse(line)))
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

} 
Example 126
Source File: ProtoCpgLoader.scala    From codepropertygraph   with Apache License 2.0 5 votes vote down vote up
package io.shiftleft.codepropertygraph.cpgloading

import java.io.InputStream
import java.nio.file.{Files, Path}

import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.proto.cpg.Cpg.{CpgOverlay, CpgStruct, DiffGraph}
import org.apache.logging.log4j.LogManager
import java.util.{Collection => JCollection, List => JList}

import com.google.protobuf.GeneratedMessageV3

import scala.jdk.CollectionConverters._
import scala.util.{Failure, Success, Try, Using}
import overflowdb.OdbConfig
import io.shiftleft.proto.cpg.Cpg.CpgStruct.Edge

import scala.collection.mutable.ArrayBuffer

object ProtoCpgLoader {
  private val logger = LogManager.getLogger(getClass)

  def loadFromProtoZip(fileName: String, overflowDbConfig: OdbConfig = OdbConfig.withoutOverflow): Cpg =
    measureAndReport {
      val builder = new ProtoToCpg(overflowDbConfig)
      Using.Manager { use =>
        val edgeLists: ArrayBuffer[JCollection[Edge]] = ArrayBuffer.empty
        use(new ZipArchive(fileName)).entries.foreach { entry =>
          val inputStream = use(Files.newInputStream(entry))
          val cpgStruct = getNextProtoCpgFromStream(inputStream)
          builder.addNodes(cpgStruct.getNodeList)
          edgeLists += cpgStruct.getEdgeList
        }
        edgeLists.foreach(edgeCollection => builder.addEdges(edgeCollection))
      } match {
        case Failure(exception) => throw exception
        case Success(_)         => builder.build()
      }
    }

  def loadFromListOfProtos(cpgs: Seq[CpgStruct], overflowDbConfig: OdbConfig): Cpg = {
    val builder = new ProtoToCpg(overflowDbConfig)
    cpgs.foreach(cpg => builder.addNodes(cpg.getNodeList))
    cpgs.foreach(cpg => builder.addEdges(cpg.getEdgeList))
    builder.build()
  }

  def loadFromListOfProtos(cpgs: JList[CpgStruct], overflowDbConfig: OdbConfig): Cpg =
    loadFromListOfProtos(cpgs.asScala.toSeq, overflowDbConfig)

  def loadOverlays(fileName: String): Try[Iterator[CpgOverlay]] =
    loadOverlays(fileName, CpgOverlay.parseFrom)

  def loadDiffGraphs(fileName: String): Try[Iterator[DiffGraph]] =
    loadOverlays(fileName, DiffGraph.parseFrom)

  private def loadOverlays[T <: GeneratedMessageV3](fileName: String, f: InputStream => T): Try[Iterator[T]] =
    Using(new ZipArchive(fileName)) { zip =>
      zip.entries
        .sortWith(compareOverlayPath)
        .map { path =>
          val is = Files.newInputStream(path)
          f(is)
        }
        .iterator
    }

  private def compareOverlayPath(a: Path, b: Path): Boolean = {
    val file1Split: Array[String] = a.toString.replace("/", "").split("_")
    val file2Split: Array[String] = b.toString.replace("/", "").split("_")
    if (file1Split.length < 2 || file2Split.length < 2)
      a.toString < b.toString
    else
      file1Split(0).toInt < file2Split(0).toInt
  }

  private def getNextProtoCpgFromStream(inputStream: InputStream) =
    CpgStruct.parseFrom(inputStream)

  private def measureAndReport[A](f: => A): A = {
    val start = System.currentTimeMillis()
    val result = f
    logger.info("CPG construction finished in " + (System.currentTimeMillis() - start) + "ms.")
    result
  }
} 
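A short usage sketch for the loader above; the archive name is hypothetical, and OdbConfig comes from overflowdb as imported in the example.

import overflowdb.OdbConfig
import scala.util.{Failure, Success}

val cpg = ProtoCpgLoader.loadFromProtoZip("cpg.bin.zip", OdbConfig.withoutOverflow)   // hypothetical archive name

ProtoCpgLoader.loadOverlays("cpg.bin.zip") match {
  case Success(overlays) => println(s"loaded ${overlays.length} overlay(s)")
  case Failure(error)    => println(s"no overlays found: ${error.getMessage}")
}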
Example 127
Source File: Runner.scala    From avrohugger   with Apache License 2.0
package avrohugger
package tool

import format.abstractions.SourceFormat
import format.{Scavro, SpecificRecord, Standard}
import java.util.Arrays
import java.util.Map
import java.util.TreeMap
import java.io.{InputStream, PrintStream}

import org.apache.avro.tool.Tool

import scala.util.{Failure, Success, Try}
import scala.collection.JavaConverters._


// (the class declaration and the two fields below are assumed for this excerpt;
//  the original Runner also populates the tool registry)
class Runner(in: InputStream, out: PrintStream, err: PrintStream) {

  // Registry of available Avro tools keyed by name, and the width of the longest
  // tool name used to align the usage listing; placeholder values shown here.
  val toolsMap: Map[String, Tool] = new TreeMap[String, Tool]()
  val maxLen: Int = 10
  def run(args: Array[String]): Int = {
    if (args.length != 0) {
      val tool: Tool = toolsMap.get(args(0))
      if (tool != null) {
        val result = Try {
          tool.run(
            in, out, err, Arrays.asList(args: _*).subList(1, args.length))
        }
        result match {
          case Success(0) => 0
          case Success(exitCode) =>
            err.println("Tool " + args(0) + " failed with exit code " + exitCode)
            exitCode
          case Failure(e) =>
            err.println("Tool " + args(0) + " failed: " + e.toString)
            1
        }
      } else {
        err.println("Unknown tool: " + args(0))
        1
      }
    } else {
      err.println("----------------")

      err.println("Available tools:")
      for (k <- toolsMap.asScala.values) {
        err.printf("%" + maxLen + "s  %s\n", k.getName(), k.getShortDescription())
      }

      1
    }
  }
} 
Example 128
Source File: DynaMLSSH.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml

import java.io.{InputStream, OutputStream, PrintStream}

import ammonite.ops.Path
import ammonite.runtime.Storage
import ammonite.sshd.{SshServer, SshServerConfig}
import ammonite.sshd.util.Environment
import ammonite.util.{Bind, Colors}


class DynaMLSSH(
  sshConfig: SshServerConfig,
  predef: String = "",
  defaultPredef: Boolean = true,
  wd: os.Path = os.pwd,
  replArgs: Seq[Bind[_]] = Nil,
  classLoader: ClassLoader = DynaMLSSH.getClass.getClassLoader) {
  private lazy val sshd = SshServer(
    sshConfig,
    shellServer = DynaMLSSH.runRepl(
      sshConfig.ammoniteHome,
      predef,
      defaultPredef,
      wd,
      replArgs,
      classLoader
    )
  )

  def port                    = sshd.getPort
  def start(): Unit           = sshd.start()
  def stop(): Unit            = sshd.stop()
  def stopImmediately(): Unit = sshd.stop(true)
}

object DynaMLSSH {
  // Actually runs a repl inside of session serving a remote user shell.
  private def runRepl(
    homePath: os.Path,
    predefCode: String,
    defaultPredef: Boolean,
    wd: os.Path,
    replArgs: Seq[Bind[_]],
    replServerClassLoader: ClassLoader
  )(in: InputStream,
    out: OutputStream
  ): Unit = {
    // since sshd server has it's own customised environment,
    // where things like System.out will output to the
    // server's console, we need to prepare individual environment
    // to serve this particular user's session

    Environment.withEnvironment(Environment(replServerClassLoader, in, out)) {
      try {
        DynaML(
          predefCode = predefCode,
          predefFile = None,
          defaultPredef = defaultPredef,
          storageBackend = new Storage.Folder(homePath),
          wd = wd,
          inputStream = in,
          outputStream = out,
          errorStream = out,
          verboseOutput = false,
          remoteLogging = false,
          colors = Colors.Default
        ).run(replArgs: _*)
      } catch {
        case any: Throwable =>
          val sshClientOutput = new PrintStream(out)
          sshClientOutput.println(
            "What a terrible failure, DynaML just blew up!"
          )
          any.printStackTrace(sshClientOutput)
      }
    }
  }
} 
Example 129
Source File: GenericReader.scala    From protobuf-generic   with Apache License 2.0
package me.lyh.protobuf.generic

import java.io.{InputStream, ObjectInputStream, ObjectOutputStream, OutputStream}
import java.nio.ByteBuffer
import java.util.{ArrayList => JArrayList, LinkedHashMap => JLinkedHashMap, TreeMap => JTreeMap}

import com.google.protobuf.Descriptors.FieldDescriptor.Type
import com.google.protobuf.{CodedInputStream, WireFormat}

import scala.collection.JavaConverters._

object GenericReader {
  def of(schema: Schema): GenericReader = new GenericReader(schema)
}

class GenericReader(val schema: Schema) extends Serializable {
  def read(buf: Array[Byte]): GenericRecord =
    read(CodedInputStream.newInstance(buf), schema.root)

  def read(buf: ByteBuffer): GenericRecord =
    read(CodedInputStream.newInstance(buf), schema.root)

  def read(input: InputStream): GenericRecord =
    read(CodedInputStream.newInstance(input), schema.root)

  private def read(input: CodedInputStream, messageSchema: MessageSchema): GenericRecord = {
    val map = new JTreeMap[java.lang.Integer, Any]()
    while (!input.isAtEnd) {
      val tag = input.readTag()
      val id = WireFormat.getTagFieldNumber(tag)
      val field = messageSchema.fields(id)

      if (field.label == Label.REPEATED) {
        if (!map.containsKey(id)) {
          map.put(id, new JArrayList[Any]())
        }
        val list = map.get(id).asInstanceOf[java.util.ArrayList[Any]]
        if (field.packed) {
          val bytesIn = CodedInputStream.newInstance(input.readByteBuffer())
          while (!bytesIn.isAtEnd) {
            list.add(readValue(bytesIn, field))
          }
        } else {
          list.add(readValue(input, field))
        }
      } else {
        map.put(id, readValue(input, field))
      }
    }

    val result = new JLinkedHashMap[String, Any]()
    map.asScala.foreach(kv => result.put(messageSchema.fields(kv._1).name, kv._2))
    messageSchema.fields.valuesIterator.foreach { f =>
      if (f.default.isDefined && !result.containsKey(f.name)) {
        result.put(f.name, f.default.get)
      }
    }
    result
  }

  private def readValue(in: CodedInputStream, field: Field): Any = field.`type` match {
    case Type.FLOAT    => in.readFloat()
    case Type.DOUBLE   => in.readDouble()
    case Type.FIXED32  => in.readFixed32()
    case Type.FIXED64  => in.readFixed64()
    case Type.INT32    => in.readInt32()
    case Type.INT64    => in.readInt64()
    case Type.UINT32   => in.readUInt32()
    case Type.UINT64   => in.readUInt64()
    case Type.SFIXED32 => in.readSFixed32()
    case Type.SFIXED64 => in.readSFixed64()
    case Type.SINT32   => in.readSInt32()
    case Type.SINT64   => in.readSInt64()
    case Type.BOOL     => in.readBool()
    case Type.STRING   => in.readString()
    case Type.BYTES    => Base64.encode(in.readByteArray())
    case Type.ENUM     => schema.enums(field.schema.get).values(in.readEnum())
    case Type.MESSAGE =>
      val nestedIn = CodedInputStream.newInstance(in.readByteBuffer())
      read(nestedIn, schema.messages(field.schema.get))
    case Type.GROUP => throw new IllegalArgumentException("Unsupported type: GROUP")
  }

  private def readObject(in: ObjectInputStream): Unit = {
    val schema = Schema.fromJson(in.readUTF())

    val schemaField = getClass.getDeclaredField("schema")
    schemaField.setAccessible(true)
    schemaField.set(this, schema)
  }

  private def writeObject(out: ObjectOutputStream): Unit =
    out.writeUTF(schema.toJson)
} 
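A usage sketch for GenericReader, assuming a Schema serialized as JSON (Schema.fromJson is used in readObject above) and a binary protobuf payload; both file names are hypothetical.

import java.io.FileInputStream
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

val schemaJson = new String(Files.readAllBytes(Paths.get("schema.json")), StandardCharsets.UTF_8)   // hypothetical file
val reader     = GenericReader.of(Schema.fromJson(schemaJson))

val in = new FileInputStream("payload.bin")   // hypothetical file
try println(reader.read(in))                  // a GenericRecord backed by a LinkedHashMap
finally in.close()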
Example 130
Source File: AmqpFieldValueSpec.scala    From fs2-rabbit   with Apache License 2.0
package dev.profunktor.fs2rabbit

import java.io.{DataInputStream, DataOutputStream, InputStream, OutputStream}
import java.time.Instant

import com.rabbitmq.client.impl.{ValueReader, ValueWriter}
import dev.profunktor.fs2rabbit.model.AmqpFieldValue._
import dev.profunktor.fs2rabbit.model.{AmqpFieldValue, ShortString}
import org.scalatest.flatspec.AnyFlatSpecLike
import org.scalatest.Assertion
import org.scalatest.matchers.should.Matchers

class AmqpFieldValueSpec extends AnyFlatSpecLike with Matchers with AmqpPropertiesArbitraries {

  it should "convert from and to Java primitive header values" in {
    val intVal    = IntVal(1)
    val longVal   = LongVal(2L)
    val stringVal = StringVal("hey")
    val arrayVal  = ArrayVal(Vector(IntVal(3), IntVal(2), IntVal(1)))

    AmqpFieldValue.unsafeFrom(intVal.toValueWriterCompatibleJava) should be(intVal)
    AmqpFieldValue.unsafeFrom(longVal.toValueWriterCompatibleJava) should be(longVal)
    AmqpFieldValue.unsafeFrom(stringVal.toValueWriterCompatibleJava) should be(stringVal)
    AmqpFieldValue.unsafeFrom("fs2") should be(StringVal("fs2"))
    AmqpFieldValue.unsafeFrom(arrayVal.toValueWriterCompatibleJava) should be(arrayVal)
  }
  it should "preserve the same value after a round-trip through impure and from" in {
    forAll { amqpHeaderVal: AmqpFieldValue =>
      AmqpFieldValue.unsafeFrom(amqpHeaderVal.toValueWriterCompatibleJava) == amqpHeaderVal
    }
  }

  it should "preserve the same values after a round-trip through the Java ValueReader and ValueWriter" in {
    forAll(assertThatValueIsPreservedThroughJavaWriteAndRead _)
  }

  it should "preserve a specific StringVal that previously failed after a round-trip through the Java ValueReader and ValueWriter" in {
    assertThatValueIsPreservedThroughJavaWriteAndRead(StringVal("kyvmqzlbjivLqQFukljghxdowkcmjklgSeybdy"))
  }

  it should "preserve a specific DateVal created from an Instant that has millisecond accuracy after a round-trip through the Java ValueReader and ValueWriter" in {
    val instant   = Instant.parse("4000-11-03T20:17:29.57Z")
    val myDateVal = TimestampVal.from(instant)
    assertThatValueIsPreservedThroughJavaWriteAndRead(myDateVal)
  }

  "DecimalVal" should "reject a BigDecimal of an unscaled value with 33 bits..." in {
    DecimalVal.from(BigDecimal(Int.MaxValue) + BigDecimal(1)) should be(None)
  }
  it should "reject a BigDecimal with a scale over octet size" in {
    DecimalVal.from(new java.math.BigDecimal(java.math.BigInteger.valueOf(12345L), 1000)) should be(None)
  }

  // We need to wrap things in a dummy table because the method that would be
  // great to test with ValueReader, readFieldValue, is private, and so we
  // have to call the next best thing, readTable.
  private def wrapInDummyTable(value: AmqpFieldValue): TableVal =
    TableVal(Map(ShortString.unsafeFrom("dummyKey") -> value))

  private def createWriterFromQueue(outputResults: collection.mutable.Queue[Byte]): ValueWriter =
    new ValueWriter({
      new DataOutputStream({
        new OutputStream {
          override def write(b: Int): Unit =
            outputResults.enqueue(b.toByte)
        }
      })
    })

  private def createReaderFromQueue(input: collection.mutable.Queue[Byte]): ValueReader = {
    val inputStream = new InputStream {
      override def read(): Int =
        try {
          val result = input.dequeue()
          // A signed -> unsigned conversion because bytes by default are
          // converted into signed ints, which is bad when the API of read
          // states that negative numbers indicate EOF...
          0Xff & result.toInt
        } catch {
          case _: NoSuchElementException => -1
        }

      override def available(): Int = {
        val result = input.size
        result
      }
    }
    new ValueReader(new DataInputStream(inputStream))
  }

  private def assertThatValueIsPreservedThroughJavaWriteAndRead(amqpHeaderVal: AmqpFieldValue): Assertion = {
    val outputResultsAsTable = collection.mutable.Queue.empty[Byte]
    val tableWriter          = createWriterFromQueue(outputResultsAsTable)
    tableWriter.writeTable(wrapInDummyTable(amqpHeaderVal).toValueWriterCompatibleJava)

    val reader    = createReaderFromQueue(outputResultsAsTable)
    val readValue = reader.readTable()
    AmqpFieldValue.unsafeFrom(readValue) should be(wrapInDummyTable(amqpHeaderVal))
  }
} 
Example 131
Source File: FromInputStreamInput.scala    From borer   with Mozilla Public License 2.0
package io.bullet.borer.input

import java.io.InputStream
import java.util

import io.bullet.borer.{ByteAccess, Input}

trait FromInputStreamInput { this: FromByteArrayInput with FromIteratorInput =>

  private object FromInputStreamProvider extends Input.Provider[InputStream] {
    type Bytes = Array[Byte]
    def byteAccess                = ByteAccess.ForByteArray
    def apply(value: InputStream) = fromInputStream(value)
  }

  implicit def FromInputStreamProvider[T <: InputStream]: Input.Provider[T] =
    FromInputStreamProvider.asInstanceOf[Input.Provider[T]]

  def fromInputStream(inputStream: InputStream, bufferSize: Int = 16384): Input[Array[Byte]] = {
    if (bufferSize < 256) throw new IllegalArgumentException(s"bufferSize must be >= 256 but was $bufferSize")
    val iterator: Iterator[Input[Array[Byte]]] =
      new Iterator[Input[Array[Byte]]] {
        private[this] val bufA                          = new Array[Byte](bufferSize)
        private[this] val bufB                          = new Array[Byte](bufferSize)
        private[this] var bufSelect: Boolean            = _
        private[this] var nextInput: Input[Array[Byte]] = _

        def hasNext = {
          def tryReadNext() = {
            val buf = if (bufSelect) bufA else bufB
            nextInput = inputStream.read(buf) match {
              case -1 => null
              case `bufferSize` =>
                bufSelect = !bufSelect
                fromByteArray(buf)
              case byteCount => fromByteArray(util.Arrays.copyOfRange(buf, 0, byteCount))
            }
            nextInput ne null
          }
          (nextInput ne null) || tryReadNext()
        }

        def next() =
          if (hasNext) {
            val result = nextInput
            nextInput = null
            result
          } else throw new NoSuchElementException
      }
    fromIterator(iterator)
  }

} 
Example 132
Source File: FromInputStreamInputSpec.scala    From borer   with Mozilla Public License 2.0
package io.bullet.borer.input

import java.io.InputStream

import io.bullet.borer._
import utest._

import scala.util.Random

object FromInputStreamInputSpec extends TestSuite with TestUtils {

  val random = new Random

  val tests = Tests {

    "FromInputStreamInput" - {

      def newBytesIterator = Iterator.from(0).take(10000).map(_.toByte)

      val bytes = newBytesIterator

      val inputStream = new InputStream {
        def read() = ???
        override def read(b: Array[Byte]) =
          if (bytes.hasNext) {
            val chunk = random.nextInt(4) match {
              case 0     => Array.emptyByteArray
              case 1 | 2 => bytes.take(b.length).toArray[Byte]
              case 3     => bytes.take(random.nextInt(b.length) + 1).toArray[Byte]
            }
            System.arraycopy(chunk, 0, b, 0, chunk.length)
            chunk.length
          } else -1
      }

      val input = Input.fromInputStream(inputStream, bufferSize = 300)

      val paddingProvider = new Input.PaddingProvider[Array[Byte]] {
        def padByte()                                  = 42
        def padDoubleByte(remaining: Int)              = ???
        def padQuadByte(remaining: Int)                = ???
        def padOctaByte(remaining: Int)                = ???
        def padBytes(rest: Array[Byte], missing: Long) = ???
      }

      for {
        (a, b) <- newBytesIterator.map(_ -> input.readBytePadded(paddingProvider))
      } a ==> b

      input.cursor ==> 10000

      input.readBytePadded(paddingProvider) ==> 42
    }
  }
} 
Example 133
Source File: Markdown.scala    From docspell   with GNU General Public License v3.0
package docspell.convert.flexmark

import java.io.{InputStream, InputStreamReader}
import java.nio.charset.Charset
import java.util

import scala.util.Try

import cats.effect.Sync
import cats.implicits._
import fs2.Stream

import docspell.common._

import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension
import com.vladsch.flexmark.ext.tables.TablesExtension
import com.vladsch.flexmark.html.HtmlRenderer
import com.vladsch.flexmark.parser.Parser
import com.vladsch.flexmark.util.data.{DataKey, MutableDataSet}

object Markdown {

  def toHtml(
      is: InputStream,
      cfg: MarkdownConfig,
      cs: Charset
  ): Either[Throwable, String] = {
    val p = createParser()
    val r = createRenderer()
    Try {
      val reader = new InputStreamReader(is, cs)
      val doc    = p.parseReader(reader)
      wrapHtml(r.render(doc), cfg)
    }.toEither
  }

  def toHtml(md: String, cfg: MarkdownConfig): String = {
    val p   = createParser()
    val r   = createRenderer()
    val doc = p.parse(md)
    wrapHtml(r.render(doc), cfg)
  }

  def toHtml[F[_]: Sync](
      data: Stream[F, Byte],
      cfg: MarkdownConfig,
      cs: Charset
  ): F[String] =
    data.through(Binary.decode(cs)).compile.foldMonoid.map(str => toHtml(str, cfg))

  private def wrapHtml(body: String, cfg: MarkdownConfig): String =
    s"""<!DOCTYPE html>
       |<html>
       |<head>
       |<meta charset="utf-8"/>
       |<style>
       |${cfg.internalCss}
       |</style>
       |</head>
       |<body>
       |$body
       |</body>
       |</html>
       |""".stripMargin

  private def createParser(): Parser = {
    val opts = new MutableDataSet()
    opts.set(
      Parser.EXTENSIONS.asInstanceOf[DataKey[util.Collection[_]]],
      util.Arrays.asList(TablesExtension.create(), StrikethroughExtension.create())
    );

    Parser.builder(opts).build()
  }

  private def createRenderer(): HtmlRenderer = {
    val opts = new MutableDataSet()
    HtmlRenderer.builder(opts).build()
  }
} 
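A small usage sketch for the InputStream variant above. MarkdownConfig is assumed to be a simple wrapper around the internal CSS (only its internalCss field is referenced in wrapHtml), and the input file is hypothetical.

import java.io.FileInputStream
import java.nio.charset.StandardCharsets

val cfg = MarkdownConfig("body { font-family: sans-serif; }")   // assumed constructor shape
val is  = new FileInputStream("notes.md")                       // hypothetical file
val html =
  try Markdown.toHtml(is, cfg, StandardCharsets.UTF_8)
  finally is.close()
println(html.fold(_.getMessage, identity))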
Example 134
Source File: PdfboxExtract.scala    From docspell   with GNU General Public License v3.0
package docspell.extract.pdfbox

import java.io.InputStream
import java.nio.file.Path

import scala.util.{Try, Using}

import cats.effect.Sync
import cats.implicits._
import fs2.Stream

import docspell.extract.internal.Text

import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.text.PDFTextStripper

object PdfboxExtract {

  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile
      .to(Array)
      .map(bytes => Using(PDDocument.load(bytes))(readText).toEither.flatten)

  def get(is: InputStream): Either[Throwable, Text] =
    Using(PDDocument.load(is))(readText).toEither.flatten

  def get(inFile: Path): Either[Throwable, Text] =
    Using(PDDocument.load(inFile.toFile))(readText).toEither.flatten

  private def readText(doc: PDDocument): Either[Throwable, Text] =
    Try {
      val stripper = new PDFTextStripper()
      stripper.setAddMoreFormatting(true)
      stripper.setLineSeparator("\n")
      Text(Option(stripper.getText(doc)))
    }.toEither
} 
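A usage sketch for the InputStream overload above; the PDF path is hypothetical, and closing the stream stays with the caller since PdfboxExtract only reads from it.

import java.io.FileInputStream

val is = new FileInputStream("document.pdf")   // hypothetical file
val extracted =
  try PdfboxExtract.get(is)                    // Either[Throwable, Text]
  finally is.close()
extracted match {
  case Right(text) => println(text)
  case Left(error) => println(s"extraction failed: ${error.getMessage}")
}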
Example 135
Source File: OdfExtract.scala    From docspell   with GNU General Public License v3.0
package docspell.extract.odf

import java.io.{ByteArrayInputStream, InputStream}

import scala.util.Try

import cats.effect._
import cats.implicits._
import fs2.Stream

import docspell.extract.internal.Text

import org.apache.tika.metadata.Metadata
import org.apache.tika.parser.ParseContext
import org.apache.tika.parser.odf.OpenDocumentParser
import org.apache.tika.sax.BodyContentHandler

object OdfExtract {

  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)

  def get(is: InputStream) =
    Try {
      val handler  = new BodyContentHandler()
      val pctx     = new ParseContext()
      val meta     = new Metadata()
      val ooparser = new OpenDocumentParser()
      ooparser.parse(is, handler, meta, pctx)
      Text(Option(handler.toString))
    }.toEither

} 
Example 136
Source File: RtfExtract.scala    From docspell   with GNU General Public License v3.0
package docspell.extract.rtf

import java.io.{ByteArrayInputStream, InputStream}
import javax.swing.text.rtf.RTFEditorKit

import scala.util.Try

import cats.effect.Sync
import cats.implicits._
import fs2.Stream

import docspell.common.MimeType
import docspell.extract.internal.Text

object RtfExtract {

  val rtfType = MimeType.application("rtf")

  def get(is: InputStream): Either[Throwable, Text] =
    Try {
      val kit = new RTFEditorKit()
      val doc = kit.createDefaultDocument()
      kit.read(is, doc, 0)
      Text(doc.getText(0, doc.getLength))
    }.toEither

  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)
} 
Example 137
Source File: PoiExtract.scala    From docspell   with GNU General Public License v3.0
package docspell.extract.poi

import java.io.{ByteArrayInputStream, InputStream}

import scala.util.Try

import cats.data.EitherT
import cats.effect.Sync
import cats.implicits._
import fs2.Stream

import docspell.common._
import docspell.extract.internal.Text
import docspell.files.TikaMimetype

import org.apache.poi.hssf.extractor.ExcelExtractor
import org.apache.poi.hssf.usermodel.HSSFWorkbook
import org.apache.poi.hwpf.extractor.WordExtractor
import org.apache.poi.xssf.extractor.XSSFExcelExtractor
import org.apache.poi.xssf.usermodel.XSSFWorkbook
import org.apache.poi.xwpf.extractor.XWPFWordExtractor
import org.apache.poi.xwpf.usermodel.XWPFDocument

object PoiExtract {

  def get[F[_]: Sync](
      data: Stream[F, Byte],
      hint: MimeTypeHint
  ): F[Either[Throwable, Text]] =
    TikaMimetype.detect(data, hint).flatMap(mt => get(data, mt))

  def get[F[_]: Sync](
      data: Stream[F, Byte],
      mime: MimeType
  ): F[Either[Throwable, Text]] =
    mime match {
      case PoiType.doc =>
        getDoc(data)
      case PoiType.xls =>
        getXls(data)
      case PoiType.xlsx =>
        getXlsx(data)
      case PoiType.docx =>
        getDocx(data)
      case PoiType.msoffice =>
        EitherT(getDoc[F](data))
          .recoverWith({
            case _ => EitherT(getXls[F](data))
          })
          .value
      case PoiType.ooxml =>
        EitherT(getDocx[F](data))
          .recoverWith({
            case _ => EitherT(getXlsx[F](data))
          })
          .value
      case mt =>
        Sync[F].pure(Left(new Exception(s"Unsupported content: ${mt.asString}")))
    }

  def getDocx(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new XWPFWordExtractor(new XWPFDocument(is))
      Text(Option(xt.getText))
    }.toEither

  def getDoc(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new WordExtractor(is)
      Text(Option(xt.getText))
    }.toEither

  def getXlsx(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new XSSFExcelExtractor(new XSSFWorkbook(is))
      Text(Option(xt.getText))
    }.toEither

  def getXls(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new ExcelExtractor(new HSSFWorkbook(is))
      Text(Option(xt.getText))
    }.toEither

  def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDocx)

  def getDoc[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDoc)

  def getXlsx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXlsx)

  def getXls[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXls)

} 
Example 138
Source File: Zip.scala    From docspell   with GNU General Public License v3.0
package docspell.files

import java.io.InputStream
import java.nio.file.Paths
import java.util.zip.ZipInputStream

import cats.effect._
import cats.implicits._
import fs2.{Pipe, Stream}

import docspell.common.Binary

object Zip {

  def unzipP[F[_]: ConcurrentEffect: ContextShift](
      chunkSize: Int,
      blocker: Blocker
  ): Pipe[F, Byte, Binary[F]] =
    s => unzip[F](chunkSize, blocker)(s)

  def unzip[F[_]: ConcurrentEffect: ContextShift](chunkSize: Int, blocker: Blocker)(
      data: Stream[F, Byte]
  ): Stream[F, Binary[F]] =
    data.through(fs2.io.toInputStream[F]).flatMap(in => unzipJava(in, chunkSize, blocker))

  def unzipJava[F[_]: Sync: ContextShift](
      in: InputStream,
      chunkSize: Int,
      blocker: Blocker
  ): Stream[F, Binary[F]] = {
    val zin = new ZipInputStream(in)

    val nextEntry = Resource.make(Sync[F].delay(Option(zin.getNextEntry))) {
      case Some(_) => Sync[F].delay(zin.closeEntry())
      case None    => ().pure[F]
    }

    Stream
      .resource(nextEntry)
      .repeat
      .unNoneTerminate
      .map { ze =>
        val name = Paths.get(ze.getName()).getFileName.toString
        val data =
          fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, blocker, false)
        Binary(name, data)
      }
  }
} 
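A usage sketch for unzipJava, assuming a cats-effect 2 / fs2 2 setup (Blocker and ContextShift as in the signatures above); the archive path is hypothetical.

import java.io.FileInputStream

import cats.effect.{Blocker, ContextShift, IO}

import scala.concurrent.ExecutionContext

implicit val cs: ContextShift[IO] = IO.contextShift(ExecutionContext.global)

val entryNames: IO[List[String]] = Blocker[IO].use { blocker =>
  val in = new FileInputStream("archive.zip")   // hypothetical archive
  Zip.unzipJava[IO](in, 8192, blocker)
    .map(_.name)
    .compile
    .toList
}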
Example 139
Source File: ImageSize.scala    From docspell   with GNU General Public License v3.0
package docspell.files

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.file.Path
import javax.imageio.stream.{FileImageInputStream, ImageInputStream}
import javax.imageio.{ImageIO, ImageReader}

import scala.jdk.CollectionConverters._
import scala.util.{Try, Using}

import cats.effect._
import cats.implicits._
import fs2.Stream

object ImageSize {

  
  def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] =
    data.take(768).compile.to(Array).map { ar =>
      val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(ar))
      if (iis == null) sys.error("no reader given for the array")
      else getDimension(iis)
    }

  private def getDimension(in: ImageInputStream): Option[Dimension] =
    ImageIO
      .getImageReaders(in)
      .asScala
      .to(LazyList)
      .collectFirst(Function.unlift { reader =>
        val dim = getDimension(in, reader).toOption
        reader.dispose()
        dim
      })

  private def getDimension(
      in: ImageInputStream,
      reader: ImageReader
  ): Either[Throwable, Dimension] =
    Try {
      reader.setInput(in)
      val width  = reader.getWidth(reader.getMinIndex)
      val height = reader.getHeight(reader.getMinIndex)
      Dimension(width, height)
    }.toEither
} 
Example 140
Source File: ProcessStreamConnectionProvider.scala    From intellij-lsp   with Apache License 2.0
package com.github.gtache.lsp.client.connection

import java.io.{File, IOException, InputStream, OutputStream}
import java.util.Objects

import com.intellij.openapi.diagnostic.Logger
import org.jetbrains.annotations.Nullable


class ProcessStreamConnectionProvider(private var commands: Seq[String], private var workingDir: String) extends StreamConnectionProvider {
  private val LOG: Logger = Logger.getInstance(classOf[ProcessStreamConnectionProvider])
  @Nullable private var process: Process = _

  @throws[IOException]
  override def start(): Unit = {
    if (this.workingDir == null || this.commands == null || this.commands.isEmpty || this.commands.contains(null)) throw new IOException("Unable to start language server: " + this.toString) //$NON-NLS-1$
    val builder = createProcessBuilder
    LOG.info("Starting server process with commands " + commands + " and workingDir " + workingDir)
    this.process = builder.start

    if (!process.isAlive) throw new IOException("Unable to start language server: " + this.toString) else LOG.info("Server process started " + process)
  }

  protected def createProcessBuilder: ProcessBuilder = {
    import scala.collection.JavaConverters._
    val builder = new ProcessBuilder(getCommands.map(s => s.replace("\'", "")).asJava)
    builder.directory(new File(getWorkingDirectory))
    builder
  }

  protected def getCommands: Seq[String] = commands

  def setCommands(commands: Seq[String]): Unit = {
    this.commands = commands
  }

  protected def getWorkingDirectory: String = workingDir

  def setWorkingDirectory(workingDir: String): Unit = {
    this.workingDir = workingDir
  }

  @Nullable override def getInputStream: InputStream = {
    if (process == null) null
    else process.getInputStream
  }

  @Nullable override def getOutputStream: OutputStream = {
    if (process == null) null
    else process.getOutputStream
  }

  @Nullable override def getErrorStream: InputStream = {
    if (process == null) null
    else process.getErrorStream
  }

  override def stop(): Unit = {
    if (process != null) process.destroy()
  }

  override def equals(obj: Any): Boolean = {
    obj match {
      case other: ProcessStreamConnectionProvider =>
        getCommands.size == other.getCommands.size && this.getCommands.toSet == other.getCommands.toSet && this.getWorkingDirectory == other.getWorkingDirectory
      case _ => false
    }

  }

  override def hashCode: Int = {
    Objects.hashCode(this.getCommands) ^ Objects.hashCode(this.getWorkingDirectory)
  }
} 
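A usage sketch wiring the provider to a hypothetical language-server executable; start() throws IOException if the process cannot be launched, and the two streams are then used for the LSP wire protocol.

val provider = new ProcessStreamConnectionProvider(
  commands   = Seq("my-language-server", "--stdio"),   // hypothetical command
  workingDir = "/tmp/project"                          // hypothetical directory
)

provider.start()
try {
  val fromServer = provider.getInputStream    // server -> client messages
  val toServer   = provider.getOutputStream   // client -> server messages
  // ... hand both streams to an LSP client implementation ...
} finally provider.stop()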
Example 141
Source File: ProcessOverSocketStreamConnectionProvider.scala    From intellij-lsp   with Apache License 2.0
package com.github.gtache.lsp.client.connection

import java.io.{IOException, InputStream, OutputStream}
import java.net.{ServerSocket, Socket}
import java.util.Objects

import com.intellij.openapi.diagnostic.Logger


class ProcessOverSocketStreamConnectionProvider(commands: Seq[String], workingDir: String, port: Int = 0) extends ProcessStreamConnectionProvider(commands, workingDir) {

  import ProcessOverSocketStreamConnectionProvider._

  private var socket: Socket = _
  private var inputStream: InputStream = _
  private var outputStream: OutputStream = _

  @throws[IOException]
  override def start(): Unit = {
    val serverSocket = new ServerSocket(port)
    val socketThread = new Thread(() => {
      try
        socket = serverSocket.accept
      catch {
        case e: IOException =>
          LOG.error(e)
      } finally try
        serverSocket.close()
      catch {
        case e: IOException =>
          LOG.error(e)
      }
    })
    socketThread.start()
    super.start()
    try {
      socketThread.join(5000)
    }
    catch {
      case e: InterruptedException =>
        LOG.error(e)
    }
    if (socket == null) throw new IOException("Unable to make socket connection: " + toString) //$NON-NLS-1$
    inputStream = socket.getInputStream
    outputStream = socket.getOutputStream
  }

  override def getInputStream: InputStream = inputStream

  override def getOutputStream: OutputStream = outputStream

  override def getErrorStream: InputStream = inputStream

  override def stop(): Unit = {
    super.stop()
    if (socket != null) try
      socket.close()
    catch {
      case e: IOException =>
        LOG.error(e)
    }
  }

  override def hashCode: Int = {
    val result = super.hashCode
    result ^ Objects.hashCode(this.port)
  }
}

object ProcessOverSocketStreamConnectionProvider {
  private val LOG = Logger.getInstance(classOf[ProcessOverSocketStreamConnectionProvider])
} 
Example 142
Source File: RconConnector.scala    From chatoverflow   with Eclipse Public License 2.0
package org.codeoverflow.chatoverflow.requirement.service.rcon

import java.io.{DataInputStream, IOException, InputStream, OutputStream}
import java.net.{Socket, SocketException}
import java.nio.{ByteBuffer, ByteOrder}
import java.util.Random

import org.codeoverflow.chatoverflow.WithLogger
import org.codeoverflow.chatoverflow.connector.Connector

class RconConnector(override val sourceIdentifier: String) extends Connector(sourceIdentifier) with WithLogger {
  override protected var requiredCredentialKeys: List[String] = List("password", "address")
  override protected var optionalCredentialKeys: List[String] = List("port")

  private var socket: Socket = _
  private var outputStream: OutputStream = _
  private var inputStream: InputStream = _
  private var requestId: Int = 0

  def sendCommand(command: String): String = {
    logger debug s"Sending $command to RCON"
    requestId += 1
    if (write(2, command.getBytes("ASCII"))) {
      return read()
    }
    null
  }


  
  override def stop(): Boolean = {
    logger info s"Stopped RCON connector to ${credentials.get.getValue("address").get}!"
    socket.close()
    true
  }
} 
Example 143
Source File: SerialInputImpl.scala    From chatoverflow   with Eclipse Public License 2.0
package org.codeoverflow.chatoverflow.requirement.service.serial.impl

import java.io.InputStream

import org.codeoverflow.chatoverflow.WithLogger
import org.codeoverflow.chatoverflow.api.io.event.serial.{SerialDataAvailableEvent, SerialEvent}
import org.codeoverflow.chatoverflow.api.io.input.SerialInput
import org.codeoverflow.chatoverflow.registry.Impl
import org.codeoverflow.chatoverflow.requirement.impl.EventInputImpl
import org.codeoverflow.chatoverflow.requirement.service.serial.SerialConnector

@Impl(impl = classOf[SerialInput], connector = classOf[SerialConnector])
class SerialInputImpl extends EventInputImpl[SerialEvent, SerialConnector] with SerialInput with WithLogger {

  private val onInputFn = onInput _

  override def start(): Boolean = {
    sourceConnector.get.addInputListener(onInputFn)
    true
  }

  private def onInput(bytes: Array[Byte]): Unit = call(new SerialDataAvailableEvent(bytes))

  override def getInputStream: InputStream = sourceConnector.get.getInputStream

  
  override def stop(): Boolean = {
    sourceConnector.get.removeInputListener(onInputFn)
    true
  }
} 
Example 144
Source File: CheckPoint.scala    From eclair   with Apache License 2.0
package fr.acinq.eclair.blockchain.electrum

import java.io.InputStream

import fr.acinq.bitcoin.{Block, ByteVector32, encodeCompact}
import fr.acinq.eclair.blockchain.electrum.db.HeaderDb
import org.json4s.JsonAST.{JArray, JInt, JString}
import org.json4s.jackson.JsonMethods


// (the definitions below are assumed for this excerpt: the checkpoint case class,
//  the retargeting-period constant and the single-argument load overload used further down)
case class CheckPoint(hash: ByteVector32, nextBits: Long)

object CheckPoint {

  // Bitcoin difficulty retargeting period, in blocks.
  val RETARGETING_PERIOD = 2016

  // Loads the checkpoints bundled with the application for the given chain;
  // the resource-parsing body is elided here.
  def load(chainHash: ByteVector32): Vector[CheckPoint] = ???
  def load(chainHash: ByteVector32, headerDb: HeaderDb): Vector[CheckPoint] = {
    val checkpoints = CheckPoint.load(chainHash)
    val checkpoints1 = headerDb.getTip match {
      case Some((height, header)) =>
        val newcheckpoints = for {h <- checkpoints.size * RETARGETING_PERIOD - 1 + RETARGETING_PERIOD to height - RETARGETING_PERIOD by RETARGETING_PERIOD} yield {
          // we * should * have these headers in our db
          val cpheader = headerDb.getHeader(h).get
          val nextDiff = headerDb.getHeader(h + 1).get.bits
          CheckPoint(cpheader.hash, nextDiff)
        }
        checkpoints ++ newcheckpoints
      case None => checkpoints
    }
    checkpoints1
  }
} 
Example 145
Source File: HeaderCreator.scala    From sbt-header   with Apache License 2.0
package de.heikoseeberger.sbtheader

import java.io.InputStream
import sbt.Logger
import scala.io.Codec

object HeaderCreator {

  def apply(
      fileType: FileType,
      commentStyle: CommentStyle,
      license: License,
      headerEmptyLine: Boolean,
      log: Logger,
      input: InputStream
  ): HeaderCreator =
    new HeaderCreator(fileType, commentStyle, license, headerEmptyLine, log, input)
}

final class HeaderCreator private (
    fileType: FileType,
    commentStyle: CommentStyle,
    license: License,
    headerEmptyLine: Boolean,
    log: Logger,
    input: InputStream
) {

  private val crlf          = """(?s)(?:.*)(\r\n)(?:.*)""".r
  private val cr            = """(?s)(?:.*)(\r)(?:.*)""".r
  private val headerPattern = commentStyle.pattern

  private val (firstLine, text) = {
    val fileContent =
      try scala.io.Source.fromInputStream(input)(Codec.UTF8).mkString
      finally input.close()
    fileType.firstLinePattern match {
      case Some(pattern) =>
        fileContent match {
          case pattern(first, rest) => (first, rest)
          case other                => ("", other)
        }
      case _ => ("", fileContent)
    }
  }

  log.debug(s"First line of file is:$newLine$firstLine")
  log.debug(s"Text of file is:$newLine$text")

  private val fileNewLine =
    text match {
      case crlf(_) => "\r\n"
      case cr(_)   => "\r"
      case _       => "\n"
    }

  private def newHeaderText(existingHeader: Option[String]) = {
    val suffix     = if (headerEmptyLine) "" else newLine
    val headerText = commentStyle(license, existingHeader).stripSuffix(suffix)
    val headerNewLine =
      headerText match {
        case crlf(_) => "\r\n"
        case cr(_)   => "\r"
        case _       => "\n"
      }
    headerText.replace(headerNewLine, fileNewLine)
  }

  private val modifiedText =
    text match {
      case headerPattern(existingText, body) =>
        val newText = newHeaderText(Some(existingText))
        if (newText == existingText) None
        else Some(firstLine + newText + body.replaceAll("""^\s+""", "")) // Trim left
      case body if body.isEmpty => None
      case body =>
        Some(firstLine + newHeaderText(None) + body.replaceAll("""^\s+""", "")) // Trim left
    }
  log.debug(s"Modified text of file is:$newLine$modifiedText")

  def createText: Option[String] =
    modifiedText
} 
Example 146
Source File: Compression.scala    From databus-maven-plugin   with GNU Affero General Public License v3.0
package org.dbpedia.databus.lib

import better.files._
import java.io.{BufferedInputStream, FileInputStream, InputStream}
import com.codahale.metrics.MetricRegistry
import org.apache.commons.compress.archivers.{ArchiveEntry, ArchiveException, ArchiveInputStream, ArchiveStreamFactory}
import org.apache.commons.compress.compressors.{CompressorException, CompressorInputStream, CompressorStreamFactory}

import scala.util.Try

object Compression {

  def detectCompression(datafile: File): Option[String] = {
    try {
      Some(datafile.inputStream.map(_.buffered).apply(CompressorStreamFactory.detect))
    } catch {
      case ce: CompressorException => None
    }
  }

  def detectArchive(datafile: File): Option[String] = {
    try {
      Some(datafile.inputStream.map(_.buffered).apply(ArchiveStreamFactory.detect))
    } catch {
      case ce: ArchiveException => None
    }
  }
} 
Example 147
Source File: CommandExecutor.scala    From renku   with Apache License 2.0
package ch.renku.acceptancetests.tooling.console

import java.io.{File, InputStream}
import java.nio.file.Path
import java.util
import java.util.concurrent.ConcurrentLinkedQueue

import cats.effect.IO
import cats.implicits._
import ch.renku.acceptancetests.model.users.UserCredentials
import ch.renku.acceptancetests.tooling.TestLogger.logger
import ch.renku.acceptancetests.tooling.console.Command.UserInput

import scala.jdk.CollectionConverters._
import scala.language.postfixOps
import scala.sys.process._

private class CommandExecutor(command: Command) {

  def execute(implicit workPath: Path, userCredentials: UserCredentials): String = {

    implicit val output: util.Collection[String] = new ConcurrentLinkedQueue[String]()

    IO {
      executeCommand
      output.asString
    } recoverWith consoleException
  }.unsafeRunSync()

  def safeExecute(implicit workPath: Path, userCredentials: UserCredentials): String = {
    implicit val output: util.Collection[String] = new ConcurrentLinkedQueue[String]()

    IO {
      executeCommand
      output.asString
    } recover outputAsString
  }.unsafeRunSync()

  private def executeCommand(implicit workPath: Path,
                             output:            util.Collection[String],
                             userCredentials:   UserCredentials): Unit =
    command.userInputs.foldLeft(buildProcess) { (process, userInput) =>
      process #< userInput.asStream
    } lazyLines ProcessLogger(logLine _) foreach logLine

  private def buildProcess(implicit workPath: Path) =
    command.maybeFileName.foldLeft(Process(command.toString.stripMargin, workPath.toFile)) { (process, fileName) =>
      process #>> new File(workPath.toUri resolve fileName.value)
    }

  private def logLine(
      line:          String
  )(implicit output: util.Collection[String], userCredentials: UserCredentials): Unit = line.trim match {
    case "" => ()
    case line =>
      val obfuscatedLine = line.replace(userCredentials.password.value, "###")
      output add obfuscatedLine
      logger debug obfuscatedLine
  }

  private def consoleException(implicit output: util.Collection[String]): PartialFunction[Throwable, IO[String]] = {
    case _ =>
      ConsoleException {
        s"$command failed with:\n${output.asString}"
      }.raiseError[IO, String]
  }

  private def outputAsString(implicit output: util.Collection[String]): PartialFunction[Throwable, String] = {
    case _ => output.asString
  }

  private implicit class OutputOps(output: util.Collection[String]) {
    lazy val asString: String = output.asScala.mkString("\n")
  }

  private implicit class UserInputOps(userInput: UserInput) {
    import java.nio.charset.StandardCharsets.UTF_8

    lazy val asStream: InputStream = new java.io.ByteArrayInputStream(
      userInput.value.getBytes(UTF_8.name)
    )
  }
} 
Example 148
Source File: WholeFileInputFormat.scala    From flink-tensorflow   with Apache License 2.0
package org.apache.flink.contrib.tensorflow.io

import java.io.{EOFException, IOException, InputStream}

import org.apache.flink.api.common.io.FileInputFormat
import org.apache.flink.configuration.Configuration
import org.apache.flink.core.fs._
import org.apache.flink.util.Preconditions.checkState


// (class header assumed for this excerpt) Reads each file of the input as a single
// record; concrete subclasses decode the complete file content in readRecord.
abstract class WholeFileInputFormat[T] extends FileInputFormat[T] {
  @throws[IOException]
  def readRecord(reuse: T, filePath: Path, fileStream: FSDataInputStream, fileLength: Long): T

  // --------------------------------------------------------------------------------------------
  //  Lifecycle
  // --------------------------------------------------------------------------------------------

  override def nextRecord(reuse: T): T = {
    checkState(!reachedEnd())
    checkState(currentSplit != null && currentSplit.getStart == 0)
    checkState(stream != null)
    readRecord(reuse, currentSplit.getPath, stream, currentSplit.getLength)
  }

  override def reachedEnd(): Boolean = {
    stream.getPos != 0
  }
}

@SerialVersionUID(1L)
object WholeFileInputFormat {

  @throws[IOException]
  def readFully(fileStream: FSDataInputStream, fileLength: Long): Array[Byte] = {
    if(fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }
    val buf = new Array[Byte](fileLength.toInt)
    readFully(fileStream, buf, 0, fileLength.toInt)
    buf
  }

  @throws[IOException]
  def readFully(inputStream: InputStream, buf: Array[Byte], off: Int, len: Int): Array[Byte] = {
    var bytesRead = 0
    while (bytesRead < len) {
      val read = inputStream.read(buf, off + bytesRead, len - bytesRead)
      if (read < 0) throw new EOFException("Premature end of stream")
      bytesRead += read
    }
    buf
  }
} 
Example 149
Source File: ZipUtil.scala    From coursier   with Apache License 2.0
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}
import java.io.{ByteArrayOutputStream, File, FileInputStream, FileOutputStream, InputStream}

object ZipUtil {

  def addToZip(sourceZip: File, destZip: File, extra: Seq[(String, Array[Byte])]): Unit = {
    
    val is = new FileInputStream(sourceZip)
    val os = new FileOutputStream(destZip)
    val bootstrapZip = new ZipInputStream(is)
    val outputZip = new ZipOutputStream(os)

    def readFullySync(is: InputStream) = {
      val buffer = new ByteArrayOutputStream
      val data = Array.ofDim[Byte](16384)

      var nRead = is.read(data, 0, data.length)
      while (nRead != -1) {
        buffer.write(data, 0, nRead)
        nRead = is.read(data, 0, data.length)
      }

      buffer.flush()
      buffer.toByteArray
    }

    def zipEntries(zipStream: ZipInputStream): Iterator[(ZipEntry, Array[Byte])] =
      new Iterator[(ZipEntry, Array[Byte])] {
        private var nextEntry = Option.empty[ZipEntry]
        private def update() =
          nextEntry = Option(zipStream.getNextEntry)

        update()

        def hasNext = nextEntry.nonEmpty
        def next() = {
          val ent = nextEntry.get
          val data = readFullySync(zipStream)

          update()

          (ent, data)
        }
      }

    val extraNames = extra.map(_._1).toSet

    for ((ent, data) <- zipEntries(bootstrapZip) if !extraNames(ent.getName)) {

      // Same workaround as https://github.com/spring-projects/spring-boot/issues/13720
      // (https://github.com/spring-projects/spring-boot/commit/a50646b7cc3ad941e748dfb450077e3a73706205#diff-2ff64cd06c0b25857e3e0dfdb6733174R144)
      ent.setCompressedSize(-1L)

      outputZip.putNextEntry(ent)
      outputZip.write(data)
      outputZip.closeEntry()
    }

    for ((dest, data) <- extra) {
      outputZip.putNextEntry(new ZipEntry(dest))
      outputZip.write(data)
      outputZip.closeEntry()
    }

    outputZip.close()

    is.close()
    os.close()

  }

} 
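A usage sketch for addToZip: it copies a jar and appends one extra entry. The file names and the entry content are hypothetical.

import java.io.File
import java.nio.charset.StandardCharsets

ZipUtil.addToZip(
  sourceZip = new File("app.jar"),           // hypothetical input
  destZip   = new File("app-patched.jar"),   // hypothetical output
  extra     = Seq(
    "META-INF/extra.properties" -> "patched=true\n".getBytes(StandardCharsets.UTF_8)
  )
)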
Example 150
Source File: PowershellRunner.scala    From coursier   with Apache License 2.0
package coursier.env

import java.nio.charset.StandardCharsets
import java.util.Base64
import dataclass.data
import java.io.InputStream
import java.io.ByteArrayOutputStream

@data class PowershellRunner(
  powershellExePath: String = "powershell.exe",
  options: Seq[String] = PowershellRunner.defaultOptions,
  encodeProgram: Boolean = true
) {

  def runScript(script: String): String = {

    // inspired by https://github.com/soc/directories-jvm/blob/1f344ef0087e8422f6c7334317e73b8763d9e483/src/main/java/io/github/soc/directories/Util.java#L147
    val fullScript = "& {\n" +
      "[Console]::OutputEncoding = [System.Text.Encoding]::UTF8\n" +
      script +
      "\n}"

    val scriptArgs =
      if (encodeProgram) {
        val base64 = Base64.getEncoder()
        val encodedScript = base64.encodeToString(fullScript.getBytes(StandardCharsets.UTF_16LE))
        Seq("-EncodedCommand", encodedScript)
      } else
        Seq("-Command", fullScript)

    val command = Seq(powershellExePath) ++ options ++ scriptArgs

    val b = new ProcessBuilder(command: _*)
      .redirectInput(ProcessBuilder.Redirect.PIPE)
      .redirectOutput(ProcessBuilder.Redirect.PIPE)
      .redirectError(ProcessBuilder.Redirect.INHERIT)
    val p: Process = b.start()
    p.getOutputStream.close()
    val outputBytes = PowershellRunner.readFully(p.getInputStream)
    val retCode = p.waitFor()
    if (retCode == 0)
      new String(outputBytes, StandardCharsets.UTF_8)
    else
      throw new Exception(s"Error running powershell script (exit code: $retCode)")
  }

}

object PowershellRunner {

  def defaultOptions: Seq[String] =
    Seq("-NoProfile", "-NonInteractive")

  private def readFully(is: InputStream): Array[Byte] = {
    val buffer = new ByteArrayOutputStream
    val data = Array.ofDim[Byte](16384)

    var nRead = 0
    while ({
      nRead = is.read(data, 0, data.length)
      nRead != -1
    })
      buffer.write(data, 0, nRead)

    buffer.flush()
    buffer.toByteArray
  }


} 
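A usage sketch for PowershellRunner; it only works where powershell.exe is available, and the script here is a trivial placeholder.

val runner = new PowershellRunner()   // defaults: powershell.exe with -NoProfile -NonInteractive, encoded command
val output = runner.runScript("Write-Output \"hello from powershell\"")
print(output)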
Example 151
Source File: FileUtil.scala    From coursier   with Apache License 2.0
package coursier.launcher.internal

import java.io.{ByteArrayOutputStream, InputStream, OutputStream}
import java.nio.file.attribute.PosixFilePermission
import java.nio.file.{Files, LinkOption, Path}

import scala.collection.JavaConverters._

private[coursier] object FileUtil {

  // Won't be necessary anymore with Java 9
  // (https://docs.oracle.com/javase/9/docs/api/java/io/InputStream.html#readAllBytes--,
  // via https://stackoverflow.com/questions/1264709/convert-inputstream-to-byte-array-in-java/37681322#37681322)
  def readFullyUnsafe(is: InputStream): Array[Byte] = {
    val buffer = new ByteArrayOutputStream
    val data = Array.ofDim[Byte](16384)

    var nRead = 0
    while ({
      nRead = is.read(data, 0, data.length)
      nRead != -1
    })
      buffer.write(data, 0, nRead)

    buffer.flush()
    buffer.toByteArray
  }

  def readFully(is: => InputStream): Array[Byte] = {
    var is0: InputStream = null
    try {
      is0 = is
      readFullyUnsafe(is0)
    } finally {
      if (is0 != null)
        is0.close()
    }
  }

  def withOutputStream[T](path: Path)(f: OutputStream => T): T = {
    var os: OutputStream = null
    try {
      os = Files.newOutputStream(path)
      f(os)
    } finally {
      if (os != null)
        os.close()
    }
  }

  def tryMakeExecutable(path: Path): Boolean = {

    try {
      val perms = Files.getPosixFilePermissions(path).asScala.toSet

      var newPerms = perms
      if (perms(PosixFilePermission.OWNER_READ))
        newPerms += PosixFilePermission.OWNER_EXECUTE
      if (perms(PosixFilePermission.GROUP_READ))
        newPerms += PosixFilePermission.GROUP_EXECUTE
      if (perms(PosixFilePermission.OTHERS_READ))
        newPerms += PosixFilePermission.OTHERS_EXECUTE

      if (newPerms != perms)
        Files.setPosixFilePermissions(
          path,
          newPerms.asJava
        )

      true
    } catch {
      case _: UnsupportedOperationException =>
        false
    }
  }

  def tryHideWindows(path: Path): Boolean =
    Windows.isWindows && {
      try {
        Files.setAttribute(path, "dos:hidden", java.lang.Boolean.TRUE, LinkOption.NOFOLLOW_LINKS)
        true
      } catch {
        case _: UnsupportedOperationException =>
          false
      }
    }

} 
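A usage sketch combining the helpers above (note the object is private[coursier], so this only compiles from inside the coursier namespace); the resource and destination paths are hypothetical.

import java.nio.file.Paths

val bytes = FileUtil.readFully(getClass.getResourceAsStream("/launcher-template.sh"))   // hypothetical resource
val dest  = Paths.get("/tmp/launcher.sh")                                               // hypothetical destination

FileUtil.withOutputStream(dest)(_.write(bytes))
if (!FileUtil.tryMakeExecutable(dest))
  println("POSIX permissions not supported on this file system")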
Example 152
Source File: FileUtil.scala    From coursier   with Apache License 2.0
package coursier.cache.internal

import java.io.{ByteArrayOutputStream, InputStream}

object FileUtil {

  // Won't be necessary anymore with Java 9
  // (https://docs.oracle.com/javase/9/docs/api/java/io/InputStream.html#readAllBytes--,
  // via https://stackoverflow.com/questions/1264709/convert-inputstream-to-byte-array-in-java/37681322#37681322)
  def readFullyUnsafe(is: InputStream): Array[Byte] = {
    val buffer = new ByteArrayOutputStream
    val data = Array.ofDim[Byte](16384)

    var nRead = 0
    while ({
      nRead = is.read(data, 0, data.length)
      nRead != -1
    })
      buffer.write(data, 0, nRead)

    buffer.flush()
    buffer.toByteArray
  }

  def readFully(is: => InputStream): Array[Byte] = {
    var is0: InputStream = null
    try {
      is0 = is
      readFullyUnsafe(is0)
    } finally {
      if (is0 != null)
        is0.close()
    }
  }

  def withContent(is: InputStream, f: WithContent, bufferSize: Int = 16384): Unit = {
    val data = Array.ofDim[Byte](bufferSize)

    var nRead = is.read(data, 0, data.length)
    while (nRead != -1) {
      f(data, nRead)
      nRead = is.read(data, 0, data.length)
    }
  }

  trait WithContent {
    def apply(arr: Array[Byte], z: Int): Unit
  }

  class UpdateDigest(md: java.security.MessageDigest) extends FileUtil.WithContent {
    def apply(arr: Array[Byte], z: Int): Unit = md.update(arr, 0, z)
  }

} 
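A usage sketch for withContent and UpdateDigest, streaming a hypothetical file through a SHA-256 digest without loading it fully into memory.

import java.io.FileInputStream
import java.security.MessageDigest

val md = MessageDigest.getInstance("SHA-256")
val is = new FileInputStream("artifact.jar")   // hypothetical file
try FileUtil.withContent(is, new FileUtil.UpdateDigest(md), bufferSize = 32768)
finally is.close()

val sha256 = md.digest().map("%02x".format(_)).mkString
println(sha256)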
Example 153
Source File: MainClass.scala    From coursier   with Apache License 2.0
package coursier.install

import java.io.{File, InputStream}
import java.util.jar.{Manifest => JManifest}
import java.util.zip.ZipFile

object MainClass {

  private def manifestPath = "META-INF/MANIFEST.MF"

  def mainClasses(jars: Seq[File]): Map[(String, String), String] = {

    val metaInfs = jars.flatMap { f =>
      val zf = new ZipFile(f)
      val entryOpt = Option(zf.getEntry(manifestPath))
      entryOpt.map(e => () => zf.getInputStream(e)).toSeq
    }

    val mainClasses = metaInfs.flatMap { f =>
      var is: InputStream = null
      val attributes =
        try {
          is = f()
          new JManifest(is).getMainAttributes
        } finally {
          if (is != null)
            is.close()
        }

      def attributeOpt(name: String) =
        Option(attributes.getValue(name))

      val vendor = attributeOpt("Implementation-Vendor-Id").getOrElse("")
      val title = attributeOpt("Specification-Title").getOrElse("")
      val mainClass = attributeOpt("Main-Class")

      mainClass.map((vendor, title) -> _)
    }

    mainClasses.toMap
  }

  def retainedMainClassOpt(
    mainClasses: Map[(String, String), String],
    mainDependencyOpt: Option[(String, String)]
  ): Option[String] =
    if (mainClasses.size == 1) {
      val (_, mainClass) = mainClasses.head
      Some(mainClass)
    } else {

      // Trying to get the main class of the first artifact
      val mainClassOpt = for {
        (mainOrg, mainName) <- mainDependencyOpt
        mainClass <- mainClasses.collectFirst {
          case ((org, name), mainClass)
            if org == mainOrg && (
              mainName == name ||
                mainName.startsWith(name + "_") // Ignore cross version suffix
              ) =>
            mainClass
        }
      } yield mainClass

      def sameOrgOnlyMainClassOpt = for {
        (mainOrg, mainName) <- mainDependencyOpt
        orgMainClasses = mainClasses.collect {
          case ((org, _), mainClass)
            if org == mainOrg =>
            mainClass
        }.toSet
        if orgMainClasses.size == 1
      } yield orgMainClasses.head

      mainClassOpt.orElse(sameOrgOnlyMainClassOpt)
    }

} 
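A usage sketch for the two helpers above; the jar paths and the main dependency coordinates are hypothetical.

import java.io.File

val jars = Seq(new File("lib/cli_2.13-1.0.0.jar"), new File("lib/dependency-2.3.jar"))   // hypothetical jars

val byVendorAndTitle = MainClass.mainClasses(jars)
val mainClassOpt = MainClass.retainedMainClassOpt(
  byVendorAndTitle,
  mainDependencyOpt = Some(("org.acme", "cli"))   // hypothetical organisation / module name
)
println(mainClassOpt.getOrElse("no main class found"))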
Example 154
Source File: Confirm.scala    From coursier   with Apache License 2.0
package coursier.cli.setup

import java.io.{InputStream, PrintStream}
import java.util.{Locale, Scanner}

import coursier.util.Task
import dataclass.data

import scala.annotation.tailrec

trait Confirm {
  def confirm(message: String, default: Boolean): Task[Boolean]
}

object Confirm {

  @data class ConsoleInput(
    in: InputStream = System.in,
    out: PrintStream = System.err,
    locale: Locale = Locale.getDefault,
    @since
    indent: Int = 0
  ) extends Confirm {
    private val marginOpt = if (indent > 0) Some(" " * indent) else None
    def confirm(message: String, default: Boolean): Task[Boolean] =
      Task.delay {

        val choice =
          if (default) "[Y/n]"
          else "[y/N]"

        val message0 = marginOpt match {
          case None => message
          case Some(margin) => message.linesIterator.map(margin + _).mkString(System.lineSeparator())
        }
        out.print(s"$message0 $choice ")

        @tailrec
        def loop(): Boolean = {
          val scanner = new Scanner(in)
          val resp = scanner.nextLine()
          val resp0 = resp
            .filter(!_.isSpaceChar)
            .toLowerCase(locale)
            .distinct

          resp0 match {
            case "y" => true
            case "n" => false
            case "" => default
            case _ =>
              out.print(s"Please answer Y or N. $choice ")
              loop()
          }
        }

        loop()
      }
  }

  @data class YesToAll(
    out: PrintStream = System.err
  ) extends Confirm {
    def confirm(message: String, default: Boolean): Task[Boolean] =
      Task.delay {
        out.println(message + " [Y/n] Y")
        true
      }
  }

  def default: Confirm =
    ConsoleInput()

} 
Example 155
Source File: ReaderInputStream.scala    From better-files   with MIT License
package better.files

import java.io.{InputStream, Reader}
import java.nio.{ByteBuffer, CharBuffer}
import java.nio.charset.{Charset, CharsetEncoder, CoderResult, CodingErrorAction}

import scala.annotation.tailrec


// (class header, EOF marker and the input-side character buffer are assumed for this excerpt)
class ReaderInputStream(reader: Reader, encoder: CharsetEncoder, bufferSize: Int = 8192) extends InputStream {

  private[this] val EOF = -1

  // Characters read from the underlying Reader, waiting to be encoded into bytes.
  private[this] val encoderIn = CharBuffer.allocate(bufferSize).flip().asInstanceOf[CharBuffer]
  private[this] val encoderOut = ByteBuffer.allocate(bufferSize >> 4).flip().asInstanceOf[ByteBuffer]

  private[this] var lastCoderResult = CoderResult.UNDERFLOW
  private[this] var endOfInput      = false

  private[this] def fillBuffer() = {
    assert(!endOfInput)
    if (lastCoderResult.isUnderflow) {
      val position = encoderIn.compact().position()
      //  We don't use Reader#read(CharBuffer) here because it is more efficient to write directly to the underlying char array
      // since the default implementation copies data to a temporary char array anyway
      reader.read(encoderIn.array, position, encoderIn.remaining) match {
        case EOF => endOfInput = true
        case c   => encoderIn.position(position + c)
      }
      encoderIn.flip()
    }
    lastCoderResult = encoder.encode(encoderIn, encoderOut.compact(), endOfInput)
    encoderOut.flip()
  }

  override def read(b: Array[Byte], off: Int, len: Int) = {
    if (len < 0 || off < 0 || (off + len) > b.length)
      throw new IndexOutOfBoundsException("Array Size=" + b.length + ", offset=" + off + ", length=" + len)
    if (len == 0) {
      0 // Always return 0 if len == 0
    } else {
      var read = 0
      @tailrec def loop(off: Int, len: Int): Unit =
        if (len > 0) {
          if (encoderOut.hasRemaining) {
            val c = encoderOut.remaining min len
            encoderOut.get(b, off, c)
            read += c
            loop(off + c, len - c)
          } else if (!endOfInput) {
            fillBuffer()
            loop(off, len)
          }
        }
      loop(off, len)
      if (read == 0 && endOfInput) EOF else read
    }
  }

  @tailrec final override def read() = {
    if (encoderOut.hasRemaining) {
      encoderOut.get & 0xff
    } else if (endOfInput) {
      EOF
    } else {
      fillBuffer()
      read()
    }
  }

  override def close() = reader.close()
} 
Example 156
Source File: IOStreamOps.scala    From scala-server-lambda   with MIT License 5 votes vote down vote up
package io.github.howardjohn.lambda

import java.io.{InputStream, OutputStream}
import java.nio.charset.StandardCharsets

import scala.io.Source

object StreamOps {
  implicit class InputStreamOps(val is: InputStream) extends AnyVal {
    def consume(): String = {
      val contents = Source.fromInputStream(is).mkString
      is.close()
      contents
    }
  }

  implicit class OutputStreamOps(val os: OutputStream) extends AnyVal {
    def writeAndClose(contents: String): Unit = {
      os.write(contents.getBytes(StandardCharsets.UTF_8))
      os.close()
    }
  }
} 
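A minimal usage sketch for the two implicit classes above (the demo object and the in-memory streams are ours, not part of the project); it round-trips strings through the extension methods:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets
import io.github.howardjohn.lambda.StreamOps._

object StreamOpsDemo extends App {
  val in = new ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8))
  println(in.consume())          // reads everything and closes the stream: prints "hello"

  val out = new ByteArrayOutputStream()
  out.writeAndClose("world")     // writes UTF-8 bytes and closes the stream
  println(out.toString("UTF-8")) // close() is a no-op on ByteArrayOutputStream, so this still works
}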
Example 157
Source File: LambdaHandler.scala    From scala-server-lambda   with MIT License 5 votes vote down vote up
package io.github.howardjohn.lambda

import java.io.{InputStream, OutputStream}

import io.github.howardjohn.lambda.ProxyEncoding._
import io.github.howardjohn.lambda.StreamOps._

trait LambdaHandler {
  def handleRequest(request: ProxyRequest): ProxyResponse

  def handle(is: InputStream, os: OutputStream): Unit = {
    val rawInput = is.consume()
    val request = parseRequest(rawInput).fold(
      e => throw e,
      identity
    )
    val rawResponse = handleRequest(request)
    val response = encodeResponse(rawResponse)
    os.writeAndClose(response)
  }
} 
Example 158
Source File: InvokeMigrationHandler.scala    From flyway-awslambda   with MIT License 5 votes vote down vote up
package crossroad0201.aws.flywaylambda

import java.io.{BufferedOutputStream, InputStream, OutputStream, PrintWriter}

import com.amazonaws.regions.{Region, Regions}
import com.amazonaws.services.lambda.runtime.{Context, RequestStreamHandler}
import com.amazonaws.services.s3.{AmazonS3, AmazonS3Client}

import scala.io.{BufferedSource, Codec}
import scala.util.{Failure, Success, Try}

class InvokeMigrationHandler extends RequestStreamHandler with S3MigrationHandlerBase {
  type BucketName = String
  type Prefix = String
  type ConfFileName = String

  override def handleRequest(input: InputStream, output: OutputStream, context: Context): Unit = {
    def parseInput: Try[(BucketName, Prefix, ConfFileName)] = Try {
      import spray.json._
      import DefaultJsonProtocol._

      val json = new BufferedSource(input)(Codec("UTF-8")).mkString
      val jsObj = JsonParser(json).toJson.asJsObject
      jsObj.getFields(
        "bucket_name",
        "prefix"
      ) match {
        case Seq(JsString(b), JsString(p)) => {
          jsObj.getFields(
            "flyway_conf"
          ) match {
            case Seq(JsString(c)) => (b, p, c)
            case _ => (b, p, "flyway.conf")
          }
        }
        case _ => throw new IllegalArgumentException(s"Missing required keys [bucket_name, prefix]. - $json")
      }
    }

    val logger = context.getLogger

    implicit val s3Client: AmazonS3 = new AmazonS3Client().withRegion(Region.getRegion(Regions.fromName(sys.env("AWS_REGION"))))

    (for {
      i <- parseInput
      _ = { logger.log(s"Flyway migration start. by invoke lambda function(${i._1}, ${i._2}, ${i._3}).") }
      r <- migrate(i._1, i._2, i._3)(context, s3Client)
    } yield r) match {
      case Success(r) =>
        logger.log(r)
        val b = r.getBytes("UTF-8")
        val bout = new BufferedOutputStream(output)
        bout.write(b)
        bout.flush()
      case Failure(e) =>
        e.printStackTrace()
        val w = new PrintWriter(output)
        w.write(e.toString)
        w.flush()
    }
  }

} 
Example 159
Source File: StreamReadingThread.scala    From ncdbg   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package com.programmaticallyspeaking.ncd.nashorn

import java.io.{BufferedReader, IOException, InputStream, InputStreamReader}

import scala.util.control.NonFatal

class StreamReadingThread(in: InputStream, appender: (String) => Unit) extends Thread {
  override def run(): Unit = {
    try {
      val reader = new BufferedReader(new InputStreamReader(in))
      var str = ""
      while (str != null) {
        str = reader.readLine()
        Option(str).foreach(appender)
      }
    } catch {
      case _: InterruptedException =>
        // ok
      case ex: IOException if isStreamClosed(ex) =>
        // ok
      case NonFatal(t) =>
        t.printStackTrace(System.err)
    }
  }

  private def isStreamClosed(ex: IOException) = ex.getMessage.toLowerCase == "stream closed"
} 
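A small sketch of how the thread above can be attached to a child process's stdout (the demo object and the echo command are assumptions; any process producing line-oriented output would do):

import com.programmaticallyspeaking.ncd.nashorn.StreamReadingThread

object StreamReadingDemo extends App {
  val process = new ProcessBuilder("echo", "hello from the child").start()
  val reader  = new StreamReadingThread(process.getInputStream, line => println(s"[child] $line"))
  reader.start()
  process.waitFor()
  reader.join()   // the thread exits once readLine() returns null at end of stream
}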
Example 160
Source File: AudioStreams.scala    From mmlspark   with MIT License 5 votes vote down vote up
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark

import java.io.InputStream

import com.microsoft.cognitiveservices.speech.audio.PullAudioInputStreamCallback


class WavStream(val wavStream: InputStream) extends PullAudioInputStreamCallback {

  val stream = parseWavHeader(wavStream)

  override def read(dataBuffer: Array[Byte]): Int = {
    Math.max(0, stream.read(dataBuffer, 0, dataBuffer.length))
  }

  override def close(): Unit = {
    stream.close()
  }

  // region Wav File helper functions
  private def readUInt32(inputStream: InputStream) = {
    (0 until 4).foldLeft(0) { case (n, i) => n | inputStream.read << (i * 8) }
  }

  private def readUInt16(inputStream: InputStream) = {
    (0 until 2).foldLeft(0) { case (n, i) => n | inputStream.read << (i * 8) }
  }

  //noinspection ScalaStyle
  def parseWavHeader(reader: InputStream): InputStream = {
    // Tag "RIFF"
    val data = new Array[Byte](4)
    var numRead = reader.read(data, 0, 4)
    assert((numRead == 4) && (data sameElements "RIFF".getBytes), "RIFF")

    // Chunk size
    val fileLength = readUInt32(reader)

    numRead = reader.read(data, 0, 4)
    assert((numRead == 4) && (data sameElements "WAVE".getBytes), "WAVE")

    numRead = reader.read(data, 0, 4)
    assert((numRead == 4) && (data sameElements "fmt ".getBytes), "fmt ")

    val formatSize = readUInt32(reader)
    assert(formatSize >= 16, "formatSize")

    val formatTag = readUInt16(reader)
    val channels = readUInt16(reader)
    val samplesPerSec = readUInt32(reader)
    val avgBytesPerSec = readUInt32(reader)
    val blockAlign = readUInt16(reader)
    val bitsPerSample = readUInt16(reader)
    assert(formatTag == 1, "PCM") // PCM

    assert(channels == 1, "single channel")
    assert(samplesPerSec == 16000, "samples per second")
    assert(bitsPerSample == 16, "bits per sample")

    // Until now we have read 16 bytes in format, the rest is cbSize and is ignored
    // for now.
    if (formatSize > 16) {
      numRead = reader.read(new Array[Byte]((formatSize - 16).toInt))
      assert(numRead == (formatSize - 16), "could not skip extended format")
    }
    // Second Chunk, data
    // tag: data.
    numRead = reader.read(data, 0, 4)
    assert((numRead == 4) && (data sameElements "data".getBytes))

    val dataLength = readUInt32(reader)
    reader
  }
}

class CompressedStream(val stream: InputStream) extends PullAudioInputStreamCallback {

  override def read(dataBuffer: Array[Byte]): Int = {
    Math.max(0, stream.read(dataBuffer, 0, dataBuffer.length))
  }

  override def close(): Unit = {
    stream.close()
  }

} 
Example 161
Source File: Schema.scala    From mmlspark   with MIT License 5 votes vote down vote up
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.downloader

import java.io.InputStream
import java.net.URI
import org.apache.commons.codec.digest.DigestUtils
import spray.json._

import scala.collection.JavaConversions._
import scala.collection.JavaConverters._

private[spark] object NamingConventions {

  def canonicalModelFilename(name: String, dataset: String): String =
    s"${name}_$dataset.model"

  def canonicalModelFilename(model: ModelSchema): String =
    s"${model.name}_${model.dataset}.model"

}


case class ModelSchema(name: String,
                       dataset: String,
                       modelType: String,
                       override val uri: URI,
                       override val hash: String,
                       override val size: Long,
                       inputNode: Int,
                       numLayers: Int,
                       layerNames: Array[String])
  extends Schema(uri, hash, size) {

  def this(name: String, dataset: String, modelType: String,
           uri: URI, hash: String, size: Long, inputNode: Int, numLayers: Int,
           layerNames: java.util.ArrayList[String]) = {
    this(name, dataset, modelType, uri, hash, size,
      inputNode, numLayers, layerNames.toList.toArray)
  }

  override def updateURI(newURI: URI): this.type = this.copy(uri = newURI).asInstanceOf[this.type]

}

private[spark] object SchemaJsonProtocol extends DefaultJsonProtocol {

  implicit object URIJsonFormat extends JsonFormat[URI] {
    def write(u: URI): JsValue = {
      JsString(u.toString)
    }

    def read(value: JsValue): URI = new URI(value.asInstanceOf[JsString].value)
  }

  implicit val ModelSchemaFormat: RootJsonFormat[ModelSchema] =
    jsonFormat(ModelSchema.apply,
      "name", "dataset", "modelType", "uri", "hash", "size", "inputNode", "numLayers", "layerNames")

} 
Example 162
Source File: ContextObjectInputStream.scala    From mmlspark   with MIT License 5 votes vote down vote up
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.utils

import java.io.{InputStream, ObjectInputStream, ObjectStreamClass}

class ContextObjectInputStream(input: InputStream) extends ObjectInputStream(input) {
  protected override def resolveClass(desc: ObjectStreamClass): Class[_] = {
    try {
      Class.forName(desc.getName, false, Thread.currentThread().getContextClassLoader)
    } catch {
      case _: ClassNotFoundException => super.resolveClass(desc)
    }
  }
} 
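A brief round-trip sketch (the demo object is ours): serialize a value with the stock ObjectOutputStream, then deserialize it through ContextObjectInputStream so classes are resolved against the context class loader first and only then against the default mechanism.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream}
import com.microsoft.ml.spark.core.utils.ContextObjectInputStream

object ContextObjectInputStreamDemo extends App {
  val bos = new ByteArrayOutputStream()
  val oos = new ObjectOutputStream(bos)
  oos.writeObject(List(1, 2, 3))   // any Serializable value
  oos.close()

  val ois = new ContextObjectInputStream(new ByteArrayInputStream(bos.toByteArray))
  println(ois.readObject())        // List(1, 2, 3)
  ois.close()
}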
Example 163
Source File: StreamUtilities.scala    From mmlspark   with MIT License 5 votes vote down vote up
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.env

import java.io.{ByteArrayOutputStream, InputStream}
import java.util.zip.ZipInputStream

import org.apache.commons.io.IOUtils

import scala.io.Source
import scala.util.Random

object StreamUtilities {

  import scala.util.{Failure, Success, Try}
  def usingMany[T <: AutoCloseable, U](disposable: Seq[T])(task: Seq[T] => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.foreach(d => d.close())
    }
  }

  def using[T <: AutoCloseable, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  def usingSource[T <: Source, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  
  class ZipIterator(stream: InputStream, zipfile: String, random: Random, sampleRatio: Double = 1)
    extends Iterator[(String, Array[Byte])] {

    private val zipStream = new ZipInputStream(stream)

    private def getNext: Option[(String, Array[Byte])] = {
      var entry = zipStream.getNextEntry
      while (entry != null) {
        if (!entry.isDirectory && random.nextDouble < sampleRatio) {

          val filename = zipfile + java.io.File.separator + entry.getName

          //extracting all bytes of a given entry
          val byteStream = new ByteArrayOutputStream
          IOUtils.copy(zipStream, byteStream)
          val bytes = byteStream.toByteArray

          assert(bytes.length == entry.getSize,
            "incorrect number of bytes is read from zipstream: " + bytes.length + " instead of " + entry.getSize)

          return Some((filename, bytes))
        }
        entry = zipStream.getNextEntry
      }

      stream.close()
      None
    }

    private var nextValue = getNext

    def hasNext: Boolean = nextValue.isDefined

    def next: (String, Array[Byte]) = {
      val result = nextValue.get
      nextValue = getNext
      result
    }
  }

} 
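The using helpers above return a Try and close the resource in a finally block. A minimal sketch (the demo object and sample data are ours):

import java.io.ByteArrayInputStream
import com.microsoft.ml.spark.core.env.StreamUtilities.using

object UsingDemo extends App {
  val parsed = using(new ByteArrayInputStream("42".getBytes)) { in =>
    scala.io.Source.fromInputStream(in).mkString.toInt
  }
  println(parsed)   // Success(42); the stream is closed even if the body throws
}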
Example 164
Source File: AmandroidSettings.scala    From Argus-SAF   with Apache License 2.0 5 votes vote down vote up
package org.argus.amandroid.core

import java.io.{File, FileInputStream, InputStream}

import org.ini4j.Wini
import org.argus.jawa.core.util.FileUtil


class AmandroidSettings(amandroid_home: String, iniPathOpt: Option[String]) {
  private val amandroid_home_uri = FileUtil.toUri(amandroid_home)
  private def defaultLibFiles =
    amandroid_home + "/androidSdk/android-25/android.jar" + java.io.File.pathSeparator +
    amandroid_home + "/androidSdk/support/v4/android-support-v4.jar" + java.io.File.pathSeparator +
    amandroid_home + "/androidSdk/support/v13/android-support-v13.jar" + java.io.File.pathSeparator +
    amandroid_home + "/androidSdk/support/v7/android-support-v7-appcompat.jar"
  private def defaultThirdPartyLibFile = amandroid_home + "/liblist.txt"
  private val iniUri = {
    iniPathOpt match {
      case Some(path) => FileUtil.toUri(path)
      case None => FileUtil.appendFileName(amandroid_home_uri, "config.ini")
    }
  }
  private val ini = new Wini(FileUtil.toFile(iniUri))
  def timeout: Int = Option(ini.get("analysis", "timeout", classOf[Int])).getOrElse(5)
  def dependence_dir: Option[String] = Option(ini.get("general", "dependence_dir", classOf[String]))
  def debug: Boolean = ini.get("general", "debug", classOf[Boolean])
  def lib_files: String = Option(ini.get("general", "lib_files", classOf[String])).getOrElse(defaultLibFiles)
  def third_party_lib_file: String = Option(ini.get("general", "third_party_lib_file", classOf[String])).getOrElse(defaultThirdPartyLibFile)
  def actor_conf_file: InputStream = Option(ini.get("concurrent", "actor_conf_file", classOf[String])) match {
    case Some(path) => new FileInputStream(path)
    case None => getClass.getResourceAsStream("/application.conf")
  }
  def static_init: Boolean = ini.get("analysis", "static_init", classOf[Boolean])
  def parallel: Boolean = ini.get("analysis", "parallel", classOf[Boolean])
  def k_context: Int = ini.get("analysis", "k_context", classOf[Int])
  def sas_file: String = Option(ini.get("analysis", "sas_file", classOf[String])).getOrElse(amandroid_home + File.separator + "taintAnalysis" + File.separator + "sourceAndSinks" + File.separator + "TaintSourcesAndSinks.txt")
  def native_sas_file: String = Option(ini.get("analysis", "sas_file", classOf[String])).getOrElse(amandroid_home + File.separator + "taintAnalysis" + File.separator + "sourceAndSinks" + File.separator + "NativeSourcesAndSinks.txt")
  def injection_sas_file: String = Option(ini.get("analysis", "injection_sas_file", classOf[String])).getOrElse(amandroid_home + File.separator + "taintAnalysis" + File.separator + "sourceAndSinks" + File.separator + "IntentInjectionSourcesAndSinks.txt")
} 
Example 165
Source File: ZipUtil.scala    From Argus-SAF   with Apache License 2.0 5 votes vote down vote up
package org.argus.jawa.core.util

import java.io.{File, FileOutputStream, InputStream, OutputStream}
import java.util.zip.{ZipEntry, ZipFile}

import scala.collection.JavaConverters._


object ZipUtil {
  val BUFSIZE = 4096
  val buffer = new Array[Byte](BUFSIZE)

  def unZip(source: String, targetFolder: String): Boolean = {
    val zipFile = new ZipFile(source)

    unzipAllFile(zipFile.entries.asScala.toList, getZipEntryInputStream(zipFile), new File(targetFolder))
  }

  def getZipEntryInputStream(zipFile: ZipFile)(entry: ZipEntry): InputStream = zipFile.getInputStream(entry)

  def unzipAllFile(entryList: List[ZipEntry], inputGetter: ZipEntry => InputStream, targetFolder: File): Boolean = {

    entryList match {
      case entry :: entries =>

        if (entry.isDirectory)
          new File(targetFolder, entry.getName).mkdirs
        else
          saveFile(inputGetter(entry), new FileOutputStream(new File(targetFolder, entry.getName)))

        unzipAllFile(entries, inputGetter, targetFolder)
      case _ =>
        true
    }

  }

  def saveFile(fis: InputStream, fos: OutputStream): Unit = {
    writeToFile(bufferReader(fis), fos)
    fis.close()
    fos.close()
  }

  def bufferReader(fis: InputStream)(buffer: Array[Byte]): (Int, Array[Byte]) = (fis.read(buffer), buffer)

  def writeToFile(reader: Array[Byte] => (Int, Array[Byte]), fos: OutputStream): Boolean = {
    val (length, data) = reader(buffer)
    if (length >= 0) {
      fos.write(data, 0, length)
      writeToFile(reader, fos)
    } else
      true
  }
} 
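A usage sketch for the helper above (both paths are placeholders): extract every entry of an archive into a target folder and report whether the recursion completed.

import org.argus.jawa.core.util.ZipUtil

object ZipUtilDemo extends App {
  // unZip opens the archive, walks its entries and copies each file into the target folder
  val finished = ZipUtil.unZip("sample.zip", "unzipped")
  println(s"extraction finished: $finished")
}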
Example 166
Source File: Parser.scala    From uap-scala   with Do What The F*ck You Want To Public License 5 votes vote down vote up
package org.uaparser.scala

import java.io.InputStream
import java.util.{ List => JList, Map => JMap }
import org.uaparser.scala.Device.DeviceParser
import org.uaparser.scala.OS.OSParser
import org.uaparser.scala.UserAgent.UserAgentParser
import org.yaml.snakeyaml.Yaml
import org.yaml.snakeyaml.constructor.SafeConstructor
import scala.collection.JavaConverters._
import scala.util.Try

case class Parser(userAgentParser: UserAgentParser, osParser: OSParser, deviceParser: DeviceParser)
    extends UserAgentStringParser {
  def parse(agent: String): Client =
    Client(userAgentParser.parse(agent), osParser.parse(agent), deviceParser.parse(agent))
}

object Parser {
  def fromInputStream(source: InputStream): Try[Parser] = Try {
    val yaml = new Yaml(new SafeConstructor)
    val javaConfig = yaml.load(source).asInstanceOf[JMap[String, JList[JMap[String, String]]]]
    val config = javaConfig.asScala.toMap.mapValues(_.asScala.toList.map(_.asScala.toMap.filterNot {
      case (_ , value) => value eq null
    }))
    val userAgentParser = UserAgentParser.fromList(config.getOrElse("user_agent_parsers", Nil))
    val osParser = OSParser.fromList(config.getOrElse("os_parsers", Nil))
    val deviceParser = DeviceParser.fromList(config.getOrElse("device_parsers", Nil))
    Parser(userAgentParser, osParser, deviceParser)
  }
  def default: Parser = fromInputStream(this.getClass.getResourceAsStream("/regexes.yaml")).get
} 
Example 167
Source File: CachingParser.scala    From uap-scala   with Do What The F*ck You Want To Public License 5 votes vote down vote up
package org.uaparser.scala

import java.io.InputStream
import java.util.{ Collections, LinkedHashMap, Map => JMap }
import scala.util.Try

case class CachingParser(parser: Parser, maxEntries: Int) extends UserAgentStringParser {
  lazy val clients: JMap[String, Client] = Collections.synchronizedMap(
    new LinkedHashMap[String, Client](maxEntries + 1, 1.0f, true) {
      override protected def removeEldestEntry(eldest: JMap.Entry[String, Client]): Boolean =
        super.size > maxEntries
    }
  )
  def parse(agent: String): Client = Option(clients.get(agent)).getOrElse {
    val client = parser.parse(agent)
    clients.put(agent, client)
    client
  }
}

object CachingParser {
  val defaultCacheSize: Int = 1000
  def fromInputStream(source: InputStream, size: Int = defaultCacheSize): Try[CachingParser] =
    Parser.fromInputStream(source).map(CachingParser(_, size))
  def default(size: Int = defaultCacheSize): CachingParser = CachingParser(Parser.default, size)
} 
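A short sketch of the parser pair above (the user-agent string is an arbitrary example): CachingParser.default builds a Parser from the bundled regexes.yaml and keeps an LRU map of up to the given number of parsed clients, so repeated agents are parsed only once.

import org.uaparser.scala.CachingParser

object CachingParserDemo extends App {
  val parser = CachingParser.default(500)
  val ua     = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
  println(parser.parse(ua))   // Client(userAgent, os, device); a second call with the same string hits the cache
}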
Example 168
Source File: ByteTrackingInputStream.scala    From hail   with MIT License 5 votes vote down vote up
package is.hail.utils

import java.io.InputStream

import is.hail.io.fs.Seekable

class ByteTrackingInputStream(base: InputStream) extends InputStream {
  var bytesRead = 0L

  def bytesReadAndClear(): Long = {
    val n = bytesRead
    bytesRead = 0L
    n
  }

  override def read(): Int = {
    bytesRead += 1
    base.read()
  }

  override def read(b: Array[Byte]): Int = {
    val n = base.read(b)
    bytesRead += n
    n
  }

  override def read(b: Array[Byte], off: Int, len: Int): Int = {
    val n = base.read(b, off, len)
    bytesRead += n
    n
  }

  override def close(): Unit = base.close()

  def seek(offset: Long): Unit = {
    base match {
      case base: Seekable =>
        base.seek(offset)
      case base: org.apache.hadoop.fs.Seekable =>
        base.seek(offset)
    }
  }
} 
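A small sketch (the demo object is ours) showing how the wrapper counts bytes and how the counter is read and reset between uses:

import java.io.ByteArrayInputStream
import is.hail.utils.ByteTrackingInputStream

object ByteTrackingDemo extends App {
  val tracked = new ByteTrackingInputStream(new ByteArrayInputStream(new Array[Byte](1024)))
  val buf = new Array[Byte](100)
  tracked.read(buf)                     // 100 bytes of the 1024 available
  tracked.read()                        // one more byte
  println(tracked.bytesReadAndClear())  // 101, and the internal counter is reset to 0
  tracked.close()
}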
Example 169
Source File: RichInputStream.scala    From hail   with MIT License 5 votes vote down vote up
package is.hail.utils.richUtils

import java.io.InputStream
import is.hail.utils._

class RichInputStream(val in: InputStream) extends AnyVal {
  def readFully(to: Array[Byte], toOff: Int, n: Int): Unit = {
    val nRead = readRepeatedly(to, toOff, n)
    if (nRead < n) fatal(s"Premature end of file: expected $n bytes, found $nRead")
  }

  def readRepeatedly(to: Array[Byte], toOff: Int, n: Int): Int = {
    assert(toOff + n <= to.length)
    var read = 0
    var endOfStream = false
    while (read < n && !endOfStream) {
      val r = in.read(to, toOff + read, n - read)
      if (r <= 0)
        endOfStream = true
      else
        read += r
    }
    read
  }

  def readRepeatedly(to: Array[Byte]): Int = readRepeatedly(to, 0, to.length)
} 
Example 170
Source File: HTTPClient.scala    From hail   with MIT License 5 votes vote down vote up
package is.hail.utils

import java.net.URL
import java.io.OutputStream
import java.io.InputStream
import java.net.HttpURLConnection
import is.hail.utils._
import java.nio.charset.StandardCharsets
import org.apache.commons.io.output.ByteArrayOutputStream


object HTTPClient {
  def post[T](
    url: String,
    contentLength: Int,
    writeBody: OutputStream => Unit,
    readResponse: InputStream => T = (_: InputStream) => (),
    chunkSize: Int = 0
  ): T = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod("POST")
    if (chunkSize > 0)
      conn.setChunkedStreamingMode(chunkSize)
    conn.setDoOutput(true);
    conn.setRequestProperty("Content-Length", Integer.toString(contentLength))
    using(conn.getOutputStream())(writeBody)
    assert(200 <= conn.getResponseCode() && conn.getResponseCode() < 300,
      s"POST ${url} ${conn.getResponseCode()} ${using(conn.getErrorStream())(fullyReadInputStreamAsString)}")
    val result = using(conn.getInputStream())(readResponse)
    conn.disconnect()
    result
  }

  def get[T](
    url: String,
    readResponse: InputStream => T
  ): T = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod("GET")
    assert(200 <= conn.getResponseCode() && conn.getResponseCode() < 300,
      s"GET ${url} ${conn.getResponseCode()} ${using(conn.getErrorStream())(fullyReadInputStreamAsString)}")
    val result = using(conn.getInputStream())(readResponse)
    conn.disconnect()
    result
  }

  def delete(
    url: String,
    readResponse: InputStream => Unit = (_: InputStream) => ()
  ): Unit = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod("DELETE")
    assert(200 <= conn.getResponseCode() && conn.getResponseCode() < 300,
      s"DELETE ${url} ${conn.getResponseCode()} ${using(conn.getErrorStream())(fullyReadInputStreamAsString)}")
    val result = using(conn.getInputStream())(readResponse)
    conn.disconnect()
    result
  }

  private[this] def fullyReadInputStreamAsString(is: InputStream): String =
    using(new ByteArrayOutputStream()) { baos =>
      drainInputStreamToOutputStream(is, baos)
      new String(baos.toByteArray(), StandardCharsets.UTF_8)
    }
} 
Example 171
Source File: RestartableByteArrayInputStream.scala    From hail   with MIT License 5 votes vote down vote up
package is.hail.utils

import java.io.{ IOException, InputStream }

// not thread safe
class RestartableByteArrayInputStream extends InputStream {
  private[this] var off: Int = 0
  private[this] var end: Int = 0
  private[this] var buf: Array[Byte] = null
  def this(buf: Array[Byte]) {
    this()
    restart(buf)
  }

  override def read(): Int = {
    if (off == end) {
      return -1
    }
    val b = buf(off) & 0xff
    off += 1
    b
  }
  override def read(dest: Array[Byte]): Int =
    read(dest, 0, dest.length)
  override def read(dest: Array[Byte], destOff: Int, requestedLength: Int): Int = {
    val length = math.min(requestedLength, end - off)
    System.arraycopy(buf, off, dest, destOff, length)
    off += length
    length
  }
  override def skip(n: Long): Long = {
    if (n <= 0) {
      return 0
    }
    val skipped = math.min(
      math.min(n, Integer.MAX_VALUE).toInt,
      end - off)
    off += skipped
    skipped
  }
  override def available(): Int =
    end - off
  override def markSupported(): Boolean = false
  override def mark(readAheadLimit: Int): Unit =
    throw new IOException("unsupported operation")
  override def reset(): Unit =
    throw new IOException("unsupported operation")
  override def close(): Unit =
    buf = null
  def restart(buf: Array[Byte]): Unit =
    restart(buf, 0, buf.length)
  def restart(buf: Array[Byte], start: Int, end: Int): Unit = {
    require(start >= 0)
    require(start <= end)
    require(end <= buf.length)
    this.buf = buf
    this.off = start
    this.end = end
  }
} 
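A sketch of the intended reuse pattern (the demo object is ours): instead of allocating a new ByteArrayInputStream per buffer, one stream object is restarted over successive byte arrays.

import is.hail.utils.RestartableByteArrayInputStream

object RestartableDemo extends App {
  val in = new RestartableByteArrayInputStream("first".getBytes)
  println(scala.io.Source.fromInputStream(in).mkString)   // first

  in.restart("second".getBytes)                           // same object, fresh buffer
  println(scala.io.Source.fromInputStream(in).mkString)   // second
}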
Example 172
Source File: BGZipBlocks.scala    From hail   with MIT License 5 votes vote down vote up
package is.hail.misc

import java.io.InputStream

import is.hail.io.compress.BGzipInputStream
import is.hail.io.fs.FS

object BGZipBlocks {
  //Print block starts of block gzip (bgz) file
  def apply(fs: FS, file: String) {
    var buf = new Array[Byte](64 * 1024)

    // position of 'buf[0]' in input stream
    var bufPos = 0L

    var bufSize = 0
    var posInBuf = 0

    def fillBuf(is: InputStream) {
      val newSize = bufSize - posInBuf
      assert(newSize >= 0)

      System.arraycopy(buf, posInBuf, buf, 0, newSize)
      bufPos += posInBuf
      bufSize = newSize
      posInBuf = 0

      def f() {
        val needed = buf.length - bufSize
        if (needed > 0) {
          val result = is.read(buf, bufSize, needed)
          if (result > 0) {
            bufSize += result
            f()
          }
        }
      }

      f()
    }

    // no decompression codec
    val is = fs.open(file)

    fillBuf(is)

    while (bufSize > 0) {
      val h = new BGzipInputStream.BGzipHeader(buf, posInBuf, bufSize)
      println(bufPos)
      posInBuf += h.getBlockSize
      fillBuf(is)
    }

    is.close()
  }
} 
Example 173
Source File: package.scala    From hail   with MIT License 5 votes vote down vote up
package is

import java.io.InputStream

package object hail {

  private object HailBuildInfo {

    import java.util.Properties

    import is.hail.utils._

    val (
      hail_build_user: String,
      hail_revision: String,
      hail_branch: String,
      hail_build_date: String,
      hail_repo_url: String,
      hail_spark_version: String,
      hail_pip_version: String) = {

      loadFromResource[(String, String, String, String, String, String, String)]("build-info.properties") {
        (is: InputStream) =>
          val unknownProp = "<unknown>"
          val props = new Properties()
          props.load(is)
          (
            props.getProperty("user", unknownProp),
            props.getProperty("revision", unknownProp),
            props.getProperty("branch", unknownProp),
            props.getProperty("date", unknownProp),
            props.getProperty("url", unknownProp),
            props.getProperty("sparkVersion", unknownProp),
            props.getProperty("hailPipVersion", unknownProp)
            )
      }
    }
  }

  val HAIL_BUILD_USER = HailBuildInfo.hail_build_user
  val HAIL_REVISION = HailBuildInfo.hail_revision
  val HAIL_BRANCH = HailBuildInfo.hail_branch
  val HAIL_BUILD_DATE = HailBuildInfo.hail_build_date
  val HAIL_REPO_URL = HailBuildInfo.hail_repo_url
  val HAIL_SPARK_VERSION = HailBuildInfo.hail_spark_version
  val HAIL_PIP_VERSION = HailBuildInfo.hail_pip_version

  // FIXME: probably should use tags or something to choose English name
  val HAIL_PRETTY_VERSION = HAIL_PIP_VERSION + "-" + HAIL_REVISION.substring(0, 12)

} 
Example 174
Source File: CodecSpec.scala    From hail   with MIT License 5 votes vote down vote up
package is.hail.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream}

import is.hail.annotations.{Region, RegionValue}
import is.hail.asm4s.{Code, TypeInfo, Value}
import is.hail.expr.ir.{EmitClassBuilder, EmitFunctionBuilder, ExecuteContext, typeToTypeInfo}
import is.hail.types.encoded.EType
import is.hail.types.physical.PType
import is.hail.types.virtual.Type
import is.hail.rvd.RVDContext
import is.hail.sparkextras.ContextRDD
import is.hail.utils.using
import org.apache.spark.rdd.RDD

trait AbstractTypedCodecSpec extends Spec {
  def encodedType: EType
  def encodedVirtualType: Type

  type StagedEncoderF[T] = (Value[Region], Value[T], Value[OutputBuffer]) => Code[Unit]
  type StagedDecoderF[T] = (Value[Region], Value[InputBuffer]) => Code[T]

  def buildEncoder(ctx: ExecuteContext, t: PType): (OutputStream) => Encoder

  def decodedPType(requestedType: Type): PType

  def buildDecoder(ctx: ExecuteContext, requestedType: Type): (PType, (InputStream) => Decoder)

  def encode(ctx: ExecuteContext, t: PType, offset: Long): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    using(buildEncoder(ctx, t)(baos))(_.writeRegionValue(offset))
    baos.toByteArray
  }

  def decode(ctx: ExecuteContext, requestedType: Type, bytes: Array[Byte], region: Region): (PType, Long) = {
    val bais = new ByteArrayInputStream(bytes)
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, dec(bais).readRegionValue(region))
  }

  def buildCodeInputBuffer(is: Code[InputStream]): Code[InputBuffer]

  def buildCodeOutputBuffer(os: Code[OutputStream]): Code[OutputBuffer]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_]): (PType, StagedDecoderF[T])

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_]): StagedEncoderF[T]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_], ti: TypeInfo[T]): (PType, StagedDecoderF[T]) = {
    val (ptype, dec) = buildEmitDecoderF[T](requestedType, cb)
    assert(ti == typeToTypeInfo(requestedType))
    ptype -> dec
  }

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_], ti: TypeInfo[T]): StagedEncoderF[T] = {
    assert(ti == typeToTypeInfo(t))
    buildEmitEncoderF[T](t, cb)
  }

  // FIXME: is there a better place for this to live?
  def decodeRDD(ctx: ExecuteContext, requestedType: Type, bytes: RDD[Array[Byte]]): (PType, ContextRDD[Long]) = {
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, ContextRDD.weaken(bytes).cmapPartitions { (ctx, it) =>
      RegionValue.fromBytes(dec, ctx.region, it)
    })
  }

  override def toString: String = super[Spec].toString
} 
Example 175
Source File: DoubleInputBuffer.scala    From hail   with MIT License 5 votes vote down vote up
package is.hail.io

import java.io.{Closeable, InputStream, OutputStream}

import is.hail.annotations.Memory
import is.hail.utils._

final class DoubleInputBuffer(in: InputStream, bufSize: Int) extends Closeable {
  private val buf = new Array[Byte](bufSize)
  private var end: Int = 0
  private var off: Int = 0

  def close() {
    in.close()
  }

  def readDoubles(to: Array[Double]): Unit = readDoubles(to, 0, to.length)

  def readDoubles(to: Array[Double], toOff0: Int, n0: Int) {
    assert(toOff0 >= 0)
    assert(n0 >= 0)
    assert(toOff0 <= to.length - n0)

    var toOff = toOff0
    var n = n0.toLong

    while (n > 0) {
      if (end == off) {
        val len = math.min(bufSize, n << 3).toInt
        in.readFully(buf, 0, len)
        end = len
        off = 0
      }
      val p = math.min(end - off, n << 3).toInt >>> 3
      assert(p > 0)
      Memory.memcpy(to, toOff, buf, off, p)
      toOff += p
      n -= p
      off += (p << 3)
    }
  }
}

final class DoubleOutputBuffer(out: OutputStream, bufSize: Int) extends Closeable {
  private val buf: Array[Byte] = new Array[Byte](bufSize)
  private var off: Int = 0

  def close() {
    flush()
    out.close()
  }

  def flush() {
    out.write(buf, 0, off)
  }

  def writeDoubles(from: Array[Double]): Unit = writeDoubles(from, 0, from.length)

  def writeDoubles(from: Array[Double], fromOff0: Int, n0: Int) {
    assert(n0 >= 0)
    assert(fromOff0 >= 0)
    assert(fromOff0 <= from.length - n0)
    var fromOff = fromOff0
    var n = n0.toLong

    while (off + (n << 3) > bufSize) {
      val p = (buf.length - off) >>> 3
      Memory.memcpy(buf, off, from, fromOff, p)
      off += (p << 3)
      fromOff += p
      n -= p
      out.write(buf, 0, off)
      off = 0
    }
    Memory.memcpy(buf, off, from, fromOff, n)
    off += (n.toInt << 3)
  }
} 
Example 176
Source File: package.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.build

import java.io.InputStream

package object util {
  def resourceFromJarAsIStream(filename: String): InputStream = {
    val jarfile = {
      val r = this.getClass.getProtectionDomain.getCodeSource.getLocation.getFile
      if (r.head == '/') r.tail else r
    }
    val zip = new java.util.zip.ZipFile(jarfile)
    val ze = {
      val rv = zip.getEntry(filename)
      if (rv == null) {
        val toggleSlash = if (filename.head == '/') filename.drop(1) else s"/$filename"
        zip.getEntry(toggleSlash)
      } else rv
    }
    zip.getInputStream(ze)
  }
} 
Example 177
Source File: ChildFirstURLClassLoader.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.util.loading

import java.io.{File, InputStream}
import java.net.{URL, URLClassLoader}

// scalastyle:off
import sun.misc.CompoundEnumeration
// scalastyle:on
import scala.util.{Failure, Success, Try}


class ChildFirstURLClassLoader(urls: Array[URL], parent: ClassLoader, except: Seq[String] = Seq())
    extends URLClassLoader(urls, parent) {

  protected override def loadClass(name: String, resolve: Boolean): Class[_] = {
    def tryFind(findAction: => Class[_]): Option[Class[_]] = Try(findAction) match {
      case Failure(e: ClassNotFoundException) => None
      case Failure(e)                         => throw e
      case Success(c)                         => Some(c)
    }

    def loadLocally = if (except.exists(name.startsWith)) None else tryFind(findClass(name))
    def loadFromParent = if (getParent == null) None else tryFind(getParent.loadClass(name))

    val alreadyLoaded = findLoadedClass(name)
    if (alreadyLoaded != null) {
      alreadyLoaded
    } else {

      val `class` = loadLocally.getOrElse(loadFromParent.orNull)

      if (resolve)
        resolveClass(`class`)
      `class`
    }
  }

  override def getResource(name: String): URL = findResource(name) match {
    case null => super.getResource(name)
    case u    => u
  }

  override def getResources(name: String): java.util.Enumeration[URL] = {
    val parent = getParent
    val localUrls = findResources(name)
    val parentUrls: java.util.Enumeration[URL] =
      if (parent != null) parent.getResources(name) else java.util.Collections.emptyEnumeration()
    new CompoundEnumeration(Array(localUrls, parentUrls))
  }

  override def getResourceAsStream(name: String): InputStream = {
    getResource(name) match {
      case null => null
      case url =>
        Try(url.openStream) match {
          case Success(x) => x
          case Failure(_) => null
        }
    }
  }
}

object ChildFirstURLClassLoader {
  def loadClassFromJar[T](className: String, jarPath: String, commonPackageNames:String, excludes: Seq[String] = Seq()): T =
    Loader(jarPath, excludes :+ commonPackageNames).load(className)

  case class Loader(jarPath: String, excludes: Seq[String] = Seq()) {
    val urls = if(new java.io.File(jarPath).isFile) Array(new File(jarPath).toURI.toURL) else Array[URL](new URL(jarPath))
    private val cl =
      new ChildFirstURLClassLoader(urls, this.getClass.getClassLoader, excludes)
    def load[T](className: String) = cl.loadClass(className).newInstance.asInstanceOf[T]
  }

} 
Example 178
Source File: StringTests.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.util.string.test

import java.io.{ByteArrayInputStream, InputStream}

import org.scalatest.{FunSpec, Matchers}
import cmwell.util.string._

class StringTests extends FunSpec with Matchers {
  private def mkString(is: InputStream) = {
    val buffSrc = scala.io.Source.fromInputStream(is)
    val res = buffSrc.mkString
    buffSrc.close()
    res
  }

  describe("mapInputStreamLines should") {
    it("return empty for empty input") {
      val input = new ByteArrayInputStream(Array.emptyByteArray)
      val result = mapInputStreamLines(input)(identity)
      result.read() should be(-1)
      input.close()
      result.close()
    }
    it("provide the delimiter as well") {
      val delim = '\n'
      val s = "provide the\ndelimiter as well"
      val expectedAmount = s.count(delim.==)

      val input = stringToInputStream(s)
      val result = mapInputStreamLines(input)(_.toUpperCase)
      mkString(result).count(delim.==) should be(expectedAmount)
      input.close()
      result.close()
    }
    it("not end with the delimiter") {
      val input = stringToInputStream("not end with\nthe delimiter")
      val result = mapInputStreamLines(input)(_.toUpperCase)
      mkString(result).last should be('R')
      input.close()
      result.close()
    }
    it("handle a concat mapper") {
      val input = stringToInputStream("handle\na\nconcat\nmapper")
      val result = mapInputStreamLines(input)(_ + " not")
      mkString(result) should be("handle not\na not\nconcat not\nmapper not")
      input.close()
      result.close()
    }
  }

} 
Example 179
Source File: SparqlUtils.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.tools.data.sparql.japi

import java.io.InputStream

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Sink, StreamConverters}
import cmwell.tools.data.sparql.SparqlProcessor
import cmwell.tools.data.utils.akka.{concatByteStrings, endl}
import cmwell.tools.data.utils.chunkers.GroupChunker
import scala.concurrent.ExecutionContext.Implicits.global

import scala.concurrent.duration.FiniteDuration

object SparqlUtils {

  def createJavaStreamFromPaths(baseUrl: String,
                                parallelism: Int = 4,
                                isNeedWrapping: Boolean = true,
                                sparqlQuery: String,
                                in: InputStream) = {

    implicit val system = ActorSystem("reactive-sparql-processor")
    implicit val mat = ActorMaterializer()

    SparqlProcessor
      .createSourceFromPathsInputStream(
        baseUrl = baseUrl,
        spQueryParamsBuilder = (p: Seq[String], v: Map[String,String], q: Boolean) => "sp.pid=" + p.head.substring(p.head.lastIndexOf('-') + 1),
        parallelism = parallelism,
        isNeedWrapping = isNeedWrapping,
        sparqlQuery = sparqlQuery,
        in = in
      )
      .map { case (data, _) => data }
      .via(GroupChunker(GroupChunker.formatToGroupExtractor("ntriples")))
      .map(concatByteStrings(_, endl))
      .runWith(StreamConverters.asJavaStream())
  }

  def createJavaOutputStreamFromPaths(baseUrl: String,
                                      parallelism: Int = 4,
                                      isNeedWrapping: Boolean = true,
                                      sparqlQuery: String,
                                      in: InputStream,
                                      timeout: FiniteDuration) = {
    implicit val system = ActorSystem("reactive-sparql-processor")
    implicit val mat = ActorMaterializer()

    SparqlProcessor
      .createSourceFromPathsInputStream(
        baseUrl = baseUrl,
        spQueryParamsBuilder = (p: Seq[String], v: Map[String,String], q: Boolean) => "sp.pid=" + p.head.substring(p.head.lastIndexOf('-') + 1),
        parallelism = parallelism,
        isNeedWrapping = isNeedWrapping,
        sparqlQuery = sparqlQuery,
        in = in
      )
      .map { case (data, _) => data }
      .via(GroupChunker(GroupChunker.formatToGroupExtractor("ntriples")))
      .map(concatByteStrings(_, endl))
      .runWith(StreamConverters.asInputStream(timeout))
  }
} 
Example 180
Source File: DownloaderUtils.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.tools.data.downloader.streams.japi

import java.io.InputStream

import akka.Done
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Sink
import cmwell.tools.data.downloader.streams.Downloader
import cmwell.tools.data.utils.akka._

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future


  def fromQuery(host: String,
                path: String,
                params: String,
                qp: String,
                format: String,
                op: String,
                length: Option[Int],
                recursive: Boolean,
                onFinish: Runnable): Future[Done] = {

    implicit val system = ActorSystem("reactive-downloader")
    implicit val mat = ActorMaterializer()

    Downloader
      .downloadFromQuery(
        baseUrl = host,
        path = path,
        params = params,
        qp = qp,
        format = format,
        op = op,
        length = length,
        recursive = recursive,
        // scalastyle:off
        outputHandler = println
        // scalastyle:on
      )
      .andThen { case _ => cleanup() }
      .andThen { case _ => onFinish.run() }
  }

  def fromUuidInputStream(host: String, format: String, op: String, in: InputStream, onFinish: Runnable) = {

    implicit val system = ActorSystem("reactive-downloader")
    implicit val mat = ActorMaterializer()

    Downloader
      .downloadFromUuidInputStream(
        baseUrl = host,
        format = format,
        // scalastyle:off
        outputHandler = println,
        // scalastyle:on
        in = in
      )
      .andThen { case _ => cleanup() }
      .andThen { case _ => onFinish.run() }
  }
} 
Example 181
Source File: BlockLang.scala    From jgo   with GNU General Public License v3.0 5 votes vote down vote up
package jgo.tools.compiler
package parser

import scala.util.parsing.input.Reader

import lexer._
import scope._
import interm._
import interm.types._

import stmts._
import funcs._


class BlockLang(in: Reader[Token], res: List[Type] = Nil, resNamed: Boolean = false) extends FuncContext with Statements {
  //def, not val.  See comment in StackScoped
  def initialEnclosing = UniverseScope
  
  def targetFuncType = FuncType(Nil, res)
  def hasNamedResults = resNamed
  
  lazy val result = phrase(block)(in)
}

object BlockLang {
  import java.io.{File, InputStream, FileInputStream, InputStreamReader}
  import scala.collection.immutable.PagedSeq
  
  def apply(in: Reader[Char]):  BlockLang = new BlockLang(Scanner(in))
  def apply(inStr: String):     BlockLang = new BlockLang(Scanner(inStr))
  def apply(in: InputStream):   BlockLang = new BlockLang(Scanner(in))
  def apply(file: File):        BlockLang = new BlockLang(Scanner(file))
  
  def from(fileName: String):   BlockLang = new BlockLang(Scanner.from(fileName))
} 
Example 182
Source File: Scanner.scala    From jgo   with GNU General Public License v3.0 5 votes vote down vote up
package jgo.tools.compiler
package lexer

import scala.util.parsing._
import input._
import combinator._

//portions of this class taken from scala.util.parsing.combinator.lexical.Scanners#Scanner
final class Scanner private(prev: Option[Token], in: Reader[Char]) extends Reader[Token] {
  private def this(in: Reader[Char]) = this(None, in)
  
  private val (tok, remainingIn) = Lexical.token(prev, in)
  
  def      first = {  tok }
  lazy val rest  = new Scanner(Some(tok), remainingIn)
  lazy val pos   = Lexical.stripWhitespace(in).pos
  def      atEnd = tok == EOF
  
  override def source = in.source
  override def offset = in.offset
  
  def foreach[U](f: Token => U) {
    var cur = this
    while (!cur.atEnd) {
      f(cur.first)
      cur = cur.rest
    }
  }
}

object Scanner {
  import java.io.{File, InputStream, FileInputStream, InputStreamReader}
  import scala.collection.immutable.PagedSeq
  
  def apply(in: Reader[Char]): Scanner = new Scanner(None, in)
  def apply(inStr: String):    Scanner = new Scanner(new CharArrayReader(inStr.toCharArray()))
  def apply(in: File):         Scanner = apply(new FileInputStream(in))
  def apply(in: InputStream):  Scanner =
    new Scanner(None, new PagedSeqReader(PagedSeq.fromReader(new InputStreamReader(in , "UTF-8"))))
  
  def from(fileName: String): Scanner = apply(new FileInputStream(fileName))
} 
Example 183
Source File: LexTestAll.scala    From jgo   with GNU General Public License v3.0 5 votes vote down vote up
import jgo.tools.compiler._
import parser.BlockLang
import parser.combinatorExten._
import lexer._

import interm.codeseq._

import java.io.{File, InputStream, FileInputStream, InputStreamReader}

object LexTestAll {
  def main(args: Array[String]) {
    if (args.isEmpty)
      testAll(new File(System.getProperty("user.home") + "/Desktop/gotest/"))
    else
      testAll(new File(args(0)))
  }
  
  def testAll(dir: File) {
    for (file <- dir.listFiles)
      if (file.isDirectory)
        testAll(file)
      else if (file.isFile && !file.isHidden)
        test(file)
  }
  
  def test(file: File) {
    println("testing: " + file.getCanonicalPath)
    println()
    
    var cur = Scanner(file)
    print("tokenization: ")
    while (!cur.atEnd) {
      print(cur.first + " ")
      cur = cur.rest
    }
    println()
    println()
  }
} 
Example 184
Source File: Unpacker.scala    From comet-data-pipeline   with Apache License 2.0 5 votes vote down vote up
package com.ebiznext.comet.utils

import java.io.{BufferedInputStream, InputStream}
import java.nio.file.{Files, Paths}

import better.files.File
import org.apache.commons.compress.archivers.{
  ArchiveEntry,
  ArchiveInputStream,
  ArchiveStreamFactory
}
import org.apache.commons.compress.compressors.{CompressorInputStream, CompressorStreamFactory}
import org.apache.commons.compress.utils.IOUtils
import org.apache.commons.io.input.CloseShieldInputStream

import scala.util.Try

object Unpacker {

  def unpack(archiveFile: File, directory: File): Try[Unit] = {
    for {
      inputStream <- Try(Files.newInputStream(Paths.get(archiveFile.pathAsString)))
      it          <- open(inputStream)
    } yield {
      while (it.hasNext) {
        val (entry, is) = it.next()
        if (entry.isDirectory) {
          throw new Exception("Compressed archive cannot directories")
        }
        val targetFile = File(directory, entry.getName)
        val o = Files.newOutputStream(targetFile.path)
        try {
          IOUtils.copy(is, o)
        } finally {
          if (o != null) o.close()
        }
      }
    }
  }

  // https://alexwlchan.net/2019/09/unpacking-compressed-archives-in-scala/
  
  def open(inputStream: InputStream): Try[Iterator[(ArchiveEntry, InputStream)]] =
    for {
      uncompressedInputStream <- createUncompressedStream(inputStream)
      archiveInputStream      <- createArchiveStream(uncompressedInputStream)
      iterator = createIterator(archiveInputStream)
    } yield iterator

  private def createUncompressedStream(inputStream: InputStream): Try[CompressorInputStream] =
    Try {
      new CompressorStreamFactory().createCompressorInputStream(
        getMarkableStream(inputStream)
      )
    }

  private def createArchiveStream(
    uncompressedInputStream: CompressorInputStream
  ): Try[ArchiveInputStream] =
    Try {
      new ArchiveStreamFactory()
        .createArchiveInputStream(
          getMarkableStream(uncompressedInputStream)
        )
    }

  private def createIterator(
    archiveInputStream: ArchiveInputStream
  ): Iterator[(ArchiveEntry, InputStream)] =
    new Iterator[(ArchiveEntry, InputStream)] {
      var latestEntry: ArchiveEntry = _

      override def hasNext: Boolean = {
        latestEntry = archiveInputStream.getNextEntry
        latestEntry != null
      }

      override def next(): (ArchiveEntry, InputStream) =
        (latestEntry, new CloseShieldInputStream(archiveInputStream))
    }

  private def getMarkableStream(inputStream: InputStream): InputStream =
    if (inputStream.markSupported())
      inputStream
    else
      new BufferedInputStream(inputStream)

} 
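A usage sketch for the unpacker above (the archive name and target directory are placeholders): unpack streams every entry of a compressed archive into the given directory and surfaces failures through the returned Try.

import better.files.File
import com.ebiznext.comet.utils.Unpacker
import scala.util.{Failure, Success}

object UnpackerDemo extends App {
  val target = File("extracted").createDirectoryIfNotExists()
  Unpacker.unpack(File("archive.tar.gz"), target) match {
    case Success(_) => println(s"unpacked into $target")
    case Failure(e) => println(s"unpack failed: ${e.getMessage}")
  }
}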
Example 185
Source File: SchemaSpec.scala    From comet-data-pipeline   with Apache License 2.0 5 votes vote down vote up
package com.ebiznext.comet.schema.model

import java.io.{InputStream, StringWriter}

import com.ebiznext.comet.TestHelper
import com.ebiznext.comet.schema.handlers.SchemaHandler

class SchemaSpec extends TestHelper {

  new WithSettings() {
    val schemaHandler = new SchemaHandler(storageHandler)

    "Attribute type" should "be valid" in {
      val stream: InputStream =
        getClass.getResourceAsStream("/sample/default.yml")
      val lines =
        scala.io.Source.fromInputStream(stream).getLines().mkString("\n")
      val types = mapper.readValue(lines, classOf[Types])
      val attr = Attribute(
        "attr",
        "invalid-type", // should raise error non existent type
        Some(true),
        true,
        Some(
          PrivacyLevel("MD5")
        ) // Should raise an error. Privacy cannot be applied on types other than string
      )

      attr.checkValidity(schemaHandler) shouldBe Left(List("Invalid Type invalid-type"))
    }

    "Attribute privacy" should "appliable to any type" in {
      val attr = Attribute(
        "attr",
        "long",
        Some(true),
        true,
        Some(
          PrivacyLevel("ApproxLong(20)")
        ) // Should raise an error. Privacy cannot be applied on types other than stringsettings = settings
      )
      attr.checkValidity(schemaHandler) shouldBe Right(true)
    }

    "Sub Attribute" should "be present for struct types only" in {
      val attr = Attribute(
        "attr",
        "long",
        Some(true),
        true,
        Some(
          PrivacyLevel("ApproxLong(20)")
        ), // Should raise an error. Privacy cannot be applied on types other than string
        attributes = Some(List[Attribute]())
      )
      val expectedErrors = List(
        "Attribute Attribute(attr,long,Some(true),true,Some(ApproxLong(20)),None,None,None,Some(List()),None,None,None) : Simple attributes cannot have sub-attributes",
        "Attribute Attribute(attr,long,Some(true),true,Some(ApproxLong(20)),None,None,None,Some(List()),None,None,None) : when present, attributes list cannot be empty."
      )

      attr.checkValidity(schemaHandler) shouldBe Left(expectedErrors)
    }

    "Position serialization" should "output all fields" in {
      val yml = loadTextFile(s"/expected/yml/position_serialization_${versionSuffix}.yml")

      val attr =
        Attribute("hello", position = Some(Position(1, 2)))
      val writer = new StringWriter()
      mapper.writer().writeValue(writer, attr)
      logger.info("--" + writer.toString + "--")
      logger.info("++" + yml + "++")
      writer.toString.trim should equal(yml)
    }

    "Default value for an attribute" should "only be used for non obligatory fields" in {
      val requiredAttribute =
        Attribute("requiredAttribute", "long", required = true, default = Some("10"))
      requiredAttribute.checkValidity(schemaHandler) shouldBe Left(
        List(
          s"attribute with name ${requiredAttribute.name}: default value valid for optional fields only"
        )
      )

      val optionalAttribute =
        Attribute("optionalAttribute", "long", required = false, default = Some("10"))
      optionalAttribute.checkValidity(schemaHandler) shouldBe Right(true)
    }
  }
} 
Example 186
Source File: PackHelper.scala    From lila-openingexplorer   with GNU Affero General Public License v3.0 5 votes vote down vote up
package lila.openingexplorer

import java.io.{ InputStream, OutputStream }
import chess.format.Uci
import chess.{ Pos, Role }

trait PackHelper {

  protected def writeUint(stream: OutputStream, v: Long) = {
    var value = v
    while (value > 127) {
      stream.write(((value & 127) | 128).toInt)
      value >>= 7
    }
    stream.write((value & 127).toInt)
  }

  protected def readUint(stream: InputStream): Long = {
    var value: Long = 0
    var i: Int      = 0
    var byte: Int   = 0

    do {
      byte = stream.read()
      value |= (byte.toLong & 127) << (7 * i)
      i += 1
    } while ((byte & 128) != 0)

    value
  }

  protected def writeUint16(stream: OutputStream, v: Int) = {
    stream.write(0xff & (v >> 8))
    stream.write(0xff & v)
  }

  protected def readUint16(stream: InputStream): Int =
    stream.read() << 8 | stream.read()

  protected def writeUint48(stream: OutputStream, v: Long) = {
    stream.write((0xff & (v >> 40)).toInt)
    stream.write((0xff & (v >> 32)).toInt)
    stream.write((0xff & (v >> 24)).toInt)
    stream.write((0xff & (v >> 16)).toInt)
    stream.write((0xff & (v >> 8)).toInt)
    stream.write((0xff & v).toInt)
  }

  protected def readUint48(stream: InputStream): Long =
    stream.read.toLong << 40 | stream.read.toLong << 32 |
      stream.read.toLong << 24 | stream.read.toLong << 16 |
      stream.read.toLong << 8 | stream.read.toLong

  protected def writeUci(stream: OutputStream, move: Uci.Move): Unit =
    writeUint16(
      stream,
      Pos.all.indexOf(move.orig) |
        Pos.all.indexOf(move.dest) << 6 |
        move.promotion.fold(0)(r => (Role.allPromotable.indexOf(r)) + 1) << 12
    )

  protected def writeUci(stream: OutputStream, drop: Uci.Drop): Unit = {
    val dest = Pos.all.indexOf(drop.pos)
    writeUint16(stream, dest | dest << 6 | (Role.all.indexOf(drop.role) + 1) << 12)
  }

  protected def writeUci(stream: OutputStream, move: Either[Uci.Move, Uci.Drop]): Unit =
    move.fold(writeUci(stream, _), writeUci(stream, _))

  protected def readUci(stream: InputStream): Either[Uci.Move, Uci.Drop] = {
    val enc  = readUint16(stream)
    val orig = Pos.all(enc & 63)
    val dest = Pos.all((enc >> 6) & 63)
    if (orig == dest) {
      Right(new Uci.Drop(Role.all((enc >> 12) - 1), dest))
    } else {
      val role = if ((enc >> 12) != 0) Some(Role.allPromotable((enc >> 12) - 1)) else None
      Left(new Uci.Move(orig, dest, role))
    }
  }
} 
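A round-trip sketch for the variable-length and fixed-width helpers above. The helpers are protected, so the demo (our own object) mixes the trait in rather than calling them from outside:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import lila.openingexplorer.PackHelper

object PackHelperDemo extends PackHelper with App {
  val out = new ByteArrayOutputStream()
  writeUint(out, 300L)        // 300 spans two 7-bit groups, so two bytes are written
  writeUint16(out, 65535)     // fixed two-byte big-endian value

  val in = new ByteArrayInputStream(out.toByteArray)
  println(readUint(in))       // 300
  println(readUint16(in))     // 65535
}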
Example 187
Source File: SubEntry.scala    From lila-openingexplorer   with GNU Affero General Public License v3.0 5 votes vote down vote up
package lila.openingexplorer

import chess.format.Uci
import java.io.{ InputStream, OutputStream }

case class SubEntry(
    moves: Map[Either[Uci.Move, Uci.Drop], MoveStats],
    gameRefs: List[GameRef]
) extends PackHelper {

  lazy val totalWhite = moves.values.map(_.white).sum
  lazy val totalDraws = moves.values.map(_.draws).sum
  lazy val totalBlack = moves.values.map(_.black).sum

  def totalGames = totalWhite + totalDraws + totalBlack

  def isEmpty = totalGames == 0

  def totalAverageRatingSum = moves.values.map(_.averageRatingSum).sum

  def averageRating: Int =
    if (totalGames == 0) 0 else (totalAverageRatingSum / totalGames).toInt

  def withGameRef(game: GameRef, move: Either[Uci.Move, Uci.Drop]) =
    new SubEntry(
      moves + (move -> moves.getOrElse(move, MoveStats.empty).withGameRef(game)),
      game :: gameRefs
    )

  def withExistingGameRef(game: GameRef) = copy(gameRefs = game :: gameRefs)

  def withoutExistingGameRef(game: GameRef, move: Either[Uci.Move, Uci.Drop]) = {
    val stats =
      moves
        .get(move)
        .map(_.withoutExistingGameRef(game: GameRef))
        .getOrElse(MoveStats.empty)

    new SubEntry(
      if (stats.total > 0) moves + (move -> stats) else moves - move,
      gameRefs.filterNot(_.gameId == game.gameId)
    )
  }

  def writeStats(out: OutputStream) = {
    writeUint(out, moves.size)
    moves.foreach {
      case (move, stats) =>
        writeUci(out, move)
        stats.write(out)
    }
  }

  def write(out: OutputStream) = {
    writeStats(out)

    gameRefs
      .sortWith(_.averageRating > _.averageRating)
      .distinct
      .take(SubEntry.maxTopGames)
      .foreach(_.write(out))
  }
}

object SubEntry extends PackHelper {

  val maxTopGames = 4

  def empty = new SubEntry(Map.empty, List.empty)

  def fromGameRef(game: GameRef, move: Either[Uci.Move, Uci.Drop]) =
    empty.withGameRef(game, move)

  def fromExistingGameRef(game: GameRef) =
    empty.withExistingGameRef(game)

  def readStats(in: InputStream, gameRefs: List[GameRef] = List.empty): SubEntry = {
    var remainingMoves = readUint(in)
    val moves          = scala.collection.mutable.Map.empty[Either[Uci.Move, Uci.Drop], MoveStats]
    while (remainingMoves > 0) {
      moves += (readUci(in) -> MoveStats.read(in))
      remainingMoves -= 1
    }
    new SubEntry(moves.toMap, gameRefs)
  }

  def read(in: InputStream) = {
    val subEntry = readStats(in)

    val gameRefs = scala.collection.mutable.ListBuffer.empty[GameRef]
    while (in.available > 0) {
      gameRefs += GameRef.read(in)
    }

    subEntry.copy(gameRefs = gameRefs.toList)
  }
} 
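
The writeStats/readStats pair uses a count prefix for the move map and then appends the top games, which read loops over until the stream is exhausted. A hedged, standalone sketch of the count-prefixed part using only DataOutputStream/DataInputStream from the JDK; the Map[String, Long] here is a stand-in for the project's move and stats types.

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream }

object CountPrefixedMap {

  def write(out: DataOutputStream, stats: Map[String, Long]): Unit = {
    out.writeInt(stats.size) // count prefix, analogous to writeUint(out, moves.size)
    stats.foreach { case (move, games) =>
      out.writeUTF(move)
      out.writeLong(games)
    }
  }

  def read(in: DataInputStream): Map[String, Long] = {
    val count = in.readInt()
    (1 to count).map(_ => in.readUTF() -> in.readLong()).toMap
  }

  def main(args: Array[String]): Unit = {
    val buffer = new ByteArrayOutputStream()
    write(new DataOutputStream(buffer), Map("e2e4" -> 42L, "d2d4" -> 17L))
    val in = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray))
    println(read(in)) // Map(e2e4 -> 42, d2d4 -> 17)
  }
}
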
Example 188
Source File: MoveStats.scala    From lila-openingexplorer   with GNU Affero General Public License v3.0 5 votes vote down vote up
package lila.openingexplorer

import java.io.{ InputStream, OutputStream }

import chess.Color

case class MoveStats(
    white: Long,
    draws: Long,
    black: Long,
    averageRatingSum: Long
) extends PackHelper {

  def total = white + draws + black

  def isEmpty = total == 0

  def averageRating: Int =
    if (total == 0) 0 else (averageRatingSum / total).toInt

  def withGameRef(game: GameRef) = {
    val avgRatingSum = averageRatingSum + game.averageRating

    game.winner match {
      case Some(Color.White) =>
        copy(white = white + 1, averageRatingSum = avgRatingSum)
      case Some(Color.Black) =>
        copy(black = black + 1, averageRatingSum = avgRatingSum)
      case None =>
        copy(draws = draws + 1, averageRatingSum = avgRatingSum)
    }
  }

  def withoutExistingGameRef(game: GameRef) = {
    val avgRatingSum = averageRatingSum - game.averageRating

    game.winner match {
      case Some(Color.White) =>
        copy(white = white - 1, averageRatingSum = avgRatingSum)
      case Some(Color.Black) =>
        copy(black = black - 1, averageRatingSum = avgRatingSum)
      case None =>
        copy(draws = draws - 1, averageRatingSum = avgRatingSum)
    }
  }

  def add(other: MoveStats) =
    new MoveStats(
      white + other.white,
      draws + other.draws,
      black + other.black,
      averageRatingSum + other.averageRatingSum
    )

  def write(out: OutputStream) = {
    writeUint(out, white)
    writeUint(out, draws)
    writeUint(out, black)
    writeUint(out, averageRatingSum)
  }
}

object MoveStats extends PackHelper {

  def empty = new MoveStats(0, 0, 0, 0)

  def fromGameRef(game: GameRef) = empty.withGameRef(game)

  def read(in: InputStream) =
    new MoveStats(readUint(in), readUint(in), readUint(in), readUint(in))
} 
Example 189
Source File: OffsetSeqLog.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming


import java.io.{InputStream, OutputStream}
import java.nio.charset.StandardCharsets._

import scala.io.{Source => IOSource}

import org.apache.spark.sql.SparkSession


class OffsetSeqLog(sparkSession: SparkSession, path: String)
  extends HDFSMetadataLog[OffsetSeq](sparkSession, path) {

  override protected def deserialize(in: InputStream): OffsetSeq = {
    // called inside a try-finally where the underlying stream is closed in the caller
    def parseOffset(value: String): Offset = value match {
      case OffsetSeqLog.SERIALIZED_VOID_OFFSET => null
      case json => SerializedOffset(json)
    }
    val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines()
    if (!lines.hasNext) {
      throw new IllegalStateException("Incomplete log file")
    }
    val version = lines.next()
    if (version != OffsetSeqLog.VERSION) {
      throw new IllegalStateException(s"Unknown log version: ${version}")
    }

    // read metadata
    val metadata = lines.next().trim match {
      case "" => None
      case md => Some(md)
    }
    OffsetSeq.fill(metadata, lines.map(parseOffset).toArray: _*)
  }

  override protected def serialize(offsetSeq: OffsetSeq, out: OutputStream): Unit = {
    // called inside a try-finally where the underlying stream is closed in the caller
    out.write(OffsetSeqLog.VERSION.getBytes(UTF_8))

    // write metadata
    out.write('\n')
    out.write(offsetSeq.metadata.map(_.json).getOrElse("").getBytes(UTF_8))

    // write offsets, one per line
    offsetSeq.offsets.map(_.map(_.json)).foreach { offset =>
      out.write('\n')
      offset match {
        case Some(json: String) => out.write(json.getBytes(UTF_8))
        case None => out.write(OffsetSeqLog.SERIALIZED_VOID_OFFSET.getBytes(UTF_8))
      }
    }
  }
}

object OffsetSeqLog {
  private val VERSION = "v1"
  private val SERIALIZED_VOID_OFFSET = "-"
} 
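
The on-disk format is line-oriented: a version line, a metadata line (possibly empty), then one serialized offset per line with "-" marking a missing offset. The sketch below reproduces that layout without the Spark types, using plain strings in place of OffsetSeq and SerializedOffset; it is an illustration, not Spark's actual implementation.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream}
import java.nio.charset.StandardCharsets.UTF_8

import scala.io.{Source => IOSource}

object LineLogSketch {
  private val VERSION = "v1"
  private val SERIALIZED_VOID_OFFSET = "-"

  def serialize(metadata: Option[String], offsets: Seq[Option[String]], out: OutputStream): Unit = {
    out.write(VERSION.getBytes(UTF_8))
    out.write('\n')
    out.write(metadata.getOrElse("").getBytes(UTF_8))
    offsets.foreach { offset =>
      out.write('\n')
      out.write(offset.getOrElse(SERIALIZED_VOID_OFFSET).getBytes(UTF_8))
    }
  }

  def deserialize(in: InputStream): (Option[String], Seq[Option[String]]) = {
    val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines()
    if (!lines.hasNext || lines.next() != VERSION) {
      throw new IllegalStateException("Incomplete log file or unknown version")
    }
    val metadata = lines.next().trim match {
      case "" => None
      case md => Some(md)
    }
    val offsets = lines.map {
      case SERIALIZED_VOID_OFFSET => None
      case json => Some(json)
    }.toSeq
    (metadata, offsets)
  }

  def main(args: Array[String]): Unit = {
    val buffer = new ByteArrayOutputStream()
    serialize(Some("""{"batchWatermarkMs":0}"""), Seq(Some("""{"offset":5}"""), None), buffer)
    println(deserialize(new ByteArrayInputStream(buffer.toByteArray)))
  }
}
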
Example 190
Source File: ProcessTestUtils.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.test

import java.io.{InputStream, IOException}

import scala.sys.process.BasicIO

object ProcessTestUtils {
  class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread {
    this.setDaemon(true)

    override def run(): Unit = {
      try {
        BasicIO.processFully(capture)(stream)
      } catch {
        case _: IOException =>
          // Ignore the IOException thrown when the process terminates, which closes the
          // input stream abruptly.
      }
    }
  }
} 
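
A standalone variant of the same pattern, draining a child process's stdout line by line on a daemon thread. BasicIO is replaced with a plain BufferedReader, and the `echo` command in main is an assumption (it expects a Unix-like environment).

import java.io.{BufferedReader, IOException, InputStream, InputStreamReader}

object CaptureProcessOutput {

  // Drains an InputStream on a daemon thread, handing each line to `capture`.
  class OutputCapturer(stream: InputStream, capture: String => Unit) extends Thread {
    setDaemon(true)

    override def run(): Unit = {
      val reader = new BufferedReader(new InputStreamReader(stream))
      try {
        Iterator.continually(reader.readLine()).takeWhile(_ != null).foreach(capture)
      } catch {
        case _: IOException => // the stream closes abruptly when the process exits
      } finally {
        reader.close()
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "hello from a child process").start()
    val capturer = new OutputCapturer(process.getInputStream, line => println(s"captured: $line"))
    capturer.start()
    process.waitFor()
    capturer.join(1000) // give the capturer a moment to finish draining
  }
}
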
Example 191
Source File: CryptoStreamUtils.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.security

import java.io.{InputStream, OutputStream}
import java.util.Properties
import javax.crypto.KeyGenerator
import javax.crypto.spec.{IvParameterSpec, SecretKeySpec}

import org.apache.commons.crypto.random._
import org.apache.commons.crypto.stream._

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._


  private[this] def createInitializationVector(properties: Properties): Array[Byte] = {
    val iv = new Array[Byte](IV_LENGTH_IN_BYTES)
    val initialIVStart = System.currentTimeMillis()
    CryptoRandomFactory.getCryptoRandom(properties).nextBytes(iv)
    val initialIVFinish = System.currentTimeMillis()
    val initialIVTime = initialIVFinish - initialIVStart
    if (initialIVTime > 2000) {
      logWarning(s"It costs ${initialIVTime} milliseconds to create the Initialization Vector " +
        s"used by CryptoStream")
    }
    iv
  }
} 
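
Only the IV helper survives in this excerpt; the enclosing class and the IV_LENGTH_IN_BYTES constant live elsewhere in Spark. The sketch below shows the same timing check with the JDK's SecureRandom in place of commons-crypto's CryptoRandomFactory, assuming a 16-byte IV; it is an approximation, not Spark's code.

import java.security.SecureRandom

object InitializationVectorSketch {
  private val IvLengthInBytes = 16 // assumed AES block size; not taken from the excerpt above

  def createInitializationVector(): Array[Byte] = {
    val iv = new Array[Byte](IvLengthInBytes)
    val start = System.currentTimeMillis()
    new SecureRandom().nextBytes(iv) // may block briefly if the entropy pool is cold
    val elapsed = System.currentTimeMillis() - start
    if (elapsed > 2000) {
      println(s"It took $elapsed ms to create the initialization vector")
    }
    iv
  }

  def main(args: Array[String]): Unit =
    println(createInitializationVector().map(b => f"$b%02x").mkString)
}
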
Example 192
Source File: CommandUtils.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.worker

import java.io.{File, FileOutputStream, InputStream, IOException}

import scala.collection.JavaConverters._
import scala.collection.Map

import org.apache.spark.SecurityManager
import org.apache.spark.deploy.Command
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils


  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)
    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    //       terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          Utils.copyStream(in, out, true)
        } catch {
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
} 
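
A self-contained take on redirectStream that avoids Spark's Utils.copyStream by copying the bytes manually and closing both streams when the copy ends. The `echo` command and the log file name in main are placeholders for illustration.

import java.io.{File, FileOutputStream, InputStream, IOException}

object RedirectStream {

  // Copies `in` to `file` (appending) on a background thread, closing both streams afterwards.
  def redirect(in: InputStream, file: File): Thread = {
    val thread = new Thread("redirect output to " + file) {
      override def run(): Unit = {
        val out = new FileOutputStream(file, true)
        val buffer = new Array[Byte](8192)
        try {
          Iterator.continually(in.read(buffer)).takeWhile(_ != -1).foreach(n => out.write(buffer, 0, n))
        } catch {
          case e: IOException => println(s"Redirection to $file closed: ${e.getMessage}")
        } finally {
          out.close()
          in.close()
        }
      }
    }
    thread.start()
    thread
  }

  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "redirected line").start()
    redirect(process.getInputStream, new File("process-output.log")).join()
  }
}
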
Example 193
Source File: EventHistoryReporter.scala    From sparklens   with Apache License 2.0 5 votes vote down vote up
package com.qubole.sparklens.app

import java.io.{BufferedInputStream, InputStream}
import java.net.URI

import com.ning.compress.lzf.LZFInputStream
import com.qubole.sparklens.QuboleJobListener
import com.qubole.sparklens.common.Json4sWrapper
import com.qubole.sparklens.helper.HDFSConfigHelper
import net.jpountz.lz4.LZ4BlockInputStream
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkConf
import org.json4s.DefaultFormats
import org.xerial.snappy.SnappyInputStream


class EventHistoryReporter(file: String, extraConf: List[(String, String)] = List.empty) {

  // This uses reflection against spark-2.0.0's ReplayListenerBus
  val busKlass = Class.forName("org.apache.spark.scheduler.ReplayListenerBus")
  val bus = busKlass.newInstance()
  val addListenerMethod = busKlass.getMethod("addListener", classOf[Object])
  val conf = new SparkConf()
    .set("spark.sparklens.reporting.disabled", "false")
    .set("spark.sparklens.save.data", "false")

  extraConf.foreach(x => {
    conf.set(x._1, x._2)
  })

  val listener = new QuboleJobListener(conf)
  addListenerMethod.invoke(bus, listener)


  try {
    val replayMethod = busKlass.getMethod("replay", classOf[InputStream], classOf[String],
      classOf[Boolean])
    replayMethod.invoke(bus, getDecodedInputStream(file, conf), file, boolean2Boolean(false))
  } catch {
    case _: NoSuchMethodException => // spark binaries are 2.1* and above
      val replayMethod = busKlass.getMethod("replay", classOf[InputStream], classOf[String],
        classOf[Boolean], classOf[String => Boolean])
      replayMethod.invoke(bus, getDecodedInputStream(file, conf), file, boolean2Boolean(false),
        getFilter _)
    case x: Exception =>
      println(s"Failed replaying events from ${file} [${x.getMessage}]")
  }


  // Borrowed from CompressionCodecs in spark
  private def getDecodedInputStream(file: String, conf: SparkConf): InputStream = {

    val fs = FileSystem.get(new URI(file), HDFSConfigHelper.getHadoopConf(Some(conf)))
    val path = new Path(file)
    val bufStream = new BufferedInputStream(fs.open(path))

    val logName = path.getName.stripSuffix(".inprogress")
    val codecName: Option[String] = logName.split("\\.").tail.lastOption

    codecName.getOrElse("") match {
      case "lz4" => new LZ4BlockInputStream(bufStream)
      case "lzf" => new LZFInputStream(bufStream)
      case "snappy" => new SnappyInputStream(bufStream)
      case _ => bufStream
    }
  }

  private def getFilter(eventString: String): Boolean = {
    implicit val formats = DefaultFormats
    eventFilter.contains(Json4sWrapper.parse(eventString).extract[Map[String, Any]].get("Event")
      .get.asInstanceOf[String])
  }

  private def eventFilter: Set[String] = {
    Set(
      "SparkListenerTaskEnd",
      "SparkListenerApplicationStart",
      "SparkListenerApplicationEnd",
      "SparkListenerExecutorAdded",
      "SparkListenerExecutorRemoved",
      "SparkListenerJobStart",
      "SparkListenerJobEnd",
      "SparkListenerStageSubmitted",
      "SparkListenerStageCompleted"
    )
  }

} 
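
getDecodedInputStream dispatches on the file extension to pick a decompressor. A trimmed-down, runnable sketch of that dispatch using only the JDK (so only gzip is handled, where the reporter above also wires in lz4, lzf and snappy); the sample file name in main is made up.

import java.io.{BufferedInputStream, FileInputStream, FileOutputStream, InputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object DecodedInputStream {

  // Picks a decompressor from the file extension, falling back to the raw buffered stream.
  def open(path: String): InputStream = {
    val buffered = new BufferedInputStream(new FileInputStream(path))
    path.stripSuffix(".inprogress").split("\\.").lastOption.getOrElse("") match {
      case "gz" => new GZIPInputStream(buffered)
      case _ => buffered
    }
  }

  def main(args: Array[String]): Unit = {
    val out = new GZIPOutputStream(new FileOutputStream("sample.txt.gz"))
    out.write("hello".getBytes("UTF-8"))
    out.close()
    val in = open("sample.txt.gz")
    println(scala.io.Source.fromInputStream(in, "UTF-8").mkString) // prints: hello
    in.close()
  }
}
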
Example 194
Source File: CustomReceiver.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.examples.streaming

import java.io.{InputStreamReader, BufferedReader, InputStream}
import java.net.Socket

import org.apache.spark.{SparkConf, Logging}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver


  private def receive() {
    var socket: Socket = null
    var userInput: String = null
    try {
      logInfo("Connecting to " + host + ":" + port)
      socket = new Socket(host, port)
      logInfo("Connected to " + host + ":" + port)
      val reader = new BufferedReader(new InputStreamReader(socket.getInputStream(), "UTF-8"))
      userInput = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput)
        userInput = reader.readLine()
      }
      reader.close()
      socket.close()
      logInfo("Stopped receiving")
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
} 
Example 195
Source File: MetricsConfig.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.Logging
import org.apache.spark.util.Utils

private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties) {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  def initialize() {
    // Add default properties in case there's no properties file
    setDefaultProperties(properties)

    // If spark.metrics.conf is not set, try to get file in class path
    val isOpt: Option[InputStream] = configFile.map(new FileInputStream(_)).orElse {
      try {
        Option(Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME))
      } catch {
        case e: Exception =>
          logError("Error loading default configuration file", e)
          None
      }
    }

    isOpt.foreach { is =>
      try {
        properties.load(is)
      } finally {
        is.close()
      }
    }

    propertyCategories = subProperties(properties, INSTANCE_REGEX)
    if (propertyCategories.contains(DEFAULT_PREFIX)) {
      import scala.collection.JavaConversions._

      val defaultProperty = propertyCategories(DEFAULT_PREFIX)
      for { (inst, prop) <- propertyCategories
            if (inst != DEFAULT_PREFIX)
            (k, v) <- defaultProperty
            if (prop.getProperty(k) == null) } {
        prop.setProperty(k, v)
      }
    }
  }

  def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = {
    val subProperties = new mutable.HashMap[String, Properties]
    import scala.collection.JavaConversions._
    prop.foreach { kv =>
      if (regex.findPrefixOf(kv._1).isDefined) {
        val regex(prefix, suffix) = kv._1
        subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2)
      }
    }
    subProperties
  }

  def getInstance(inst: String): Properties = {
    propertyCategories.get(inst) match {
      case Some(s) => s
      case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties)
    }
  }
} 
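
The initialize method follows a common pattern: prefer an explicit config file, fall back to a classpath resource, and always close the stream after Properties.load. A compact sketch of just that pattern; the object and method names are made up, with "metrics.properties" kept as the default resource name.

import java.io.{FileInputStream, InputStream}
import java.util.Properties

object PropertiesLoader {

  // Loads properties from an explicit file if given, otherwise from a classpath resource.
  def load(configFile: Option[String], resourceName: String = "metrics.properties"): Properties = {
    val properties = new Properties()
    val streamOpt: Option[InputStream] =
      configFile.map(new FileInputStream(_))
        .orElse(Option(getClass.getClassLoader.getResourceAsStream(resourceName)))
    streamOpt.foreach { stream =>
      try properties.load(stream)
      finally stream.close()
    }
    properties
  }

  def main(args: Array[String]): Unit =
    println(load(configFile = None).stringPropertyNames()) // empty if no resource is present
}
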
Example 196
Source File: CommandUtils.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.worker

import java.io.{File, FileOutputStream, InputStream, IOException}
import java.lang.System._

import scala.collection.JavaConversions._
import scala.collection.Map

import org.apache.spark.Logging
import org.apache.spark.deploy.Command
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils


  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)
    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    //       terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          Utils.copyStream(in, out, true)
        } catch {
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
} 
Example 197
Source File: ReplayListenerBus.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol


  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      while (lines.hasNext) {
        currentLine = lines.next()
        try {
          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore an exception from the last line of the file, which might be truncated
            if (!maybeTruncated || lines.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

} 
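
The replay loop only tolerates a parse failure on the final line of a possibly-truncated log. The sketch below keeps that rule but swaps the Spark event parser for a trivial integer parser so it runs standalone; the names and the NumberFormatException stand in for JsonProtocol and JsonParseException.

import java.io.{ByteArrayInputStream, InputStream}

import scala.io.Source

object TolerantReplay {

  // Parses one "event" per line; a failure is fatal unless it happens on the last line
  // of a log that may have been truncated mid-write.
  def replay(in: InputStream, maybeTruncated: Boolean)(parse: String => Int): Seq[Int] = {
    val lines = Source.fromInputStream(in).getLines()
    val events = Seq.newBuilder[Int]
    while (lines.hasNext) {
      val line = lines.next()
      try {
        events += parse(line)
      } catch {
        case e: NumberFormatException =>
          if (!maybeTruncated || lines.hasNext) throw e
          else println(s"Ignoring unparseable final line: $line")
      }
    }
    events.result()
  }

  def main(args: Array[String]): Unit = {
    val intact = "1\n2\n3".getBytes("UTF-8")
    println(replay(new ByteArrayInputStream(intact), maybeTruncated = true)(_.trim.toInt))    // List(1, 2, 3)
    val truncated = "1\n2\n3x".getBytes("UTF-8")
    println(replay(new ByteArrayInputStream(truncated), maybeTruncated = true)(_.trim.toInt)) // List(1, 2)
  }
}
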
Example 198
Source File: CSVUtil.scala    From aerosolve   with Apache License 2.0 5 votes vote down vote up
package com.airbnb.common.ml.strategy.testutil

import java.io.InputStream

import scala.reflect.ClassTag

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

object CSVUtil {
  def readCSVToLines(path: String): Iterator[Array[String]] = {
    val stream: InputStream = getClass.getResourceAsStream(path)
    val lines = scala.io.Source.fromInputStream(stream).getLines().drop(1)
    lines.map { line =>
      line.split(",").map(_.stripPrefix("\"").stripSuffix("\"").trim)
    }
  }

  def parseCSVToSeq[T:ClassTag](name: String,
                                parseKey: (Array[String]) => String,
                                parseSample:(Array[String]) => T): Seq[(String, Seq[T])] = {
    val lines = readCSVToLines(name)
    val samples = lines
      .map(cols => (parseKey(cols), parseSample(cols)))
      .toSeq
      .groupBy(_._1)
      .map { case (key, seq) => (key, seq.map(_._2)) }
      .toSeq
    samples
  }

  def parseCSVToRDD[T:ClassTag](name: String,
                                parseKey: (Array[String]) => String,
                                parseSample:(Array[String]) => T,
                                sc: SparkContext): RDD[(String, Seq[T])] = {
    val samples = parseCSVToSeq(name, parseKey, parseSample)
    val rdd = sc.parallelize(samples)
    rdd
  }

} 
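
readCSVToLines resolves its path with getResourceAsStream, so it needs the CSV on the test classpath. The sketch below applies the same header-drop and quote-stripping to an in-memory stream, so it runs without any resource file; the names and sample data are illustrative.

import java.io.{ByteArrayInputStream, InputStream}

object CsvStreamSketch {

  // Drops the header row and strips surrounding quotes, mirroring CSVUtil.readCSVToLines.
  def readLines(stream: InputStream): Iterator[Array[String]] =
    scala.io.Source.fromInputStream(stream).getLines().drop(1).map { line =>
      line.split(",").map(_.stripPrefix("\"").stripSuffix("\"").trim)
    }

  def main(args: Array[String]): Unit = {
    val csv = "id,price\n\"a1\",100\n\"a2\",250\n"
    val rows = readLines(new ByteArrayInputStream(csv.getBytes("UTF-8")))
    rows.foreach(cols => println(cols.mkString(" | "))) // a1 | 100, then a2 | 250
  }
}
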
Example 199
Source File: TestHelper.scala    From odsc-west-streaming-trends   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.StandardCharsets

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.protobuf.Message
import com.googlecode.protobuf.format.JsonFormat
import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider}
import com.twilio.open.protocol.Calls.CallEvent
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.Seq
import scala.io.Source
import scala.reflect.ClassTag
import scala.reflect.classTag

object TestHelper {
  val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper")
  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
  }

  val jsonFormat: JsonFormat = new JsonFormat

  def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = {
    val fileString = Source.fromFile(file).mkString
    val parsed = mapper.readValue(fileString, classOf[Sceanario])
    parsed.input.map { data =>
      val json = mapper.writeValueAsString(data)
      convert[T](json)
    }
  }

  def convert[T<: Message : ClassTag](json: String): T = {
    val clazz = classTag[T].runtimeClass
    val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder]
    try {
      val input: InputStream = new ByteArrayInputStream(json.getBytes())
      jsonFormat.merge(input, builder)
      builder.build().asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw e
    }
  }

  def asMockKafkaDataFrame(event: CallEvent): MockKafkaDataFrame = {
    val key = event.getEventId.getBytes(StandardCharsets.UTF_8)
    val value = event.toByteArray
    MockKafkaDataFrame(key, value)
  }

}

case class MockKafkaDataFrame(key: Array[Byte], value: Array[Byte])


@SerialVersionUID(1L)
case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable

case class Sceanario(input: Seq[Any], expected: Option[Any] = None)

trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider {
  self: Suite =>

  @transient var _sparkSql: SparkSession = _
  @transient private var _sc: SparkContext = _

  override def sc: SparkContext = _sc

  def conf: SparkConf

  def sparkSql: SparkSession = _sparkSql

  override def beforeAll() {
    _sparkSql = SparkSession.builder().config(conf).getOrCreate()

    _sc = _sparkSql.sparkContext
    setup(_sc)
    super.beforeAll()
  }

  override def afterAll() {
    try {
      _sparkSql.close()
      _sparkSql = null
      LocalSparkContext.stop(_sc)
      _sc = null
    } finally {
      super.afterAll()
    }
  }

} 
Example 200
Source File: CompressedFiles.scala    From tensorflow_scala   with Apache License 2.0 5 votes vote down vote up
package org.platanios.tensorflow.data.utilities

import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.utils.IOUtils

import java.io.{File, FileOutputStream, InputStream}
import java.nio.file.{Files, Path}
import java.util.zip.GZIPInputStream


object CompressedFiles {
  def decompressTGZ(tgzFilePath: Path, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    decompressTGZStream(Files.newInputStream(tgzFilePath), destinationPath, bufferSize)
  }

  def decompressTar(tarFilePath: Path, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    decompressTarStream(Files.newInputStream(tarFilePath), destinationPath, bufferSize)
  }

  def decompressTGZStream(tgzStream: InputStream, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    decompressTarStream(new GZIPInputStream(tgzStream), destinationPath, bufferSize)
  }

  def decompressTarStream(tarStream: InputStream, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    val inputStream = new TarArchiveInputStream(tarStream)
    var entry = inputStream.getNextTarEntry
    while (entry != null) {
      if (!entry.isDirectory) {
        val currentFile = new File(destinationPath.toAbsolutePath.toString, entry.getName)
        val parentFile = currentFile.getParentFile
        if (!parentFile.exists)
          parentFile.mkdirs()
        // Close the file stream explicitly; IOUtils.copy does not close the streams it is given.
        val fileStream = new FileOutputStream(currentFile)
        try IOUtils.copy(inputStream, fileStream)
        finally fileStream.close()
      }
      entry = inputStream.getNextTarEntry
    }
    inputStream.close()
  }
}
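
A small usage sketch, assuming the CompressedFiles object above (and its commons-compress dependency) is on the classpath; the archive and target paths are placeholders, not files shipped with the project.

import java.nio.file.Paths

import org.platanios.tensorflow.data.utilities.CompressedFiles

object DecompressExample {
  def main(args: Array[String]): Unit = {
    // Placeholder paths; point these at a real .tar.gz archive and an output directory.
    val archive = Paths.get("data/dataset.tar.gz")
    val target = Paths.get("data/extracted")
    CompressedFiles.decompressTGZ(archive, target)
  }
}
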