org.slf4j.LoggerFactory Scala Examples

The following examples show how to use org.slf4j.LoggerFactory. Each example is extracted from an open source project; the source file and the project it comes from are listed above the code.
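As a quick orientation, the snippet below is a minimal, self-contained sketch of the pattern shared by the examples: obtain a Logger from LoggerFactory (conventionally named after the enclosing class) and log through SLF4J's parameterized API. The class name MyService and its run method are illustrative placeholders, not taken from any project below.

import org.slf4j.{Logger, LoggerFactory}

class MyService {
  // One logger per class, named after that class, is the common SLF4J convention.
  private val logger: Logger = LoggerFactory.getLogger(classOf[MyService])

  def run(input: String): Unit = {
    // The {} placeholder defers message formatting until the log level is enabled.
    logger.info("Processing `{}`", input)
  }
}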
Example 1
Source File: DefaultSource.scala    From spark-snowflake   with Apache License 2.0
package net.snowflake.spark.snowflake

import net.snowflake.spark.snowflake.streaming.SnowflakeSink
import net.snowflake.spark.snowflake.Utils.SNOWFLAKE_SOURCE_SHORT_NAME
import org.apache.spark.sql.execution.streaming.Sink
import org.apache.spark.sql.sources._
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
import org.slf4j.LoggerFactory
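// NOTE: the enclosing DefaultSource class declaration is not included in this excerpt;
// names such as `log` and `jdbcWrapper` used below are defined in the omitted code.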


  override def createRelation(sqlContext: SQLContext,
                              saveMode: SaveMode,
                              parameters: Map[String, String],
                              data: DataFrame): BaseRelation = {

    val params = Parameters.mergeParameters(parameters)
    // check spark version for push down
    if (params.autoPushdown) {
      SnowflakeConnectorUtils.checkVersionAndEnablePushdown(
        sqlContext.sparkSession
      )
    }
    // pass parameters to pushdown functions
    pushdowns.setGlobalParameter(params)
    val table = params.table.getOrElse {
      throw new IllegalArgumentException(
        "For save operations you must specify a Snowfake table name with the 'dbtable' parameter"
      )
    }

    def tableExists: Boolean = {
      val conn = jdbcWrapper.getConnector(params)
      try {
        jdbcWrapper.tableExists(conn, table.toString)
      } finally {
        conn.close()
      }
    }

    val (doSave, dropExisting) = saveMode match {
      case SaveMode.Append => (true, false)
      case SaveMode.Overwrite => (true, true)
      case SaveMode.ErrorIfExists =>
        if (tableExists) {
          sys.error(
            s"Table $table already exists! (SaveMode is set to ErrorIfExists)"
          )
        } else {
          (true, false)
        }
      case SaveMode.Ignore =>
        if (tableExists) {
          log.info(s"Table $table already exists -- ignoring save request.")
          (false, false)
        } else {
          (true, false)
        }
    }

    if (doSave) {
      val updatedParams = parameters.updated("overwrite", dropExisting.toString)
      new SnowflakeWriter(jdbcWrapper)
        .save(
          sqlContext,
          data,
          saveMode,
          Parameters.mergeParameters(updatedParams)
        )

    }

    createRelation(sqlContext, parameters)
  }

  override def createSink(sqlContext: SQLContext,
                          parameters: Map[String, String],
                          partitionColumns: Seq[String],
                          outputMode: OutputMode): Sink =
    new SnowflakeSink(sqlContext, parameters, partitionColumns, outputMode)
} 
Example 2
Source File: SnowflakeConnectorUtils.scala    From spark-snowflake   with Apache License 2.0
package net.snowflake.spark.snowflake

import java.nio.file.Paths
import java.security.InvalidKeyException

import net.snowflake.spark.snowflake.pushdowns.SnowflakeStrategy
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}
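// NOTE: the enclosing SnowflakeConnectorUtils object declaration is not included in this
// excerpt; `log` and `enablePushdownSession`, used below, are defined in the omitted code.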


  def disablePushdownSession(session: SparkSession): Unit = {
    session.experimental.extraStrategies = session.experimental.extraStrategies
      .filterNot(strategy => strategy.isInstanceOf[SnowflakeStrategy])
  }

  def setPushdownSession(session: SparkSession, enabled: Boolean): Unit = {
    if (enabled) {
      enablePushdownSession(session)
    } else {
      disablePushdownSession(session)
    }
  }

  // TODO: Improve error handling with retries, etc.

  @throws[SnowflakeConnectorException]
  def handleS3Exception(ex: Exception): Unit = {
    if (ex.getCause.isInstanceOf[InvalidKeyException]) {
      // Most likely cause: Unlimited strength policy files not installed
      var msg: String = "Strong encryption with Java JRE requires JCE " +
        "Unlimited Strength Jurisdiction Policy " +
        "files. " +
        "Follow JDBC client installation instructions " +
        "provided by Snowflake or contact Snowflake " +
        "Support. This needs to be installed in the Java runtime for all Spark executor nodes."

      log.error(
        "JCE Unlimited Strength policy files missing: {}. {}.",
        ex.getMessage: Any,
        ex.getCause.getMessage: Any
      )

      val bootLib: String =
        java.lang.System.getProperty("sun.boot.library.path")

      if (bootLib != null) {
        msg += " The target directory on your system is: " + Paths
          .get(bootLib, "security")
          .toString
        log.error(msg)
      }

      throw new SnowflakeConnectorException(msg)
    } else {
      throw ex
    }
  }
}

class SnowflakeConnectorException(message: String) extends Exception(message)
class SnowflakePushdownException(message: String)
  extends SnowflakeConnectorException(message)
class SnowflakeConnectorFeatureNotSupportException(message: String)
  extends Exception(message)

class SnowflakePushdownUnsupportedException(message: String,
                                            val unsupportedOperation: String,
                                            val details: String,
                                            val isKnownUnsupportedOperation: Boolean)
  extends Exception(message) 
Example 3
Source File: MLSQLSpringConfiguration.scala    From Linkis   with Apache License 2.0
package com.webank.wedatasphere.linkis.entrance.conf

import com.webank.wedatasphere.linkis.entrance.EntranceParser
import com.webank.wedatasphere.linkis.entrance.annotation._
import com.webank.wedatasphere.linkis.entrance.execute._
import com.webank.wedatasphere.linkis.entrance.executer.MLSQLEngineExecutorManagerImpl
import com.webank.wedatasphere.linkis.entrance.parser.MLSQLEntranceParser
import com.webank.wedatasphere.linkis.scheduler.queue.GroupFactory
import org.slf4j.LoggerFactory
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean
import org.springframework.context.annotation.Configuration

@Configuration
class MLSQLSpringConfiguration {


  private val logger = LoggerFactory.getLogger(classOf[MLSQLSpringConfiguration])

  @EntranceExecutorManagerBeanAnnotation
  @ConditionalOnMissingBean(value = Array(classOf[EntranceExecutorManager]))
  def generateEntranceExecutorManager(@GroupFactoryBeanAnnotation.GroupFactoryAutowiredAnnotation groupFactory: GroupFactory,
                                      @EngineBuilderBeanAnnotation.EngineBuilderAutowiredAnnotation engineBuilder: EngineBuilder,
                                      @EngineRequesterBeanAnnotation.EngineRequesterAutowiredAnnotation engineRequester: EngineRequester,
                                      @EngineSelectorBeanAnnotation.EngineSelectorAutowiredAnnotation engineSelector: EngineSelector,
                                      @EngineManagerBeanAnnotation.EngineManagerAutowiredAnnotation engineManager: EngineManager,
                                      @Autowired entranceExecutorRulers: Array[EntranceExecutorRuler]): EntranceExecutorManager =
    new MLSQLEngineExecutorManagerImpl(groupFactory, engineBuilder, engineRequester, engineSelector, engineManager, entranceExecutorRulers)


  @EntranceParserBeanAnnotation
  @ConditionalOnMissingBean(name = Array(EntranceParserBeanAnnotation.BEAN_NAME))
  def generateEntranceParser(): EntranceParser = {
    logger.info("begin to get MLSQL Entrance parser")
    new MLSQLEntranceParser()
  }

} 
Example 4
Source File: JDBCSpringConfiguration.scala    From Linkis   with Apache License 2.0
package com.webank.wedatasphere.linkis.entrance.conf

import com.webank.wedatasphere.linkis.entrance.EntranceParser
import com.webank.wedatasphere.linkis.entrance.annotation._
import com.webank.wedatasphere.linkis.entrance.execute._
import com.webank.wedatasphere.linkis.entrance.executer.JDBCEngineExecutorManagerImpl
import com.webank.wedatasphere.linkis.entrance.parser.JDBCEntranceParser
import com.webank.wedatasphere.linkis.scheduler.queue.GroupFactory
import org.slf4j.LoggerFactory
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.context.annotation.Configuration

@Configuration
class JDBCSpringConfiguration {


  private val logger = LoggerFactory.getLogger(classOf[JDBCSpringConfiguration])

  @EntranceExecutorManagerBeanAnnotation
  def generateEntranceExecutorManager(@GroupFactoryBeanAnnotation.GroupFactoryAutowiredAnnotation groupFactory: GroupFactory,
                                      @EngineBuilderBeanAnnotation.EngineBuilderAutowiredAnnotation engineBuilder: EngineBuilder,
                                      @EngineRequesterBeanAnnotation.EngineRequesterAutowiredAnnotation engineRequester: EngineRequester,
                                      @EngineSelectorBeanAnnotation.EngineSelectorAutowiredAnnotation engineSelector: EngineSelector,
                                      @EngineManagerBeanAnnotation.EngineManagerAutowiredAnnotation engineManager: EngineManager,
                                      @Autowired entranceExecutorRulers: Array[EntranceExecutorRuler]): EntranceExecutorManager =
    new JDBCEngineExecutorManagerImpl(groupFactory, engineBuilder, engineRequester, engineSelector, engineManager, entranceExecutorRulers)




  @EntranceParserBeanAnnotation
  def generateEntranceParser():EntranceParser = {
    logger.info("begin to get JDBC Entrance parser")
    new JDBCEntranceParser()
  }

} 
Example 5
Source File: TestHook.scala    From spark-snowflake   with Apache License 2.0
package net.snowflake.spark.snowflake.test

import net.snowflake.client.jdbc.{ErrorCode, SnowflakeSQLException}
import net.snowflake.spark.snowflake.test.TestHookFlag.TestHookFlag
import org.slf4j.{Logger, LoggerFactory}

object TestHookFlag extends Enumeration {
  type TestHookFlag = Value

  // All predefined test hook names start with TH_ (TEST HOOK).
  val TH_WRITE_ERROR_AFTER_DROP_OLD_TABLE = Value("TH_WRITE_ERROR_AFTER_DROP_OLD_TABLE")
  val TH_WRITE_ERROR_AFTER_CREATE_NEW_TABLE = Value("TH_WRITE_ERROR_AFTER_CREATE_NEW_TABLE")
  val TH_WRITE_ERROR_AFTER_TRUNCATE_TABLE = Value("TH_WRITE_ERROR_AFTER_TRUNCATE_TABLE")
  val TH_WRITE_ERROR_AFTER_COPY_INTO = Value("TH_WRITE_ERROR_AFTER_COPY_INTO")
  val TH_GCS_UPLOAD_RAISE_EXCEPTION = Value("TH_GCS_UPLOAD_RAISE_EXCEPTION")
  val TH_COPY_INTO_TABLE_MISS_FILES_SUCCESS = Value("TH_COPY_INTO_TABLE_MISS_FILES_SUCCESS")
  val TH_COPY_INTO_TABLE_MISS_FILES_FAIL = Value("TH_COPY_INTO_TABLE_MISS_FILES_FAIL")
}

object TestHook {
  val log: Logger = LoggerFactory.getLogger(getClass)

  private val ENABLED_TEST_FLAGS =
    new scala.collection.mutable.HashSet[TestHookFlag]()

  private var IS_TEST_ENABLED = false

  private val TEST_MESSAGE_PREFIX =
    "Internal test error (should NOT be seen by user):"

  // Enable test
  private[snowflake] def enableTestHook() : Unit = {
    IS_TEST_ENABLED = true
  }

  // Disable test
  private[snowflake] def disableTestHook() : Unit = {
    IS_TEST_ENABLED = false
    ENABLED_TEST_FLAGS.clear()
  }

  // Enable a specific test flag
  private[snowflake] def enableTestFlag(testFlag : TestHookFlag): Unit = {
    enableTestHook()
    if (!ENABLED_TEST_FLAGS.contains(testFlag)) {
      ENABLED_TEST_FLAGS.add(testFlag)
    }
  }

  // Enable a specific test flag only (all other flags are disabled)
  private[snowflake] def enableTestFlagOnly(testFlag : TestHookFlag): Unit = {
    disableTestHook()
    enableTestFlag(testFlag)
  }

  // Disable a specific test flag
  private[snowflake] def disableTestFlag(testFlag : TestHookFlag): Unit = {
    if (ENABLED_TEST_FLAGS.contains(testFlag)) {
      ENABLED_TEST_FLAGS.remove(testFlag)
    }
    if (ENABLED_TEST_FLAGS.isEmpty) {
      disableTestHook()
    }
  }

  // Check whether a flag is enabled
  private[snowflake] def isTestFlagEnabled(testFlag : TestHookFlag): Boolean = {
    IS_TEST_ENABLED && ENABLED_TEST_FLAGS.contains(testFlag)
  }

  // Raise exception if the specific test flag is enabled.
  private[snowflake] def raiseExceptionIfTestFlagEnabled(testFlag: TestHookFlag,
                                                         errorMessage: String)
  : Unit = {
    if (isTestFlagEnabled(testFlag)) {
      throw new SnowflakeSQLException(ErrorCode.INTERNAL_ERROR,
        s"$TEST_MESSAGE_PREFIX  $errorMessage")
    }
  }
} 
Example 6
Source File: ClusterTest.scala    From spark-snowflake   with Apache License 2.0
package net.snowflake.spark.snowflake

import net.snowflake.spark.snowflake.testsuite.ClusterTestSuiteBase
import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.SparkSession

object ClusterTest {
  val log: Logger = LoggerFactory.getLogger(getClass)

  val RemoteMode = "remote"
  val LocalMode = "local"

  val TestSuiteSeparator = ";"

  // Driver function to run the test.
  def main(args: Array[String]): Unit = {
    log.info(s"Test Spark Connector: ${net.snowflake.spark.snowflake.Utils.VERSION}")

    val usage = s"""Two parameters are needed: [local | remote] and
                    | testClassNames (using ';' to separate multiple classes)
                    |""".stripMargin
    log.info(usage)

    if (args.length < 2) {
      throw new Exception(s"At least two parameters are need. Usage: $usage")
    }

    // Setup Spark session.
    // local mode is introduced for debugging purpose
    val runMode = args(0)
    var sparkSessionBuilder = SparkSession
      .builder()
      .appName("Spark SQL basic example")
      .config("spark.some.config.option", "some-value")
    if (runMode.equalsIgnoreCase(LocalMode)) {
      sparkSessionBuilder = sparkSessionBuilder
        .config("spark.master", "local")
    }
    val spark = sparkSessionBuilder.getOrCreate()

    // Run specified test suites
    val testSuiteNames = args(1).split(TestSuiteSeparator)
    for (testSuiteName <- testSuiteNames) {
      if (!testSuiteName.trim.isEmpty) {
        // Retrieve commit ID from env.
        val commitID = scala.util.Properties
          .envOrElse(TestUtils.GITHUB_SHA, "commit id not set")

        // val testSuiteName = "net.snowflake.spark.snowflake.testsuite.BasicReadWriteSuite"
        val resultBuilder = new ClusterTestResultBuilder()
          .withTestType("Scala")
          .withTestCaseName(testSuiteName)
          .withCommitID(commitID)
          .withTestStatus(TestUtils.TEST_RESULT_STATUS_INIT)
          .withStartTimeInMill(System.currentTimeMillis())
          .withGithubRunId(TestUtils.githubRunId)

        try {
          Class
            .forName(testSuiteName)
            .newInstance()
            .asInstanceOf[ClusterTestSuiteBase]
            .run(spark, resultBuilder)
        } catch {
          case e: Throwable =>
            log.error(e.getMessage)
            resultBuilder
              .withTestStatus(TestUtils.TEST_RESULT_STATUS_EXCEPTION)
              .withReason(e.getMessage)
        } finally {
          // Set test end time.
          resultBuilder
            .withEndTimeInMill(System.currentTimeMillis())
          // Write test result
          resultBuilder.build().writeToSnowflake()
        }
      }
    }

    spark.stop()
  }
} 
Example 7
Source File: AuthServiceJWT.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.auth

import java.util.concurrent.{CompletableFuture, CompletionStage}

import com.daml.lf.data.Ref
import com.daml.jwt.{JwtVerifier, JwtVerifierBase}
import com.daml.ledger.api.auth.AuthServiceJWT.Error
import io.grpc.Metadata
import org.slf4j.{Logger, LoggerFactory}
import spray.json._

import scala.collection.mutable.ListBuffer
import scala.util.Try


class AuthServiceJWT(verifier: JwtVerifierBase) extends AuthService {

  protected val logger: Logger = LoggerFactory.getLogger(AuthServiceJWT.getClass)

  override def decodeMetadata(headers: Metadata): CompletionStage[Claims] = {
    decodeAndParse(headers).fold(
      error => {
        logger.warn("Authorization error: " + error.message)
        CompletableFuture.completedFuture(Claims.empty)
      },
      token => CompletableFuture.completedFuture(payloadToClaims(token))
    )
  }

  private[this] def parsePayload(jwtPayload: String): Either[Error, AuthServiceJWTPayload] = {
    import AuthServiceJWTCodec.JsonImplicits._
    Try(JsonParser(jwtPayload).convertTo[AuthServiceJWTPayload]).toEither.left.map(t =>
      Error("Could not parse JWT token: " + t.getMessage))
  }

  private[this] def decodeAndParse(headers: Metadata): Either[Error, AuthServiceJWTPayload] = {
    val bearerTokenRegex = "Bearer (.*)".r

    for {
      headerValue <- Option
        .apply(headers.get(AUTHORIZATION_KEY))
        .toRight(Error("Authorization header not found"))
      token <- bearerTokenRegex
        .findFirstMatchIn(headerValue)
        .map(_.group(1))
        .toRight(Error("Authorization header does not use Bearer format"))
      decoded <- verifier
        .verify(com.daml.jwt.domain.Jwt(token))
        .toEither
        .left
        .map(e => Error("Could not verify JWT token: " + e.message))
      parsed <- parsePayload(decoded.payload)
    } yield parsed
  }

  private[this] def payloadToClaims(payload: AuthServiceJWTPayload): Claims = {
    val claims = ListBuffer[Claim]()

    // Any valid token authorizes the user to use public services
    claims.append(ClaimPublic)

    if (payload.admin)
      claims.append(ClaimAdmin)

    payload.actAs
      .foreach(party => claims.append(ClaimActAsParty(Ref.Party.assertFromString(party))))

    payload.readAs
      .foreach(party => claims.append(ClaimReadAsParty(Ref.Party.assertFromString(party))))

    Claims(
      claims = claims.toList,
      ledgerId = payload.ledgerId,
      participantId = payload.participantId,
      applicationId = payload.applicationId,
      expiration = payload.exp,
    )
  }
}

object AuthServiceJWT {
  final case class Error(message: String)

  def apply(verifier: com.auth0.jwt.interfaces.JWTVerifier) =
    new AuthServiceJWT(new JwtVerifier(verifier))

  def apply(verifier: JwtVerifierBase) =
    new AuthServiceJWT(verifier)
} 
Example 8
Source File: AuthorizationInterceptor.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.auth.interceptor

import com.daml.ledger.api.auth.{AuthService, Claims}
import com.daml.platform.server.api.validation.ErrorFactories.unauthenticated
import io.grpc.{
  Context,
  Contexts,
  Metadata,
  ServerCall,
  ServerCallHandler,
  ServerInterceptor,
  Status
}
import org.slf4j.{Logger, LoggerFactory}

import scala.compat.java8.FutureConverters
import scala.concurrent.ExecutionContext
import scala.util.{Failure, Success, Try}


final class AuthorizationInterceptor(protected val authService: AuthService, ec: ExecutionContext)
    extends ServerInterceptor {

  private val logger: Logger = LoggerFactory.getLogger(AuthorizationInterceptor.getClass)
  private val internalAuthenticationError =
    Status.INTERNAL.withDescription("Failed to get claims from request metadata")

  import AuthorizationInterceptor.contextKeyClaim

  override def interceptCall[ReqT, RespT](
      call: ServerCall[ReqT, RespT],
      headers: Metadata,
      nextListener: ServerCallHandler[ReqT, RespT]): ServerCall.Listener[ReqT] = {
    // Note: Context uses ThreadLocal storage, we need to capture it outside of the async block below.
    // Contexts are immutable and safe to pass around.
    val prevCtx = Context.current

    // The method interceptCall() must return a Listener.
    // The target listener is created by calling `Contexts.interceptCall()`.
    // However, this is only done after we have asynchronously received the claims.
    // Therefore, we need to return a listener that buffers all messages until the target listener is available.
    new AsyncForwardingListener[ReqT] {
      FutureConverters
        .toScala(authService.decodeMetadata(headers))
        .onComplete {
          case Failure(exception) =>
            logger.warn(s"Failed to get claims from request metadata: ${exception.getMessage}")
            call.close(internalAuthenticationError, new Metadata())
            new ServerCall.Listener[Nothing]() {}
          case Success(Claims.empty) =>
            logger.debug(s"Auth metadata decoded into empty claims, returning UNAUTHENTICATED")
            call.close(Status.UNAUTHENTICATED, new Metadata())
            new ServerCall.Listener[Nothing]() {}
          case Success(claims) =>
            val nextCtx = prevCtx.withValue(contextKeyClaim, claims)
            // Contexts.interceptCall() creates a listener that wraps all methods of `nextListener`
            // such that `Context.current` returns `nextCtx`.
            val nextListenerWithContext =
              Contexts.interceptCall(nextCtx, call, headers, nextListener)
            setNextListener(nextListenerWithContext)
            nextListenerWithContext
        }(ec)
    }
  }
}

object AuthorizationInterceptor {

  private val contextKeyClaim = Context.key[Claims]("AuthServiceDecodedClaim")

  def extractClaimsFromContext(): Try[Claims] =
    Option(contextKeyClaim.get()).fold[Try[Claims]](Failure(unauthenticated()))(Success(_))

  def apply(authService: AuthService, ec: ExecutionContext): AuthorizationInterceptor =
    new AuthorizationInterceptor(authService, ec)

} 
Example 9
Source File: LedgerContext.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.sandbox.perf

import akka.actor.ActorSystem
import akka.pattern
import com.daml.lf.data.Ref.PackageId
import com.daml.ledger.api.domain
import com.daml.ledger.api.v1.active_contracts_service.ActiveContractsServiceGrpc
import com.daml.ledger.api.v1.active_contracts_service.ActiveContractsServiceGrpc.ActiveContractsServiceStub
import com.daml.ledger.api.v1.command_service.CommandServiceGrpc
import com.daml.ledger.api.v1.command_service.CommandServiceGrpc.CommandService
import com.daml.ledger.api.v1.ledger_identity_service.LedgerIdentityServiceGrpc.LedgerIdentityServiceStub
import com.daml.ledger.api.v1.ledger_identity_service.{
  GetLedgerIdentityRequest,
  LedgerIdentityServiceGrpc
}
import com.daml.ledger.api.v1.testing.reset_service.ResetServiceGrpc.ResetService
import com.daml.ledger.api.v1.testing.reset_service.{ResetRequest, ResetServiceGrpc}
import io.grpc.{Channel, StatusRuntimeException}
import org.slf4j.LoggerFactory
import scalaz.syntax.tag._

import scala.concurrent.duration._
import scala.concurrent.{ExecutionContext, Future}

final class LedgerContext(channel: Channel, packageIds: Iterable[PackageId])(
    implicit executionContext: ExecutionContext
) {

  private val logger = LoggerFactory.getLogger(this.getClass)

  val ledgerId: domain.LedgerId =
    domain.LedgerId(
      LedgerIdentityServiceGrpc
        .blockingStub(channel)
        .getLedgerIdentity(GetLedgerIdentityRequest())
        .ledgerId)

  def reset()(implicit system: ActorSystem): Future[LedgerContext] = {
    def waitForNewLedger(retries: Int): Future[domain.LedgerId] =
      if (retries <= 0)
        Future.failed(new RuntimeException("waitForNewLedger: out of retries"))
      else {
        ledgerIdentityService
          .getLedgerIdentity(GetLedgerIdentityRequest())
          .flatMap { resp =>
            // TODO: compare with current Ledger ID and retry when not changed
            Future.successful(domain.LedgerId(resp.ledgerId))
          }
          .recoverWith {
            case _: StatusRuntimeException =>
              logger.debug(
                "waitForNewLedger: retrying identity request in 1 second. {} retries remain",
                retries - 1)
              pattern.after(1.seconds, system.scheduler)(waitForNewLedger(retries - 1))
            case t: Throwable =>
              logger.warn("waitForNewLedger: failed to reconnect!")
              throw t
          }
      }
    for {
      _ <- resetService.reset(ResetRequest(ledgerId.unwrap))
      _ <- waitForNewLedger(10)
    } yield new LedgerContext(channel, packageIds)
  }

  def ledgerIdentityService: LedgerIdentityServiceStub =
    LedgerIdentityServiceGrpc.stub(channel)

  def commandService: CommandService =
    CommandServiceGrpc.stub(channel)

  def acsService: ActiveContractsServiceStub =
    ActiveContractsServiceGrpc.stub(channel)

  def resetService: ResetService =
    ResetServiceGrpc.stub(channel)

} 
Example 10
Source File: LedgerEntries.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.sandbox.stores.ledger.inmemory

import java.util.concurrent.atomic.AtomicReference

import akka.NotUsed
import akka.stream.scaladsl.Source
import com.daml.ledger.participant.state.v1.Offset
import com.daml.lf.data.Ref
import com.daml.platform.akkastreams.dispatcher.Dispatcher
import com.daml.platform.akkastreams.dispatcher.SubSource.RangeSource
import org.slf4j.LoggerFactory
import com.daml.platform.ApiOffset.ApiOffsetConverter
import com.daml.platform.sandbox.stores.ledger.SandboxOffset

import scala.collection.immutable.TreeMap

private[ledger] class LedgerEntries[T](identify: T => String) {

  private val logger = LoggerFactory.getLogger(this.getClass)

  private case class Entries(ledgerEnd: Offset, items: TreeMap[Offset, T])

  // Tuple of (ledger end cursor, ledger map). There is never an entry for the initial cursor. End is inclusive.
  private val state = new AtomicReference(Entries(ledgerBeginning, TreeMap.empty))

  private def store(item: T): Offset = {
    val Entries(newOffset, _) = state.updateAndGet({
      case Entries(ledgerEnd, ledger) =>
        val newEnd = SandboxOffset.toOffset(SandboxOffset.fromOffset(ledgerEnd) + 1)
        Entries(newEnd, ledger + (newEnd -> item))
    })
    if (logger.isTraceEnabled())
      logger.trace("Recording `{}` at offset `{}`", identify(item): Any, newOffset.toApiString: Any)
    newOffset
  }

  def incrementOffset(increment: Int): Offset = {
    val Entries(newOffset, _) = state.updateAndGet({
      case Entries(ledgerEnd, ledger) =>
        val newEnd = SandboxOffset.toOffset(SandboxOffset.fromOffset(ledgerEnd) + increment)
        Entries(newEnd, ledger)
    })
    if (logger.isTraceEnabled())
      logger.trace("Bumping offset to `{}`", newOffset.toApiString)
    newOffset
  }

  private val dispatcher = Dispatcher[Offset]("inmemory-ledger", Offset.beforeBegin, ledgerEnd)

  def getSource(
      startExclusive: Option[Offset],
      endInclusive: Option[Offset]): Source[(Offset, T), NotUsed] =
    dispatcher.startingAt(
      startExclusive.getOrElse(ledgerBeginning),
      RangeSource(
        (exclusiveStart, inclusiveEnd) =>
          Source[(Offset, T)](
            state.get().items.from(exclusiveStart).filter(_._1 > exclusiveStart).to(inclusiveEnd)),
      ),
      endInclusive
    )

  def publish(item: T): Offset = {
    val newHead = store(item)
    dispatcher.signalNewHead(newHead)
    newHead
  }

  def ledgerBeginning: Offset = SandboxOffset.toOffset(0)

  def items = state.get().items.iterator

  def ledgerEnd: Offset = state.get().ledgerEnd

  def nextTransactionId: Ref.LedgerString =
    Ref.LedgerString.assertFromString((SandboxOffset.fromOffset(ledgerEnd) + 1).toString)
} 
Example 11
Source File: GlobalLogLevel.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.sandbox

import ch.qos.logback.classic.Level
import org.slf4j.{Logger, LoggerFactory}

object GlobalLogLevel {
  def set(level: Level): Unit = {
    val rootLogger = LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME)
    LoggerFactory.getILoggerFactory match {
      case loggerContext: ch.qos.logback.classic.LoggerContext =>
        rootLogger.info(s"Sandbox verbosity changed to $level")
        loggerContext.getLoggerList.forEach(_.setLevel(level))
      case _ =>
        rootLogger.warn(s"Sandbox verbosity cannot be set to requested $level")
    }
  }
} 
Example 12
Source File: TrackerMap.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.apiserver.services.tracking

import java.util.concurrent.atomic.AtomicReference

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.v1.command_service.SubmitAndWaitRequest
import com.daml.ledger.api.v1.completion.Completion
import com.daml.logging.{ContextualizedLogger, LoggingContext}
import org.slf4j.LoggerFactory

import scala.collection.immutable.HashMap
import scala.concurrent.duration.{FiniteDuration, _}
import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}
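// NOTE: the enclosing object/class declarations are not included in this excerpt; the
// AsyncResourceState values (Waiting, Ready, Closed) and the TrackerMap class referenced
// below are defined in the omitted code.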


  final class AsyncResource[T <: AutoCloseable](future: Future[T]) {
    private val logger = LoggerFactory.getLogger(this.getClass)

    // Must progress Waiting => Ready => Closed or Waiting => Closed.
    val state: AtomicReference[AsyncResourceState[T]] = new AtomicReference(Waiting)

    future.andThen({
      case Success(t) =>
        if (!state.compareAndSet(Waiting, Ready(t))) {
          // This is the punch line of AsyncResource.
          // If we've been closed in the meantime, we must close the underlying resource also.
          // This "on-failure-to-complete" behavior is not present in scala or java Futures.
          t.close()
        }
      // Someone should be listening to this failure downstream
      // TODO(mthvedt): Refactor so at least one downstream listener is always present,
      // and exceptions are never dropped.
      case Failure(ex) =>
        logger.error("failure to get async resource", ex)
        state.set(Closed)
    })(DirectExecutionContext)

    def flatMap[U](f: T => Future[U])(implicit ex: ExecutionContext): Future[U] = {
      state.get() match {
        case Waiting => future.flatMap(f)
        case Closed => throw new IllegalStateException()
        case Ready(t) => f(t)
      }
    }

    def map[U](f: T => U)(implicit ex: ExecutionContext): Future[U] =
      flatMap(t => Future.successful(f(t)))

    def ifPresent[U](f: T => U): Option[U] = state.get() match {
      case Ready(t) => Some(f(t))
      case _ => None
    }

    def close(): Unit = state.getAndSet(Closed) match {
      case Ready(t) => t.close()
      case _ =>
    }
  }

  def apply(retentionPeriod: FiniteDuration)(implicit logCtx: LoggingContext): TrackerMap =
    new TrackerMap(retentionPeriod)
} 
Example 13
Source File: CommandCompletionServiceValidation.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_completion_service.CommandCompletionServiceGrpc.CommandCompletionService
import com.daml.ledger.api.v1.command_completion_service._
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.dec.DirectExecutionContext
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

//TODO: this class is only needed by DamlOnXCommandCompletionService.scala. Must be deleted once that's gone!
class CommandCompletionServiceValidation(
    val service: CommandCompletionService with AutoCloseable,
    val ledgerId: LedgerId)
    extends CommandCompletionService
    with FieldValidations
    with GrpcApiService
    with ProxyCloseable
    with ErrorFactories {

  protected val logger: Logger = LoggerFactory.getLogger(CommandCompletionService.getClass)

  override def completionStream(
      request: CompletionStreamRequest,
      responseObserver: StreamObserver[CompletionStreamResponse]): Unit = {
    val validation = for {
      _ <- matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      _ <- requireNonEmptyString(request.applicationId, "application_id")
      _ <- requireNonEmpty(request.parties, "parties")
    } yield request

    validation.fold(
      exception => responseObserver.onError(exception),
      value => service.completionStream(value, responseObserver)
    )
  }

  override def completionEnd(request: CompletionEndRequest): Future[CompletionEndResponse] = {
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .fold(Future.failed, _ => service.completionEnd(request))
  }

  override def bindService(): ServerServiceDefinition =
    CommandCompletionServiceGrpc.bindService(this, DirectExecutionContext)
} 
Example 14
Source File: ActiveContractsServiceValidation.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.active_contracts_service.ActiveContractsServiceGrpc.ActiveContractsService
import com.daml.ledger.api.v1.active_contracts_service.{
  ActiveContractsServiceGrpc,
  GetActiveContractsRequest,
  GetActiveContractsResponse
}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

class ActiveContractsServiceValidation(
    protected val service: ActiveContractsService with AutoCloseable,
    val ledgerId: LedgerId)
    extends ActiveContractsService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(ActiveContractsService.getClass)

  override def getActiveContracts(
      request: GetActiveContractsRequest,
      responseObserver: StreamObserver[GetActiveContractsResponse]): Unit = {
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .fold(responseObserver.onError, _ => service.getActiveContracts(request, responseObserver))
  }
  override def bindService(): ServerServiceDefinition =
    ActiveContractsServiceGrpc.bindService(this, DirectExecutionContext)
} 
Example 15
Source File: LedgerConfigurationServiceValidation.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.ledger_configuration_service.LedgerConfigurationServiceGrpc.LedgerConfigurationService
import com.daml.ledger.api.v1.ledger_configuration_service.{
  GetLedgerConfigurationRequest,
  GetLedgerConfigurationResponse,
  LedgerConfigurationServiceGrpc
}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

class LedgerConfigurationServiceValidation(
    protected val service: LedgerConfigurationService with GrpcApiService,
    protected val ledgerId: LedgerId)
    extends LedgerConfigurationService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(LedgerConfigurationService.getClass)

  override def getLedgerConfiguration(
      request: GetLedgerConfigurationRequest,
      responseObserver: StreamObserver[GetLedgerConfigurationResponse]): Unit =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId)).fold(
      t => responseObserver.onError(t),
      _ => service.getLedgerConfiguration(request, responseObserver)
    )

  override def bindService(): ServerServiceDefinition =
    LedgerConfigurationServiceGrpc.bindService(this, DirectExecutionContext)
} 
Example 16
Source File: PackageServiceValidation.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.package_service.PackageServiceGrpc.PackageService
import com.daml.ledger.api.v1.package_service._
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.Function.const
import scala.concurrent.Future

class PackageServiceValidation(
    protected val service: PackageService with AutoCloseable,
    val ledgerId: LedgerId)
    extends PackageService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(PackageService.getClass)

  override def listPackages(request: ListPackagesRequest): Future[ListPackagesResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.listPackages
      )

  override def getPackage(request: GetPackageRequest): Future[GetPackageResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.getPackage
      )

  override def getPackageStatus(
      request: GetPackageStatusRequest): Future[GetPackageStatusResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.getPackageStatus
      )
  override def bindService(): ServerServiceDefinition =
    PackageServiceGrpc.bindService(this, DirectExecutionContext)

  override def close(): Unit = service.close()
} 
Example 17
Source File: GrpcCommandService.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.services.grpc

import java.time.{Duration, Instant}

import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_service.CommandServiceGrpc.CommandService
import com.daml.ledger.api.v1.command_service._
import com.daml.ledger.api.validation.{CommandsValidator, SubmitAndWaitRequestValidator}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.dec.DirectExecutionContext
import com.daml.platform.server.api.ProxyCloseable
import com.google.protobuf.empty.Empty
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

class GrpcCommandService(
    protected val service: CommandService with AutoCloseable,
    val ledgerId: LedgerId,
    currentLedgerTime: () => Instant,
    currentUtcTime: () => Instant,
    maxDeduplicationTime: () => Option[Duration]
) extends CommandService
    with GrpcApiService
    with ProxyCloseable {

  protected val logger: Logger = LoggerFactory.getLogger(CommandService.getClass)

  private[this] val validator =
    new SubmitAndWaitRequestValidator(new CommandsValidator(ledgerId))

  override def submitAndWait(request: SubmitAndWaitRequest): Future[Empty] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWait(request))

  override def submitAndWaitForTransactionId(
      request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionIdResponse] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWaitForTransactionId(request))

  override def submitAndWaitForTransaction(
      request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionResponse] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWaitForTransaction(request))

  override def submitAndWaitForTransactionTree(
      request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionTreeResponse] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWaitForTransactionTree(request))

  override def bindService(): ServerServiceDefinition =
    CommandServiceGrpc.bindService(this, DirectExecutionContext)

} 
Example 18
Source File: GrpcCommandSubmissionService.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.services.grpc

import java.time.{Duration, Instant}

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_submission_service.CommandSubmissionServiceGrpc.{
  CommandSubmissionService => ApiCommandSubmissionService
}
import com.daml.ledger.api.v1.command_submission_service.{
  CommandSubmissionServiceGrpc,
  SubmitRequest => ApiSubmitRequest
}
import com.daml.ledger.api.validation.{CommandsValidator, SubmitRequestValidator}
import com.daml.metrics.{Metrics, Timed}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import com.daml.platform.server.api.services.domain.CommandSubmissionService
import com.google.protobuf.empty.Empty
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

class GrpcCommandSubmissionService(
    override protected val service: CommandSubmissionService with AutoCloseable,
    ledgerId: LedgerId,
    currentLedgerTime: () => Instant,
    currentUtcTime: () => Instant,
    maxDeduplicationTime: () => Option[Duration],
    metrics: Metrics,
) extends ApiCommandSubmissionService
    with ProxyCloseable
    with GrpcApiService {

  protected val logger: Logger = LoggerFactory.getLogger(ApiCommandSubmissionService.getClass)

  private val validator = new SubmitRequestValidator(new CommandsValidator(ledgerId))

  override def submit(request: ApiSubmitRequest): Future[Empty] =
    Timed.future(
      metrics.daml.commands.submissions,
      Timed
        .value(
          metrics.daml.commands.validation,
          validator
            .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime()))
        .fold(
          Future.failed,
          service.submit(_).map(_ => Empty.defaultInstance)(DirectExecutionContext))
    )

  override def bindService(): ServerServiceDefinition =
    CommandSubmissionServiceGrpc.bindService(this, DirectExecutionContext)

} 
Example 19
Source File: DispatcherImpl.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.akkastreams.dispatcher

import java.util.concurrent.atomic.AtomicReference

import akka.NotUsed
import akka.stream.scaladsl.Source
import com.github.ghik.silencer.silent
import org.slf4j.LoggerFactory

import scala.collection.immutable

final class DispatcherImpl[Index: Ordering](
    name: String,
    zeroIndex: Index,
    headAtInitialization: Index)
    extends Dispatcher[Index] {

  private val logger = LoggerFactory.getLogger(getClass)

  require(
    !indexIsBeforeZero(headAtInitialization),
    s"head supplied at Dispatcher initialization $headAtInitialization is before zero index $zeroIndex. " +
      s"This would imply that the ledger end is before the ledger begin, which makes this invalid configuration."
  )

  private sealed abstract class State extends Product with Serializable {
    def getSignalDispatcher: Option[SignalDispatcher]

    def getLastIndex: Index
  }

  // the following silent are due to
  // <https://github.com/scala/bug/issues/4440>
  @silent
  private final case class Running(lastIndex: Index, signalDispatcher: SignalDispatcher)
      extends State {
    override def getLastIndex: Index = lastIndex

    override def getSignalDispatcher: Option[SignalDispatcher] = Some(signalDispatcher)
  }

  @silent
  private final case class Closed(lastIndex: Index) extends State {
    override def getLastIndex: Index = lastIndex

    override def getSignalDispatcher: Option[SignalDispatcher] = None
  }

  // So why not broadcast the actual new index, instead of using a signaller?
  // The reason is if we do that, the new indices race with readHead
  // in a way that makes it hard to start up new subscriptions. In particular,
  // we can tolerate NewIndexSignals being out of order or dropped, maintaining the weaker invariant that,
  // if head is updated, at least one NewIndexSignal eventually arrives.

  private val state = new AtomicReference[State](Running(headAtInitialization, SignalDispatcher()))
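  // NOTE: the definition that encloses the following `apply` method (a helper that tracks
  // the mutable `max` index it references) and other members of DispatcherImpl are omitted
  // from this excerpt.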

  
    override def apply(newHead: Index): immutable.Iterable[(Index, Index)] =
      if (Ordering[Index].gt(newHead, max)) {
        val intervalBegin = max
        max = newHead
        List(intervalBegin -> newHead)
      } else Nil
  }

  private def indexIsBeforeZero(checkedIndex: Index): Boolean =
    Ordering[Index].gt(zeroIndex, checkedIndex)

  def close(): Unit =
    state.getAndUpdate {
      case Running(idx, _) => Closed(idx)
      case c: Closed => c
    } match {
      case Running(idx, disp) =>
        disp.signal()
        disp.close()
      case c: Closed => ()
    }

  private def closedError: IllegalStateException =
    new IllegalStateException(s"$name: Dispatcher is closed")

} 
Example 20
Source File: Tests.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.testtool

import java.nio.file.Path

import com.daml.ledger.api.testtool
import com.daml.ledger.api.testtool.infrastructure.{BenchmarkReporter, LedgerTestSuite}
import com.daml.ledger.api.testtool.tests._
import org.slf4j.LoggerFactory

object Tests {
  type Tests = Map[String, LedgerTestSuite]

  
  def performanceTests(path: Option[Path]): Tests = {
    val reporter =
      (key: String, value: Double) =>
        path
          .map(BenchmarkReporter.toFile)
          .getOrElse(BenchmarkReporter.toStream(System.out))
          .addReport(key, value)

    Envelope.values.flatMap { envelope =>
      {
        val throughputKey: String = performanceEnvelopeThroughputTestKey(envelope)
        val latencyKey: String = performanceEnvelopeLatencyTestKey(envelope)
        val transactionSizeKey: String = performanceEnvelopeTransactionSizeTestKey(envelope)
        List(
          throughputKey -> new testtool.tests.PerformanceEnvelope.ThroughputTest(
            logger = LoggerFactory.getLogger(throughputKey),
            envelope = envelope,
            reporter = reporter,
          ),
          latencyKey -> new testtool.tests.PerformanceEnvelope.LatencyTest(
            logger = LoggerFactory.getLogger(latencyKey),
            envelope = envelope,
            reporter = reporter,
          ),
          transactionSizeKey -> new testtool.tests.PerformanceEnvelope.TransactionSizeScaleTest(
            logger = LoggerFactory.getLogger(transactionSizeKey),
            envelope = envelope,
          ),
        )
      }
    }
  }.toMap

  private[this] def performanceEnvelopeThroughputTestKey(envelope: Envelope): String =
    s"PerformanceEnvelope.${envelope.name}.Throughput"
  private[this] def performanceEnvelopeLatencyTestKey(envelope: Envelope): String =
    s"PerformanceEnvelope.${envelope.name}.Latency"
  private[this] def performanceEnvelopeTransactionSizeTestKey(envelope: Envelope): String =
    s"PerformanceEnvelope.${envelope.name}.TransactionSize"

  private[testtool] val PerformanceTestsKeys =
    Envelope.values.flatMap { envelope =>
      List(
        performanceEnvelopeThroughputTestKey(envelope),
        performanceEnvelopeLatencyTestKey(envelope),
        performanceEnvelopeTransactionSizeTestKey(envelope),
      )
    }
} 
Example 21
Source File: ParticipantSessionManager.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.testtool.infrastructure.participant

import io.grpc.netty.{NegotiationType, NettyChannelBuilder}
import io.netty.channel.nio.NioEventLoopGroup
import io.netty.channel.socket.nio.NioSocketChannel
import io.netty.util.concurrent.DefaultThreadFactory
import org.slf4j.LoggerFactory

import scala.collection.concurrent.TrieMap
import scala.concurrent.{ExecutionContext, Future}

private[infrastructure] final class ParticipantSessionManager {

  private[this] val logger = LoggerFactory.getLogger(classOf[ParticipantSession])

  private[this] val channels = TrieMap.empty[ParticipantSessionConfiguration, ParticipantSession]

  @throws[RuntimeException]
  private def create(
      config: ParticipantSessionConfiguration,
  )(implicit ec: ExecutionContext): ParticipantSession = {
    logger.info(s"Connecting to participant at ${config.host}:${config.port}...")
    val threadFactoryPoolName = s"grpc-event-loop-${config.host}-${config.port}"
    val daemonThreads = false
    val threadFactory: DefaultThreadFactory =
      new DefaultThreadFactory(threadFactoryPoolName, daemonThreads)
    logger.info(
      s"gRPC thread factory instantiated with pool '$threadFactoryPoolName' (daemon threads: $daemonThreads)",
    )
    val threadCount = Runtime.getRuntime.availableProcessors
    val eventLoopGroup: NioEventLoopGroup =
      new NioEventLoopGroup(threadCount, threadFactory)
    logger.info(
      s"gRPC event loop thread group instantiated with $threadCount threads using pool '$threadFactoryPoolName'",
    )
    val managedChannelBuilder = NettyChannelBuilder
      .forAddress(config.host, config.port)
      .eventLoopGroup(eventLoopGroup)
      .channelType(classOf[NioSocketChannel])
      .directExecutor()
      .usePlaintext()
    for (ssl <- config.ssl; sslContext <- ssl.client) {
      logger.info("Setting up managed communication channel with transport security")
      managedChannelBuilder
        .useTransportSecurity()
        .sslContext(sslContext)
        .negotiationType(NegotiationType.TLS)
    }
    managedChannelBuilder.maxInboundMessageSize(10000000)
    val managedChannel = managedChannelBuilder.build()
    logger.info(s"Connection to participant at ${config.host}:${config.port}")
    new ParticipantSession(config, managedChannel, eventLoopGroup)
  }

  def getOrCreate(
      configuration: ParticipantSessionConfiguration,
  )(implicit ec: ExecutionContext): Future[ParticipantSession] =
    Future(channels.getOrElseUpdate(configuration, create(configuration)))

  def close(configuration: ParticipantSessionConfiguration): Unit =
    channels.get(configuration).foreach(_.close())

  def closeAll(): Unit =
    for ((_, session) <- channels) {
      session.close()
    }

} 
Example 22
Source File: ParticipantSession.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.testtool.infrastructure.participant

import com.daml.ledger.api.testtool.infrastructure.LedgerServices
import com.daml.ledger.api.v1.ledger_identity_service.GetLedgerIdentityRequest
import com.daml.ledger.api.v1.transaction_service.GetLedgerEndRequest
import com.daml.timer.RetryStrategy
import io.grpc.ManagedChannel
import io.netty.channel.nio.NioEventLoopGroup
import org.slf4j.LoggerFactory

import scala.concurrent.duration.{DurationInt, SECONDS}
import scala.concurrent.{ExecutionContext, Future}

private[participant] final class ParticipantSession(
    val config: ParticipantSessionConfiguration,
    channel: ManagedChannel,
    eventLoopGroup: NioEventLoopGroup,
)(implicit val executionContext: ExecutionContext) {

  private[this] val logger = LoggerFactory.getLogger(classOf[ParticipantSession])

  private[this] val services: LedgerServices = new LedgerServices(channel)

  // The ledger identifier is retrieved only once when the participant session is created
  // Changing the ledger identifier during the execution of a session can result in unexpected consequences
  // The test tool is designed to run tests in an isolated environment but changing the
  // global state of the ledger breaks this assumption, no matter what
  private[this] val ledgerIdF =
    RetryStrategy.exponentialBackoff(10, 10.millis) { (attempt, wait) =>
      logger.debug(s"Fetching ledgerId to create context (attempt #$attempt, next one in $wait)...")
      services.identity.getLedgerIdentity(new GetLedgerIdentityRequest).map(_.ledgerId)
    }

  private[testtool] def createTestContext(
      endpointId: String,
      applicationId: String,
      identifierSuffix: String,
  ): Future[ParticipantTestContext] =
    for {
      ledgerId <- ledgerIdF
      end <- services.transaction.getLedgerEnd(new GetLedgerEndRequest(ledgerId)).map(_.getOffset)
    } yield
      new ParticipantTestContext(
        ledgerId,
        endpointId,
        applicationId,
        identifierSuffix,
        end,
        services,
        config.partyAllocation,
      )

  private[testtool] def close(): Unit = {
    logger.info(s"Disconnecting from participant at ${config.host}:${config.port}...")
    channel.shutdownNow()
    if (!channel.awaitTermination(10L, SECONDS)) {
      sys.error("Channel shutdown stuck. Unable to recover. Terminating.")
    }
    logger.info(s"Connection to participant at ${config.host}:${config.port} shut down.")
    if (!eventLoopGroup
        .shutdownGracefully(0, 0, SECONDS)
        .await(10L, SECONDS)) {
      sys.error("Unable to shutdown event loop. Unable to recover. Terminating.")
    }
    logger.info(s"Connection to participant at ${config.host}:${config.port} closed.")
  }
} 
Example 23
Source File: Debug.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils

import java.io.{DataOutputStream, FileOutputStream}

import com.daml.ledger.participant.state.kvutils.DamlKvutils._
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
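// NOTE: the enclosing object declaration is not included in this excerpt;
// `optLedgerDumpStream`, used below, is defined in the omitted code.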


  def dumpLedgerEntry(
      submission: DamlSubmission,
      participantId: String,
      entryId: DamlLogEntryId,
      logEntry: DamlLogEntry,
      outputState: Map[DamlStateKey, DamlStateValue]): Unit =
    optLedgerDumpStream.foreach { outs =>
      val dumpEntry = DamlKvutils.LedgerDumpEntry.newBuilder
        .setSubmission(Envelope.enclose(submission))
        .setEntryId(entryId)
        .setParticipantId(participantId)
        .setLogEntry(Envelope.enclose(logEntry))
        .addAllOutputState(
          outputState.map {
            case (k, v) =>
              DamlKvutils.LedgerDumpEntry.StatePair.newBuilder
                .setStateKey(k)
                .setStateValue(Envelope.enclose(v))
                .build
          }.asJava
        )
        .build

      // Messages are delimited by a header containing the message size as int32
      outs.writeInt(dumpEntry.getSerializedSize)
      dumpEntry.writeTo(outs)
      outs.flush()
    }

} 
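The dump written above is a stream of length-delimited protobuf messages: each record is preceded by its serialized size as a 4-byte big-endian integer, which is exactly what DataOutputStream.writeInt produces. Below is a minimal sketch of reading such a file back, assuming the same framing (the readDump name and the final parsing hint are illustrative, not part of the daml code):

import java.io.{DataInputStream, EOFException, FileInputStream}

def readDump(path: String): Iterator[Array[Byte]] = {
  val in = new DataInputStream(new FileInputStream(path))
  Iterator
    .continually {
      try {
        val size = in.readInt()            // the int32 size header written above
        val bytes = new Array[Byte](size)  // followed by the serialized LedgerDumpEntry
        in.readFully(bytes)
        Some(bytes)
      } catch {
        case _: EOFException => in.close(); None
      }
    }
    .takeWhile(_.isDefined)
    .flatten
}

// Each chunk can then be decoded with the generated protobuf parser,
// e.g. DamlKvutils.LedgerDumpEntry.parseFrom(bytes).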
Example 24
Source File: CommitContext.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.committer

import com.daml.ledger.participant.state.kvutils.DamlKvutils.{
  DamlLogEntryId,
  DamlStateKey,
  DamlStateValue
}
import com.daml.ledger.participant.state.kvutils.{DamlStateMap, Err}
import com.daml.ledger.participant.state.v1.ParticipantId
import com.daml.lf.data.Time.Timestamp
import org.slf4j.LoggerFactory

import scala.collection.mutable


  def getOutputs: Iterable[(DamlStateKey, DamlStateValue)] =
    outputOrder
      .map(key => key -> outputs(key))
      .filterNot {
        case (key, value) if inputAlreadyContains(key, value) =>
          logger.trace("Identical output found for key {}", key)
          true
        case _ => false
      }

  private def inputAlreadyContains(key: DamlStateKey, value: DamlStateValue): Boolean =
    inputs.get(key).exists(_.contains(value))
} 
Example 25
Source File: Committer.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.committer

import com.codahale.metrics.Timer
import com.daml.ledger.participant.state.kvutils.DamlKvutils.{
  DamlConfigurationEntry,
  DamlLogEntry,
  DamlLogEntryId,
  DamlStateKey,
  DamlStateValue
}
import com.daml.ledger.participant.state.kvutils.{Conversions, DamlStateMap, Err}
import com.daml.ledger.participant.state.kvutils.committer.Committer._
import com.daml.ledger.participant.state.v1.{Configuration, ParticipantId}
import com.daml.lf.data.Time
import com.daml.metrics.Metrics
import org.slf4j.{Logger, LoggerFactory}


        throw Err.MissingInputState(Conversions.configurationStateKey)
      )
      .flatMap { v =>
        val entry = v.getConfigurationEntry
        Configuration
          .decode(entry.getConfiguration)
          .fold({ err =>
            logger.error(s"Failed to parse configuration: $err, using default configuration.")
            None
          }, conf => Some(Some(entry) -> conf))
      }
      .getOrElse(None -> defaultConfig)
} 
Example 26
Source File: LedgerDataExporter.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.export

import java.io.{DataOutputStream, FileOutputStream}
import java.time.Instant

import com.daml.ledger.participant.state.v1.ParticipantId
import com.daml.ledger.validator.LedgerStateOperations.{Key, Value}
import com.google.protobuf.ByteString
import org.slf4j.LoggerFactory

trait LedgerDataExporter {

  
  def finishedProcessing(correlationId: String): Unit
}

object LedgerDataExporter {
  val EnvironmentVariableName = "KVUTILS_LEDGER_EXPORT"

  private val logger = LoggerFactory.getLogger(this.getClass)

  private lazy val outputStreamMaybe: Option[DataOutputStream] = {
    Option(System.getenv(EnvironmentVariableName))
      .map { filename =>
        logger.info(s"Enabled writing ledger entries to $filename")
        new DataOutputStream(new FileOutputStream(filename))
      }
  }

  private lazy val instance = outputStreamMaybe
    .map(new FileBasedLedgerDataExporter(_))
    .getOrElse(NoopLedgerDataExporter)

  def apply(): LedgerDataExporter = instance
} 
Example 27
Source File: AuthorizationTest.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.http

import java.nio.file.Files

import akka.actor.ActorSystem
import akka.stream.Materializer
import com.daml.auth.TokenHolder
import com.daml.bazeltools.BazelRunfiles.rlocation
import com.daml.grpc.adapter.{AkkaExecutionSequencerPool, ExecutionSequencerFactory}
import com.daml.http.util.TestUtil.requiredFile
import com.daml.ledger.api.auth.{AuthServiceStatic, Claim, ClaimPublic, Claims}
import com.daml.ledger.client.LedgerClient
import org.scalatest.{AsyncFlatSpec, BeforeAndAfterAll, Matchers}
import org.slf4j.LoggerFactory

import scala.concurrent.ExecutionContext
import scala.util.control.NonFatal

final class AuthorizationTest extends AsyncFlatSpec with BeforeAndAfterAll with Matchers {

  private val dar = requiredFile(rlocation("docs/quickstart-model.dar"))
    .fold(e => throw new IllegalStateException(e), identity)

  private val testId: String = this.getClass.getSimpleName

  implicit val asys: ActorSystem = ActorSystem(testId)
  implicit val mat: Materializer = Materializer(asys)
  implicit val aesf: ExecutionSequencerFactory = new AkkaExecutionSequencerPool(testId)(asys)
  implicit val ec: ExecutionContext = asys.dispatcher

  private val publicToken = "public"
  private val emptyToken = "empty"
  private val mockedAuthService = Option(AuthServiceStatic {
    case `publicToken` => Claims(Seq[Claim](ClaimPublic))
    case `emptyToken` => Claims(Nil)
  })

  private val accessTokenFile = Files.createTempFile("Extractor", "AuthSpec")
  private val tokenHolder = Option(new TokenHolder(accessTokenFile))

  private def setToken(string: String): Unit = {
    val _ = Files.write(accessTokenFile, string.getBytes())
  }

  override protected def afterAll(): Unit = {
    super.afterAll()
    try {
      Files.delete(accessTokenFile)
    } catch {
      case NonFatal(e) =>
        LoggerFactory
          .getLogger(classOf[AuthorizationTest])
          .warn("Unable to delete temporary token file", e)
    }
  }

  protected def withLedger[A] =
    HttpServiceTestFixture
      .withLedger[A](List(dar), testId, Option(publicToken), mockedAuthService) _

  private def packageService(client: LedgerClient): PackageService =
    new PackageService(HttpService.loadPackageStoreUpdates(client.packageClient, tokenHolder))

  behavior of "PackageService against an authenticated sandbox"

  it should "fail immediately if the authorization is insufficient" in withLedger { client =>
    setToken(emptyToken)
    packageService(client).reload.failed.map(_ => succeed)
  }

  it should "succeed if the authorization is sufficient" in withLedger { client =>
    setToken(publicToken)
    packageService(client).reload.map(_ => succeed)
  }

} 
Example 28
Source File: AkkaBeforeAndAfterAll.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.testing.utils

import java.util.concurrent.Executors

import akka.actor.ActorSystem
import akka.stream.Materializer
import com.daml.grpc.adapter.{AkkaExecutionSequencerPool, ExecutionSequencerFactory}
import com.google.common.util.concurrent.ThreadFactoryBuilder
import org.scalatest.{BeforeAndAfterAll, Suite}
import org.slf4j.LoggerFactory

import scala.concurrent.duration.DurationInt
import scala.concurrent.{Await, ExecutionContext}

trait AkkaBeforeAndAfterAll extends BeforeAndAfterAll {
  self: Suite =>
  private val logger = LoggerFactory.getLogger(getClass)

  protected def actorSystemName: String = this.getClass.getSimpleName

  private implicit lazy val executionContext: ExecutionContext =
    ExecutionContext.fromExecutorService(
      Executors.newSingleThreadExecutor(
        new ThreadFactoryBuilder()
          .setDaemon(true)
          .setNameFormat(s"$actorSystemName-thread-pool-worker-%d")
          .setUncaughtExceptionHandler((thread, _) =>
            logger.error(s"got an uncaught exception on thread: ${thread.getName}"))
          .build()))

  protected implicit lazy val system: ActorSystem =
    ActorSystem(actorSystemName, defaultExecutionContext = Some(executionContext))

  protected implicit lazy val materializer: Materializer = Materializer(system)

  protected implicit lazy val executionSequencerFactory: ExecutionSequencerFactory =
    new AkkaExecutionSequencerPool(poolName = actorSystemName, actorCount = 1)

  override protected def afterAll(): Unit = {
    executionSequencerFactory.close()
    materializer.shutdown()
    Await.result(system.terminate(), 30.seconds)
    super.afterAll()
  }
} 
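A minimal usage sketch (the spec below is invented for illustration): mixing the trait into a ScalaTest suite gives the test a lazily created ActorSystem, Materializer and single-threaded execution context whose uncaught exceptions are logged, all of which are torn down in afterAll.

import com.daml.ledger.api.testing.utils.AkkaBeforeAndAfterAll
import org.scalatest.{Matchers, WordSpec}

class MyStreamSpec extends WordSpec with Matchers with AkkaBeforeAndAfterAll {
  "the provided actor system" should {
    "be named after the spec class by default" in {
      // actorSystemName defaults to getClass.getSimpleName
      system.name shouldBe "MyStreamSpec"
    }
  }
}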
Example 29
Source File: AkkaStreamPerformanceTest.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.perf.util

import akka.actor.ActorSystem
import akka.stream.Materializer
import com.daml.ledger.api.testing.utils.Resource
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.{ExecutionContext, ExecutionContextExecutor}

@SuppressWarnings(Array("org.wartremover.warts.LeakingSealed"))
abstract class AkkaStreamPerformanceTest extends PerformanceTest {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  type ResourceType

  @volatile protected var system: ActorSystem = _
  @volatile protected var materializer: Materializer = _
  @transient protected implicit val ec: ExecutionContextExecutor = ExecutionContext.global

  protected def resource: Resource[ResourceType]

  protected def setup(): Unit = {
    resource.setup()
    implicit val sys: ActorSystem = ActorSystem(this.getClass.getSimpleName.stripSuffix("$"))
    system = sys
    materializer = Materializer(system)
  }

  protected def teardown(): Unit = {
    await(system.terminate())
    resource.close()
  }

  implicit class FixtureSetup[T](using: Using[T]) extends Serializable {
    def withLifecycleManagement(additionalSetup: T => Unit = _ => ()): Using[T] =
      using
        .setUp { input =>
          try {
            setup()
            additionalSetup(input)
          } catch {
            case t: Throwable =>
              logger.error("Setup failed.", t)
              throw t
          }
        }
        .tearDown { _ =>
          try {
            teardown()
          } catch {
            case t: Throwable =>
              logger.error("Teardown failed.", t)
              throw t
          }
        }
  }
} 
Example 30
Source File: Main.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.codegen

import java.io.File
import java.nio.file.Path

import ch.qos.logback.classic.Level
import com.daml.lf.codegen.conf.Conf
import com.typesafe.scalalogging.StrictLogging
import org.slf4j.{Logger, LoggerFactory}
import scalaz.Cord

import scala.collection.breakOut

object Main extends StrictLogging {

  private val codegenId = "Scala Codegen"

  @deprecated("Use codegen font-end: com.daml.codegen.CodegenMain.main", "0.13.23")
  def main(args: Array[String]): Unit =
    Conf.parse(args) match {
      case Some(conf) =>
        generateCode(conf)
      case None =>
        throw new IllegalArgumentException(
          s"Invalid ${codegenId: String} command line arguments: ${args.mkString(" "): String}")
    }

  def generateCode(conf: Conf): Unit = conf match {
    case Conf(darMap, outputDir, decoderPkgAndClass, verbosity, roots) =>
      setGlobalLogLevel(verbosity)
      logUnsupportedEventDecoderOverride(decoderPkgAndClass)
      val (dars, packageName) = darsAndOnePackageName(darMap)
      CodeGen.generateCode(dars, packageName, outputDir.toFile, CodeGen.Novel, roots)
  }

  private def setGlobalLogLevel(verbosity: Level): Unit = {
    LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME) match {
      case a: ch.qos.logback.classic.Logger =>
        a.setLevel(verbosity)
        logger.info(s"${codegenId: String} verbosity: ${verbosity.toString}")
      case _ =>
        logger.warn(s"${codegenId: String} cannot set requested verbosity: ${verbosity.toString}")
    }
  }

  private def logUnsupportedEventDecoderOverride(mapping: Option[(String, String)]): Unit =
    mapping.foreach {
      case (a, b) =>
        logger.warn(
          s"${codegenId: String} does not allow overriding Event Decoder, skipping: ${a: String} -> ${b: String}")
    }

  private def darsAndOnePackageName(darMap: Map[Path, Option[String]]): (List[File], String) = {
    val dars: List[File] = darMap.keys.map(_.toFile)(breakOut)
    val uniquePackageNames: Set[String] = darMap.values.collect { case Some(x) => x }(breakOut)
    uniquePackageNames.toSeq match {
      case Seq(packageName) =>
        (dars, packageName)
      case _ =>
        throw new IllegalStateException(
          s"${codegenId: String} expects all dars mapped to the same package name, " +
            s"requested: ${format(darMap): String}")
    }
  }

  private def format(map: Map[Path, Option[String]]): String = {
    val cord = map.foldLeft(Cord("{")) { (str, kv) =>
      str ++ kv._1.toFile.getAbsolutePath ++ "->" ++ kv._2.toString ++ ","
    }
    (cord ++ "}").toString
  }
} 
Example 31
Source File: ContextualizedLogger.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.logging

import akka.NotUsed
import akka.stream.scaladsl.Flow
import com.daml.grpc.GrpcException
import io.grpc.Status
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.concurrent.TrieMap
import scala.util.{Failure, Try}
import scala.util.control.NonFatal

object ContextualizedLogger {

  // Caches loggers to prevent them from needlessly wasting memory
  // Replicates the behavior of the underlying Slf4j logger factory
  private[this] val cache = TrieMap.empty[String, ContextualizedLogger]

  // Allows to explicitly pass a logger, should be used for testing only
  private[logging] def createFor(withoutContext: Logger): ContextualizedLogger =
    new ContextualizedLogger(withoutContext)

  // Slf4j handles the caching of the underlying logger itself
  private[logging] def createFor(name: String): ContextualizedLogger =
    createFor(LoggerFactory.getLogger(name))

  
  def get(clazz: Class[_]): ContextualizedLogger = {
    val name = clazz.getName.stripSuffix("$")
    cache.getOrElseUpdate(name, createFor(name))
  }

}

final class ContextualizedLogger private (val withoutContext: Logger) {

  val trace = new LeveledLogger.Trace(withoutContext)
  val debug = new LeveledLogger.Debug(withoutContext)
  val info = new LeveledLogger.Info(withoutContext)
  val warn = new LeveledLogger.Warn(withoutContext)
  val error = new LeveledLogger.Error(withoutContext)

  private def internalOrUnknown(code: Status.Code): Boolean =
    code == Status.Code.INTERNAL || code == Status.Code.UNKNOWN

  private def logError(t: Throwable)(implicit logCtx: LoggingContext): Unit =
    error("Unhandled internal error", t)

  def logErrorsOnCall[Out](implicit logCtx: LoggingContext): PartialFunction[Try[Out], Unit] = {
    case Failure(e @ GrpcException(s, _)) =>
      if (internalOrUnknown(s.getCode)) {
        logError(e)
      }
    case Failure(NonFatal(e)) =>
      logError(e)
  }

  def logErrorsOnStream[Out](implicit logCtx: LoggingContext): Flow[Out, Out, NotUsed] =
    Flow[Out].mapError {
      case e @ GrpcException(s, _) =>
        if (internalOrUnknown(s.getCode)) {
          logError(e)
        }
        e
      case NonFatal(e) =>
        logError(e)
        e
    }

} 
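A minimal usage sketch (the service class is hypothetical): callers obtain a logger through the cached factory, so repeated calls for the same class return the same instance, and the trailing "$" is stripped so that a class and its companion object share one logger name.

import com.daml.logging.ContextualizedLogger

class LedgerApiService {
  private val logger = ContextualizedLogger.get(getClass)

  def warmUp(): Unit =
    logger.withoutContext.info("Starting up") // plain slf4j call, no logging context required
}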
Example 32
Source File: Evaluation.scala    From glintlda   with MIT License 5 votes vote down vote up
package glintlda

import akka.util.Timeout
import breeze.numerics._
import com.typesafe.scalalogging.slf4j.Logger
import glint.iterators.RowBlockIterator
import org.slf4j.LoggerFactory

import scala.concurrent.duration._
import scala.concurrent.{Await, ExecutionContext}


  def logCurrentState(iteration: Int, docLoglikelihood: Double, tokenCounts: Long, model: LDAModel): Unit = {

    // Construct necessary variables for pipelined communication with parameter server
    implicit val ec = ExecutionContext.Implicits.global
    implicit val timeout = new Timeout(300 seconds)

    // Get the independently computed log likelihood numbers
    val wordLoglikelihood = computeWordLoglikelihood(model)
    val loglikelihood = docLoglikelihood + wordLoglikelihood

    // Compute perplexity
    val perplexity = Math.exp(-loglikelihood / tokenCounts)

    // Print to log
    val logger = Logger(LoggerFactory getLogger s"${getClass.getSimpleName}")
    logger.info(s"Evaluation after iteration ${iteration}")
    logger.info(s"Doc log-likelihood:  ${docLoglikelihood}")
    logger.info(s"Word log-likelihood: ${wordLoglikelihood}")
    logger.info(s"Log-likelihood:      ${loglikelihood}")
    logger.info(s"Token counts:        ${tokenCounts}")
    logger.info(s"Perplexity:          ${perplexity}")

  }

} 
Example 33
Source File: RouterMetrics.scala    From prometheus-akka   with Apache License 2.0 5 votes vote down vote up
package com.workday.prometheus.akka

import scala.collection.concurrent.TrieMap
import scala.util.control.NonFatal

import org.slf4j.LoggerFactory

import io.prometheus.client.{Counter, Gauge}

object RouterMetrics {
  private val logger = LoggerFactory.getLogger(RouterMetrics.getClass)
  private val map = TrieMap[Entity, RouterMetrics]()
  def metricsFor(e: Entity): Option[RouterMetrics] = {
    try {
      Some(map.getOrElseUpdate(e, new RouterMetrics(e)))
    } catch {
      case NonFatal(t) => {
        logger.warn("Issue with getOrElseUpdate (failing over to simple get)", t)
        map.get(e)
      }
    }
  }
  def hasMetricsFor(e: Entity): Boolean = map.contains(e)
}

class RouterMetrics(entity: Entity) {
  val actorName = metricFriendlyActorName(entity.name)
  val routingTime = Gauge.build().name(s"akka_router_routing_time_$actorName").help("Akka Router routing time (Seconds)").register()
  val processingTime = Gauge.build().name(s"akka_router_processing_time_$actorName").help("Akka Router processing time (Seconds)").register()
  val timeInMailbox = Gauge.build().name(s"akka_router_time_in_mailbox_$actorName").help("Akka Router time in mailbox (Seconds)").register()
  val messages = Counter.build().name(s"akka_router_message_count_$actorName").help("Akka Router messages").register()
  val errors = Counter.build().name(s"akka_router_error_count_$actorName").help("Akka Router errors").register()
} 
Example 34
Source File: ActorMetrics.scala    From prometheus-akka   with Apache License 2.0 5 votes vote down vote up
package com.workday.prometheus.akka

import scala.collection.concurrent.TrieMap
import scala.util.control.NonFatal

import org.slf4j.LoggerFactory

import io.prometheus.client.{Counter, Gauge}

object ActorMetrics {
  private val logger = LoggerFactory.getLogger(ActorMetrics.getClass)
  private val map = TrieMap[Entity, ActorMetrics]()
  def metricsFor(e: Entity): Option[ActorMetrics] = {
    try {
      Some(map.getOrElseUpdate(e, new ActorMetrics(e)))
    } catch {
      case NonFatal(t) => {
        logger.warn("Issue with getOrElseUpdate (failing over to simple get)", t)
        map.get(e)
      }
    }
  }
  def hasMetricsFor(e: Entity): Boolean = map.contains(e)
}

class ActorMetrics(entity: Entity) {
  val actorName = metricFriendlyActorName(entity.name)
  val mailboxSize = Gauge.build().name(s"akka_actor_mailbox_size_$actorName").help("Akka Actor mailbox size").register()
  val processingTime = Gauge.build().name(s"akka_actor_processing_time_$actorName").help("Akka Actor processing time (Seconds)").register()
  val timeInMailbox = Gauge.build().name(s"akka_actor_time_in_mailbox_$actorName").help("Akka Actor time in mailbox (Seconds)").register()
  val messages = Counter.build().name(s"akka_actor_message_count_$actorName").help("Akka Actor messages").register()
  val errors = Counter.build().name(s"akka_actor_error_count_$actorName").help("Akka Actor errors").register()
} 
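A minimal usage sketch (the Entity value is assumed to come from the surrounding instrumentation; its construction is not shown in this snippet): look up the metrics for an actor and record one processed message.

def recordMessage(entity: Entity, processingSeconds: Double): Unit =
  ActorMetrics.metricsFor(entity).foreach { metrics =>
    metrics.messages.inc()                        // count the message
    metrics.processingTime.set(processingSeconds) // last observed processing time, in seconds
  }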
Example 35
Source File: TextClassifier.scala    From BigDL   with Apache License 2.0 5 votes vote down vote up
package com.intel.analytics.bigdl.example.textclassification

import com.intel.analytics.bigdl.example.utils._
import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, _}
import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T}
import org.apache.log4j.{Level => Levle4j, Logger => Logger4j}
import org.slf4j.{Logger, LoggerFactory}
import scopt.OptionParser

import scala.collection.mutable.{ArrayBuffer, Map => MMap}
import scala.language.existentials

object TextClassifier {
  val log: Logger = LoggerFactory.getLogger(this.getClass)
  LoggerFilter.redirectSparkInfoLogs()
  Logger4j.getLogger("com.intel.analytics.bigdl.optim").setLevel(Levle4j.INFO)

  def main(args: Array[String]): Unit = {
    val localParser = new OptionParser[TextClassificationParams]("BigDL Example") {
      opt[String]('b', "baseDir")
        .required()
        .text("Base dir containing the training and word2Vec data")
        .action((x, c) => c.copy(baseDir = x))
      opt[String]('p', "partitionNum")
        .text("you may want to tune the partitionNum if run into spark mode")
        .action((x, c) => c.copy(partitionNum = x.toInt))
      opt[String]('s', "maxSequenceLength")
        .text("maxSequenceLength")
        .action((x, c) => c.copy(maxSequenceLength = x.toInt))
      opt[String]('w', "maxWordsNum")
        .text("maxWordsNum")
        .action((x, c) => c.copy(maxWordsNum = x.toInt))
      opt[String]('l', "trainingSplit")
        .text("trainingSplit")
        .action((x, c) => c.copy(trainingSplit = x.toDouble))
      opt[String]('z', "batchSize")
        .text("batchSize")
        .action((x, c) => c.copy(batchSize = x.toInt))
      opt[Int]('l', "learningRate")
        .text("learningRate")
        .action((x, c) => c.copy(learningRate = x))
    }

    localParser.parse(args, TextClassificationParams()).map { param =>
      log.info(s"Current parameters: $param")
      val textClassification = new TextClassifier(param)
      textClassification.train()
    }
  }
} 
Example 36
Source File: TimestampLogicalType.scala    From embulk-output-s3_parquet   with MIT License 5 votes vote down vote up
package org.embulk.output.s3_parquet.parquet

import java.time.ZoneId

import org.apache.parquet.io.api.RecordConsumer
import org.apache.parquet.schema.{LogicalTypeAnnotation, PrimitiveType, Types}
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.{
  MICROS,
  MILLIS,
  NANOS
}
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.embulk.config.ConfigException
import org.embulk.output.s3_parquet.catalog.GlueDataType
import org.embulk.spi.`type`.{
  BooleanType,
  DoubleType,
  JsonType,
  LongType,
  StringType,
  TimestampType
}
import org.embulk.spi.time.{Timestamp, TimestampFormatter}
import org.embulk.spi.Column
import org.msgpack.value.Value
import org.slf4j.{Logger, LoggerFactory}

case class TimestampLogicalType(
    isAdjustedToUtc: Boolean,
    timeUnit: TimeUnit,
    timeZone: ZoneId
) extends ParquetColumnType {
  private val logger: Logger =
    LoggerFactory.getLogger(classOf[TimestampLogicalType])

  override def primitiveType(column: Column): PrimitiveType =
    column.getType match {
      case _: LongType | _: TimestampType =>
        Types
          .optional(PrimitiveTypeName.INT64)
          .as(LogicalTypeAnnotation.timestampType(isAdjustedToUtc, timeUnit))
          .named(column.getName)
      case _: BooleanType | _: DoubleType | _: StringType | _: JsonType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def glueDataType(column: Column): GlueDataType =
    column.getType match {
      case _: LongType | _: TimestampType =>
        timeUnit match {
          case MILLIS => GlueDataType.TIMESTAMP
          case MICROS | NANOS =>
            warningWhenConvertingTimestampToGlueType(GlueDataType.BIGINT)
            GlueDataType.BIGINT
        }
      case _: BooleanType | _: DoubleType | _: StringType | _: JsonType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def consumeBoolean(consumer: RecordConsumer, v: Boolean): Unit =
    throw newUnsupportedMethodException("consumeBoolean")
  override def consumeString(consumer: RecordConsumer, v: String): Unit =
    throw newUnsupportedMethodException("consumeString")

  override def consumeLong(consumer: RecordConsumer, v: Long): Unit =
    consumer.addLong(v)

  override def consumeDouble(consumer: RecordConsumer, v: Double): Unit =
    throw newUnsupportedMethodException("consumeDouble")

  override def consumeTimestamp(
      consumer: RecordConsumer,
      v: Timestamp,
      formatter: TimestampFormatter
  ): Unit = timeUnit match {
    case MILLIS => consumer.addLong(v.toEpochMilli)
    case MICROS =>
      consumer.addLong(v.getEpochSecond * 1_000_000L + (v.getNano / 1_000L))
    case NANOS =>
      consumer.addLong(v.getEpochSecond * 1_000_000_000L + v.getNano)
  }

  override def consumeJson(consumer: RecordConsumer, v: Value): Unit =
    throw newUnsupportedMethodException("consumeJson")

  private def warningWhenConvertingTimestampToGlueType(
      glueType: GlueDataType
  ): Unit =
    logger.warn(
      s"timestamp(isAdjustedToUtc = $isAdjustedToUtc, timeUnit = $timeUnit) is converted" +
        s" to Glue ${glueType.name} but this is not represented correctly, because Glue" +
        s" does not support time type. Please use `catalog.column_options` to define the type."
    )
} 
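A worked example of the unit conversions performed in consumeTimestamp above (the value is chosen for illustration; java.time.Instant stands in for Embulk's Timestamp, which exposes the same toEpochMilli/getEpochSecond/getNano accessors):

import java.time.Instant

val t = Instant.parse("2020-01-02T03:04:05.123456789Z")

val millis = t.toEpochMilli                                         // 1577934245123
val micros = t.getEpochSecond * 1_000_000L + (t.getNano / 1_000L)   // 1577934245123456
val nanos  = t.getEpochSecond * 1_000_000_000L + t.getNano          // 1577934245123456789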
Example 37
Source File: JsonLogicalType.scala    From embulk-output-s3_parquet   with MIT License 5 votes vote down vote up
package org.embulk.output.s3_parquet.parquet
import org.apache.parquet.io.api.{Binary, RecordConsumer}
import org.apache.parquet.schema.{LogicalTypeAnnotation, PrimitiveType, Types}
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.embulk.config.ConfigException
import org.embulk.output.s3_parquet.catalog.GlueDataType
import org.embulk.spi.Column
import org.embulk.spi.`type`.{
  BooleanType,
  DoubleType,
  JsonType,
  LongType,
  StringType,
  TimestampType
}
import org.embulk.spi.time.{Timestamp, TimestampFormatter}
import org.msgpack.value.{Value, ValueFactory}
import org.slf4j.{Logger, LoggerFactory}

object JsonLogicalType extends ParquetColumnType {
  private val logger: Logger = LoggerFactory.getLogger(JsonLogicalType.getClass)
  override def primitiveType(column: Column): PrimitiveType =
    column.getType match {
      case _: BooleanType | _: LongType | _: DoubleType | _: StringType |
          _: JsonType =>
        Types
          .optional(PrimitiveTypeName.BINARY)
          .as(LogicalTypeAnnotation.jsonType())
          .named(column.getName)
      case _: TimestampType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def glueDataType(column: Column): GlueDataType =
    column.getType match {
      case _: BooleanType | _: LongType | _: DoubleType | _: StringType |
          _: JsonType =>
        warningWhenConvertingJsonToGlueType(GlueDataType.STRING)
        GlueDataType.STRING
      case _: TimestampType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def consumeBoolean(consumer: RecordConsumer, v: Boolean): Unit =
    consumeJson(consumer, ValueFactory.newBoolean(v))

  override def consumeString(consumer: RecordConsumer, v: String): Unit =
    consumeJson(consumer, ValueFactory.newString(v))

  override def consumeLong(consumer: RecordConsumer, v: Long): Unit =
    consumeJson(consumer, ValueFactory.newInteger(v))

  override def consumeDouble(consumer: RecordConsumer, v: Double): Unit =
    consumeJson(consumer, ValueFactory.newFloat(v))

  override def consumeTimestamp(
      consumer: RecordConsumer,
      v: Timestamp,
      formatter: TimestampFormatter
  ): Unit = throw newUnsupportedMethodException("consumeTimestamp")

  override def consumeJson(consumer: RecordConsumer, v: Value): Unit =
    consumer.addBinary(Binary.fromString(v.toJson))

  private def warningWhenConvertingJsonToGlueType(
      glueType: GlueDataType
  ): Unit = {
    logger.warn(
      s"json is converted" +
        s" to Glue ${glueType.name} but this is not represented correctly, because Glue" +
        s" does not support json type. Please use `catalog.column_options` to define the type."
    )
  }

} 
Example 38
Source File: package.scala    From zio-metrics   with Apache License 2.0 5 votes vote down vote up
package zio.metrics.dropwizard

import zio.{ Has, Layer, Task, ZLayer }
import java.util.concurrent.TimeUnit
import java.io.File
import java.util.Locale
import java.net.InetSocketAddress
import org.slf4j.LoggerFactory
import java.{ util => ju }

package object reporters {

  import com.codahale.metrics.MetricRegistry
  import com.codahale.metrics.MetricFilter
  import com.codahale.metrics.graphite.Graphite
  import com.codahale.metrics.graphite.GraphiteReporter
  import com.codahale.metrics.ConsoleReporter
  import com.codahale.metrics.Slf4jReporter
  import com.codahale.metrics.CsvReporter
  import com.codahale.metrics.jmx.JmxReporter
  import com.codahale.metrics.Reporter

  type Reporters = Has[Reporters.Service]

  object Reporters {
    trait Service {
      def jmx(r: MetricRegistry): Task[JmxReporter]

      def console(r: MetricRegistry): Task[ConsoleReporter]

      def slf4j(r: MetricRegistry, duration: Int, unit: TimeUnit, loggerName: String): Task[Slf4jReporter]

      def csv(r: MetricRegistry, file: File, locale: Locale): Task[Reporter]

      def graphite(r: MetricRegistry, host: String, port: Int, prefix: String): Task[GraphiteReporter]
    }

    val live: Layer[Nothing, Reporters] = ZLayer.succeed(new Service {

      def jmx(r: MetricRegistry): zio.Task[JmxReporter] = Task(JmxReporter.forRegistry(r).build())

      def console(r: MetricRegistry): Task[ConsoleReporter] = Task(
        ConsoleReporter
          .forRegistry(r)
          .convertRatesTo(TimeUnit.SECONDS)
          .convertDurationsTo(TimeUnit.MILLISECONDS)
          .build()
      )

      def slf4j(r: MetricRegistry, duration: Int, unit: TimeUnit, loggerName: String): Task[Slf4jReporter] =
        Task(
          Slf4jReporter
            .forRegistry(r)
            .outputTo(LoggerFactory.getLogger(loggerName))
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.MILLISECONDS)
            .build()
        )

      def csv(r: MetricRegistry, file: File, locale: ju.Locale): zio.Task[Reporter] = Task(
        CsvReporter
          .forRegistry(r)
          .formatFor(locale)
          .convertRatesTo(TimeUnit.SECONDS)
          .convertDurationsTo(TimeUnit.MILLISECONDS)
          .build(file)
      )

      def graphite(r: MetricRegistry, host: String, port: Int, prefix: String): zio.Task[GraphiteReporter] =
        Task {
          val graphite = new Graphite(new InetSocketAddress(host, port))
          GraphiteReporter
            .forRegistry(r)
            .prefixedWith(prefix)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.MILLISECONDS)
            .filter(MetricFilter.ALL)
            .build(graphite)
        }
    })
  }
} 
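A minimal usage sketch, assuming ZIO 1.x and Dropwizard's ScheduledReporter API (the registry, interval and object name are illustrative): access the service through the environment, build a console reporter, and satisfy the dependency with the live layer.

import java.util.concurrent.TimeUnit
import com.codahale.metrics.MetricRegistry
import zio.{ Runtime, ZIO }
import zio.metrics.dropwizard.reporters._

object ConsoleReportingExample {
  val registry = new MetricRegistry()

  // Build a ConsoleReporter through the Reporters service and start it.
  val program: ZIO[Reporters, Throwable, Unit] =
    ZIO.accessM[Reporters](_.get.console(registry)).map { reporter =>
      reporter.start(10, TimeUnit.SECONDS) // report every 10 seconds
    }

  def main(args: Array[String]): Unit =
    Runtime.default.unsafeRun(program.provideLayer(Reporters.live))
}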
Example 39
Source File: DockerPostgresService.scala    From crm-seed   with Apache License 2.0 5 votes vote down vote up
package com.dataengi.crm.common.docker

import java.sql.DriverManager

import com.spotify.docker.client.{DefaultDockerClient, DockerClient}
import com.whisk.docker.impl.spotify.SpotifyDockerFactory
import com.whisk.docker.{
  DockerCommandExecutor,
  DockerContainer,
  DockerContainerState,
  DockerFactory,
  DockerKit,
  DockerReadyChecker
}
import org.slf4j.LoggerFactory

import scala.concurrent.{ExecutionContext, Future}
import scala.util.Try

trait DockerPostgresService extends DockerKit {

  import scala.concurrent.duration._

  private lazy val log = LoggerFactory.getLogger(this.getClass)

  private val client: DockerClient = DefaultDockerClient.fromEnv().build()

  override implicit val dockerFactory: DockerFactory = new SpotifyDockerFactory(client)

  def PostgresAdvertisedPort = 5432
  def PostgresExposedPort    = 44444
  val PostgresUser           = "nph"
  val PostgresPassword       = "suitup"

  lazy val DockerPostgresHost: String = postgresContainer.hostname.getOrElse("localhost")
  lazy val DockerPostgresPort: Int    = PostgresExposedPort
  lazy val DockerDatabaseName: String = "crm"

  val postgresContainer: DockerContainer = DockerContainer("postgres:9.5")
    .withPorts((PostgresAdvertisedPort, Some(PostgresExposedPort)))
    .withEnv(s"POSTGRES_USER=$PostgresUser", s"POSTGRES_PASSWORD=$PostgresPassword")
    .withCommand()
    .withReadyChecker(
      PostgresReadyChecker(DockerDatabaseName, PostgresUser, PostgresPassword, Some(PostgresExposedPort))
        .looped(15, 1.second)
    )

  lazy val dockerTestDataBaseConf: Map[String, Any] = Map[String, Any](
    "slick.dbs.default.driver"            -> "slick.driver.PostgresDriver$",
    "slick.dbs.default.db.driver"         -> "org.postgresql.Driver",
    "slick.dbs.default.db.user"           -> PostgresUser,
    "slick.dbs.default.db.password"       -> PostgresPassword,
    "slick.dbs.default.db.url"            -> s"jdbc:postgresql://$DockerPostgresHost:$DockerPostgresPort/crm",
    "slick.dbs.default.db.properties.url" -> s"jdbc:postgresql://$DockerPostgresHost:$DockerPostgresPort/crm"
  )

  override def dockerContainers: List[DockerContainer] = postgresContainer :: super.dockerContainers
}

case class PostgresReadyChecker(databaseName: String, user: String, password: String, port: Option[Int] = None)
    extends DockerReadyChecker {

  override def apply(container: DockerContainerState)(implicit docker: DockerCommandExecutor,
                                                      ec: ExecutionContext): Future[Boolean] =
    container
      .getPorts()
      .map(ports =>
        Try {
          Class.forName("org.postgresql.Driver")
          val url = s"jdbc:postgresql://${docker.host}:${port.getOrElse(ports.values.head)}/"
          println(s"[postgres][docker][url] $url")
          Option(DriverManager.getConnection(url, user, password))
            .map { connection =>
              println(s"[posgres][docker][create-db][connection] isClosed=${connection.isClosed}")
              val statements = connection.createStatement()
              val result     = statements.executeUpdate(s"CREATE DATABASE $databaseName")
              println(s"[posgres][docker][create-db] result=$result")
              connection
            }
            .map(_.close)
            .isDefined
        }.getOrElse(false))
} 
Example 40
Source File: CounterEtlItem.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.counter.loader.core

import org.apache.s2graph.counter.util.UnitConverter
import org.slf4j.LoggerFactory
import play.api.libs.json._
import scala.util.{Failure, Success, Try}

case class CounterEtlItem(ts: Long, service: String, action: String, item: String, dimension: JsValue, property: JsValue, useProfile: Boolean = false) {
   def toKafkaMessage: String = {
     s"$ts\t$service\t$action\t$item\t${dimension.toString()}\t${property.toString()}"
   }

   lazy val value = {
     (property \ "value").toOption match {
       case Some(JsNumber(n)) => n.longValue()
       case Some(JsString(s)) => s.toLong
       case None => 1L
       case _ => throw new Exception("wrong type")
     }
   }
 }

object CounterEtlItem {
   val log = LoggerFactory.getLogger(this.getClass)

   def apply(line: String): Option[CounterEtlItem] = {
     Try {
       val Array(ts, service, action, item, dimension, property) = line.split('\t')
       CounterEtlItem(UnitConverter.toMillis(ts.toLong), service, action, item, Json.parse(dimension), Json.parse(property))
     } match {
       case Success(item) =>
         Some(item)
       case Failure(ex) =>
         log.error(">>> failed")
         log.error(s"${ex.toString}: $line")
         None
     }
   }
 } 
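A minimal usage sketch (the log line is invented for illustration): apply parses a six-field tab-separated line, converting the timestamp to milliseconds and the last two fields to JSON.

val line = Seq(
  "1577934245",           // ts (converted to millis by UnitConverter.toMillis)
  "s2counter",            // service
  "like",                 // action
  "item_1",               // item
  """{"country":"kr"}""", // dimension (JSON)
  """{"value":3}"""       // property (JSON)
).mkString("\t")

CounterEtlItem(line).foreach { item =>
  println(item.value)          // 3, read from property."value"
  println(item.toKafkaMessage) // re-serialized as a tab-separated line (ts in millis)
}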
Example 41
Source File: WithRedis.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.counter.helper

import com.typesafe.config.Config
import org.apache.s2graph.counter.config.S2CounterConfig
import org.apache.s2graph.counter.util.Hashes
import org.slf4j.LoggerFactory
import redis.clients.jedis.exceptions.JedisException
import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}

import scala.util.Try

class WithRedis(config: Config) {
   lazy val s2config = new S2CounterConfig(config)

   private val log = LoggerFactory.getLogger(getClass)

   val poolConfig = new JedisPoolConfig()
   poolConfig.setMaxTotal(150)
   poolConfig.setMaxIdle(50)
   poolConfig.setMaxWaitMillis(200)

   val jedisPools = s2config.REDIS_INSTANCES.map { case (host, port) =>
     new JedisPool(poolConfig, host, port)
   }

   def getBucketIdx(key: String): Int = {
     Hashes.murmur3(key) % jedisPools.size
   }

   def doBlockWithIndex[T](idx: Int)(f: Jedis => T): Try[T] = {
     Try {
       val pool = jedisPools(idx)

       var jedis: Jedis = null

       try {
         jedis = pool.getResource

         f(jedis)
       }
       catch {
         case e: JedisException =>
           pool.returnBrokenResource(jedis)

           jedis = null
           throw e
       }
       finally {
         if (jedis != null) {
           pool.returnResource(jedis)
         }
       }
     }
   }

   def doBlockWithKey[T](key: String)(f: Jedis => T): Try[T] = {
     doBlockWithIndex(getBucketIdx(key))(f)
   }
 } 
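A minimal usage sketch, assuming a Typesafe Config that carries the Redis instance list read by S2CounterConfig (the key below is illustrative): the helper hashes the key to pick a pool and hands a Jedis connection to the block, discarding broken connections via returnBrokenResource.

import com.typesafe.config.ConfigFactory
import scala.util.Try

val withRedis = new WithRedis(ConfigFactory.load())

val hits: Try[Option[String]] =
  withRedis.doBlockWithKey("counter:item_1") { jedis =>
    Option(jedis.get("counter:item_1")) // None if the key does not exist
  }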
Example 42
Source File: CollectionCache.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.counter.util

import java.net.InetAddress
import java.util.concurrent.TimeUnit

import com.google.common.cache.{Cache, CacheBuilder}
import org.slf4j.LoggerFactory

import scala.concurrent.{ExecutionContext, Future}
import scala.language.{postfixOps, reflectiveCalls}

case class CollectionCacheConfig(maxSize: Int, ttl: Int, negativeCache: Boolean = false, negativeTTL: Int = 600)

class CollectionCache[C <: { def nonEmpty: Boolean; def isEmpty: Boolean } ](config: CollectionCacheConfig) {
  private val cache: Cache[String, C] = CacheBuilder.newBuilder()
    .expireAfterWrite(config.ttl, TimeUnit.SECONDS)
    .maximumSize(config.maxSize)
    .build[String, C]()

//  private lazy val cache = new SynchronizedLruMap[String, (C, Int)](config.maxSize)
  private lazy val className = this.getClass.getSimpleName

  private lazy val log = LoggerFactory.getLogger(this.getClass)
  val localHostname = InetAddress.getLocalHost.getHostName

  def size = cache.size
  val maxSize = config.maxSize

  // cache statistics
  def getStatsString: String = {
    s"$localHostname ${cache.stats().toString}"
  }

  def withCache(key: String)(op: => C): C = {
    Option(cache.getIfPresent(key)) match {
      case Some(r) => r
      case None =>
        val r = op
        if (r.nonEmpty || config.negativeCache) {
          cache.put(key, r)
        }
        r
    }
  }

  def withCacheAsync(key: String)(op: => Future[C])(implicit ec: ExecutionContext): Future[C] = {
    Option(cache.getIfPresent(key)) match {
      case Some(r) => Future.successful(r)
      case None =>
        op.map { r =>
          if (r.nonEmpty || config.negativeCache) {
            cache.put(key, r)
          }
          r
        }
    }
  }

  def purgeKey(key: String) = {
    cache.invalidate(key)
  }

  def contains(key: String): Boolean = {
    Option(cache.getIfPresent(key)).nonEmpty
  }
} 
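A minimal usage sketch (loadTags is a hypothetical loader): any type exposing nonEmpty and isEmpty satisfies the structural bound, so a Seq[String] can be cached directly; empty results are only stored when negativeCache is enabled.

val tagCache =
  new CollectionCache[Seq[String]](CollectionCacheConfig(maxSize = 10000, ttl = 60))

def loadTags(itemId: String): Seq[String] =
  Seq("news", "sports") // stand-in for a real lookup

val tags: Seq[String] = tagCache.withCache("item:42") { loadTags("item:42") }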
Example 43
Source File: RankingCounter.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.counter.core

import java.util.concurrent.TimeUnit

import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}
import com.typesafe.config.Config
import org.apache.s2graph.counter.core.RankingCounter.RankingValueMap
import org.apache.s2graph.counter.models.Counter
import org.apache.s2graph.counter.util.{CollectionCacheConfig, CollectionCache}
import org.slf4j.LoggerFactory
import scala.collection.JavaConversions._

case class RankingRow(key: RankingKey, value: Map[String, RankingValue])
case class RateRankingRow(key: RankingKey, value: Map[String, RateRankingValue])

class RankingCounter(config: Config, storage: RankingStorage) {
  private val log = LoggerFactory.getLogger(getClass)

  val storageStatusCache = new CollectionCache[Option[Boolean]](CollectionCacheConfig(1000, 60, negativeCache = false, 60))

  val cache: LoadingCache[RankingKey, RankingResult] = CacheBuilder.newBuilder()
    .maximumSize(1000000)
    .expireAfterWrite(10l, TimeUnit.MINUTES)
    .build(
      new CacheLoader[RankingKey, RankingResult]() {
        def load(rankingKey: RankingKey): RankingResult = {
//          log.warn(s"cache load: $rankingKey")
          storage.getTopK(rankingKey, Int.MaxValue).getOrElse(RankingResult(-1, Nil))
        }
      }
    )

  def getTopK(rankingKey: RankingKey, k: Int = Int.MaxValue): Option[RankingResult] = {
    val tq = rankingKey.eq.tq
    if (TimedQualifier.getQualifiers(Seq(tq.q), System.currentTimeMillis()).head == tq) {
      // do not use cache
      storage.getTopK(rankingKey, k)
    }
    else {
      val result = cache.get(rankingKey)
      if (result.values.nonEmpty) {
        Some(result.copy(values = result.values.take(k)))
      }
      else {
        None
      }
    }
  }

  def update(key: RankingKey, value: RankingValueMap, k: Int): Unit = {
    storage.update(key, value, k)
  }

  def update(values: Seq[(RankingKey, RankingValueMap)], k: Int): Unit = {
    storage.update(values, k)
  }

  def delete(key: RankingKey): Unit = {
    storage.delete(key)
  }

  def getAllItems(keys: Seq[RankingKey], k: Int = Int.MaxValue): Seq[String] = {
    val oldKeys = keys.filter(key => TimedQualifier.getQualifiers(Seq(key.eq.tq.q), System.currentTimeMillis()).head != key.eq.tq)
    val cached = cache.getAllPresent(oldKeys)
    val missed = keys.diff(cached.keys.toSeq)
    val found = storage.getTopK(missed, k)

//    log.warn(s"cached: ${cached.size()}, missed: ${missed.size}")

    for {
      (key, result) <- found
    } {
      cache.put(key, result)
    }

    for {
      (key, RankingResult(totalScore, values)) <- cached ++ found
      (item, score) <- values
    } yield {
      item
    }
  }.toSeq.distinct

  def prepare(policy: Counter): Unit = {
    storage.prepare(policy)
  }

  def destroy(policy: Counter): Unit = {
    storage.destroy(policy)
  }

  def ready(policy: Counter): Boolean = {
    storageStatusCache.withCache(s"${policy.id}") {
      Some(storage.ready(policy))
    }.getOrElse(false)
  }
}

object RankingCounter {
  type RankingValueMap = Map[String, RankingValue]
} 
Example 44
Source File: GraphOperation.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.counter.core.v2

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import com.typesafe.config.Config
import org.apache.http.HttpStatus
import org.apache.s2graph.counter.config.S2CounterConfig
import org.apache.s2graph.counter.core.v2.ExactStorageGraph._
import org.asynchttpclient.DefaultAsyncHttpClientConfig
import org.slf4j.LoggerFactory
import play.api.libs.json.{JsObject, JsValue, Json}
import scala.concurrent.Await
import scala.concurrent.duration._

class GraphOperation(config: Config) {
  // using play-ws without play app
  implicit val materializer = ActorMaterializer.create(ActorSystem(getClass.getSimpleName))
  private val builder = new DefaultAsyncHttpClientConfig.Builder()
  private val wsClient = new play.api.libs.ws.ning.NingWSClient(builder.build)
  private val s2config = new S2CounterConfig(config)
  val s2graphUrl = s2config.GRAPH_URL
  private[counter] val log = LoggerFactory.getLogger(this.getClass)

  import scala.concurrent.ExecutionContext.Implicits.global

  def createLabel(json: JsValue): Boolean = {
    // fix counter label's schemaVersion
    val newJson = json.as[JsObject] ++ Json.obj("schemaVersion" -> "v2")
    val future = wsClient.url(s"$s2graphUrl/graphs/createLabel").post(newJson).map { resp =>
      resp.status match {
        case HttpStatus.SC_OK =>
          true
        case _ =>
          throw new RuntimeException(s"failed createLabel. errCode: ${resp.status} body: ${resp.body} query: $json")
      }
    }

    Await.result(future, 10 second)
  }

  def deleteLabel(label: String): Boolean = {
    val future = wsClient.url(s"$s2graphUrl/graphs/deleteLabel/$label").put("").map { resp =>
      resp.status match {
        case HttpStatus.SC_OK =>
          true
        case _ =>
          throw new RuntimeException(s"failed deleteLabel. errCode: ${resp.status} body: ${resp.body}")
      }
    }

    Await.result(future, 10 second)
  }
} 
Example 45
Source File: S2GraphMutateRoute.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.http

import akka.http.scaladsl.model.{ContentTypes, HttpEntity, HttpResponse, StatusCodes}
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.{ExceptionHandler, Route}
import com.fasterxml.jackson.core.JsonParseException
import org.apache.s2graph.core.rest.RequestParser
import org.apache.s2graph.core.storage.MutateResponse
import org.apache.s2graph.core.{GraphElement, S2Graph}
import org.slf4j.LoggerFactory
import play.api.libs.json.{JsValue, Json}

import scala.concurrent.{ExecutionContext, Future}

trait S2GraphMutateRoute extends PlayJsonSupport {

  val s2graph: S2Graph
  val logger = LoggerFactory.getLogger(this.getClass)

  lazy val parser = new RequestParser(s2graph)

  lazy val exceptionHandler = ExceptionHandler {
    case ex: JsonParseException => complete(StatusCodes.BadRequest -> ex.getMessage)
    case ex: java.lang.IllegalArgumentException => complete(StatusCodes.BadRequest -> ex.getMessage)
  }

  lazy val mutateVertex = path("vertex" / Segments) { params =>
    implicit val ec = s2graph.ec

    val (operation, serviceNameOpt, columnNameOpt) = params match {
      case operation :: serviceName :: columnName :: Nil => (operation, Option(serviceName), Option(columnName))
      case operation :: Nil => (operation, None, None)
      case _ => throw new RuntimeException("invalid params")
    }

    entity(as[JsValue]) { payload =>
      val future = vertexMutate(payload, operation, serviceNameOpt, columnNameOpt).map(Json.toJson(_))

      complete(future)
    }
  }

  lazy val mutateEdge = path("edge" / Segment) { operation =>
    implicit val ec = s2graph.ec

    entity(as[JsValue]) { payload =>
      val future = edgeMutate(payload, operation, withWait = true).map(Json.toJson(_))

      complete(future)
    }
  }

  def vertexMutate(jsValue: JsValue,
                   operation: String,
                   serviceNameOpt: Option[String] = None,
                   columnNameOpt: Option[String] = None,
                   withWait: Boolean = true)(implicit ec: ExecutionContext): Future[Seq[Boolean]] = {
    val vertices = parser.toVertices(jsValue, operation, serviceNameOpt, columnNameOpt)

    val verticesToStore = vertices.filterNot(_.isAsync)

    s2graph.mutateVertices(verticesToStore, withWait).map(_.map(_.isSuccess))
  }

  def edgeMutate(elementsWithTsv: Seq[(GraphElement, String)], withWait: Boolean)(implicit ec: ExecutionContext): Future[Seq[Boolean]] = {
    val elementWithIdxs = elementsWithTsv.zipWithIndex
    val (elementSync, elementAsync) = elementWithIdxs.partition { case ((element, tsv), idx) => !element.isAsync }

    val retToSkip = elementAsync.map(_._2 -> MutateResponse.Success)
    val (elementsToStore, _) = elementSync.map(_._1).unzip
    val elementsIdxToStore = elementSync.map(_._2)

    s2graph.mutateElements(elementsToStore, withWait).map { mutateResponses =>
      elementsIdxToStore.zip(mutateResponses) ++ retToSkip
    }.map(_.sortBy(_._1).map(_._2.isSuccess))
  }

  def edgeMutate(jsValue: JsValue, operation: String, withWait: Boolean)(implicit ec: ExecutionContext): Future[Seq[Boolean]] = {
    val edgesWithTsv = parser.parseJsonFormat(jsValue, operation)
    edgeMutate(edgesWithTsv, withWait)
  }

  // expose routes
  lazy val mutateRoute: Route =
    post {
      concat(
        handleExceptions(exceptionHandler) {
          mutateVertex
        },
        handleExceptions(exceptionHandler) {
          mutateEdge
        }
      )
    }

} 
Example 46
Source File: Server.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.http

import java.time.Instant

import scala.language.postfixOps
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.Duration
import scala.util.{Failure, Success}
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.{ContentTypes, HttpEntity, HttpResponse, StatusCodes}
import akka.http.scaladsl.server.Route
import akka.http.scaladsl.server.Directives._
import akka.stream.ActorMaterializer
import com.typesafe.config.ConfigFactory
import org.apache.s2graph.core.S2Graph
import org.slf4j.LoggerFactory

object Server extends App
  with S2GraphTraversalRoute
  with S2GraphAdminRoute
  with S2GraphMutateRoute
  with S2GraphQLRoute {

  implicit val system: ActorSystem = ActorSystem("S2GraphHttpServer")
  implicit val materializer: ActorMaterializer = ActorMaterializer()
  implicit val executionContext: ExecutionContext = system.dispatcher

  val config = ConfigFactory.load()

  override val s2graph = new S2Graph(config)
  override val logger = LoggerFactory.getLogger(this.getClass)

  val port = sys.props.get("http.port").fold(8000)(_.toInt)
  val interface = sys.props.get("http.interface").fold("0.0.0.0")(identity)

  val startAt = System.currentTimeMillis()

  def uptime = System.currentTimeMillis() - startAt

  def serverHealth = s"""{ "port": ${port}, "interface": "${interface}", "started_at": "${Instant.ofEpochMilli(startAt)}", "uptime": "${uptime} millis" }"""

  def health = HttpResponse(status = StatusCodes.OK, entity = HttpEntity(ContentTypes.`application/json`, serverHealth))

  // Allows you to determine routes to expose according to external settings.
  lazy val routes: Route = concat(
    pathPrefix("graphs")(traversalRoute),
    pathPrefix("mutate")(mutateRoute),
    pathPrefix("admin")(adminRoute),
    pathPrefix("graphql")(graphqlRoute),
    get(complete(health))
  )

  val binding: Future[Http.ServerBinding] = Http().bindAndHandle(routes, interface, port)
  binding.onComplete {
    case Success(bound) => logger.info(s"Server online at http://${bound.localAddress.getHostString}:${bound.localAddress.getPort}/")
    case Failure(e) => logger.error(s"Server could not start!", e)
  }

  scala.sys.addShutdownHook {
    s2graph.shutdown()
    system.terminate()
    logger.info("System terminated")
  }

  Await.result(system.whenTerminated, Duration.Inf)
} 
Example 47
Source File: S2GraphTraversalRoute.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.http

import org.apache.s2graph.core.S2Graph
import org.apache.s2graph.core.rest.RestHandler.CanLookup
import org.slf4j.LoggerFactory
import akka.http.scaladsl.server.Route
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.model.headers.RawHeader
import akka.http.scaladsl.model._
import org.apache.s2graph.core.GraphExceptions.{BadQueryException, JsonParseException}
import org.apache.s2graph.core.rest.RestHandler
import play.api.libs.json._

object S2GraphTraversalRoute {

  import scala.collection._

  implicit val akkHttpHeaderLookup = new CanLookup[immutable.Seq[HttpHeader]] {
    override def lookup(m: immutable.Seq[HttpHeader], key: String): Option[String] = m.find(_.name() == key).map(_.value())
  }
}

trait S2GraphTraversalRoute extends PlayJsonSupport {

  import S2GraphTraversalRoute._

  val s2graph: S2Graph
  val logger = LoggerFactory.getLogger(this.getClass)

  implicit lazy val ec = s2graph.ec
  lazy val restHandler = new RestHandler(s2graph)

  // The `/graphs/*` APIs branch off the existing restHandler.doPost.
  // For now they are implemented by delegating to that function.
  lazy val delegated: Route = {
    entity(as[String]) { body =>
      logger.info(body)

      extractRequest { request =>
        val result = restHandler.doPost(request.uri.toRelative.toString(), body, request.headers)
        val responseHeaders = result.headers.toList.map { case (k, v) => RawHeader(k, v) }

        val f = result.body.map(StatusCodes.OK -> _).recover {
          case BadQueryException(msg, _) => StatusCodes.BadRequest -> Json.obj("error" -> msg)
          case JsonParseException(msg) => StatusCodes.BadRequest -> Json.obj("error" -> msg)
          case e: Exception => StatusCodes.InternalServerError -> Json.obj("error" -> e.toString)
        }

        respondWithHeaders(responseHeaders)(complete(f))
      }
    }
  }

  // expose routes
  lazy val traversalRoute: Route =
    post {
      concat(
        delegated // getEdges, experiments, etc.
      )
    }
} 
Example 48
Source File: MutateRouteSpec.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.http

import akka.http.scaladsl.marshalling.Marshal
import akka.http.scaladsl.model._
import akka.http.scaladsl.testkit.ScalatestRouteTest
import com.typesafe.config.ConfigFactory
import org.apache.s2graph.core.Management.JsonModel.Prop
import org.apache.s2graph.core.S2Graph
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpec}
import org.slf4j.LoggerFactory
import play.api.libs.json.{JsValue, Json}

class MutateRouteSpec extends WordSpec with Matchers with PlayJsonSupport with ScalaFutures with ScalatestRouteTest with S2GraphMutateRoute with BeforeAndAfterAll {

  import scala.collection.JavaConverters._

  val dbUrl = "jdbc:h2:file:./var/metastore_mutate_route;MODE=MYSQL;AUTO_SERVER=true"
  val config =
    ConfigFactory.parseMap(Map("db.default.url" -> dbUrl).asJava)
  lazy val s2graph = new S2Graph(config.withFallback(ConfigFactory.load()))
  override val logger = LoggerFactory.getLogger(this.getClass)

  override def afterAll(): Unit = {
    s2graph.shutdown(true)
  }

  lazy val routes = mutateRoute

  val serviceName = "kakaoFavorites"
  val columnName = "userName"

  "MutateRoute" should {

    "be able to insert vertex (POST /mutate/vertex/insert)" in {
      s2graph.management.createService(serviceName, "localhost", s"${serviceName}-dev", 1, None)
      s2graph.management.createServiceColumn(serviceName, columnName, "string", Seq(Prop("age", "0", "integer")))

      // {"timestamp": 10, "serviceName": "s2graph", "columnName": "user", "id": 1, "props": {}}
      val param = Json.obj(
        "timestamp" -> 10,
        "serviceName" -> serviceName,
        "columnName" -> columnName,
        "id" -> "user_a",
        "props" -> Json.obj(
          "age" -> 20
        )
      )

      val entity = Marshal(param).to[MessageEntity].futureValue
      val request = Post("/vertex/insert").withEntity(entity)

      request ~> routes ~> check {
        status should ===(StatusCodes.OK)
        contentType should ===(ContentTypes.`application/json`)

        val response = entityAs[JsValue]
        response should ===(Json.toJson(Seq(true)))
      }
    }
  }
} 
Example 49
Source File: AdminRouteSpec.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.http

import akka.http.scaladsl.marshalling.Marshal
import akka.http.scaladsl.model._
import akka.http.scaladsl.testkit.ScalatestRouteTest
import com.typesafe.config.ConfigFactory
import org.apache.s2graph.core.Management.JsonModel.Prop
import org.apache.s2graph.core.S2Graph
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpec}
import org.slf4j.LoggerFactory
import play.api.libs.json.{JsString, JsValue, Json}

class AdminRoutesSpec extends WordSpec with Matchers with ScalaFutures with ScalatestRouteTest with S2GraphAdminRoute with BeforeAndAfterAll {
  import scala.collection.JavaConverters._

  val dbUrl = "jdbc:h2:file:./var/metastore_admin_route;MODE=MYSQL;AUTO_SERVER=true"
  val config =
    ConfigFactory.parseMap(Map("db.default.url" -> dbUrl).asJava)
  lazy val s2graph = new S2Graph(config.withFallback(ConfigFactory.load()))
  override val logger = LoggerFactory.getLogger(this.getClass)

  override def afterAll(): Unit = {
    s2graph.shutdown(true)
  }

  lazy val routes = adminRoute

  val serviceName = "kakaoFavorites"
  val columnName = "userName"

  "AdminRoute" should {
    "be able to create service (POST /createService)" in {
      val serviceParam = Json.obj(
        "serviceName" -> serviceName,
        "compressionAlgorithm" -> "gz"
      )

      val serviceEntity = Marshal(serviceParam).to[MessageEntity].futureValue
      val request = Post("/createService").withEntity(serviceEntity)

      request ~> routes ~> check {
        status should ===(StatusCodes.Created)
        contentType should ===(ContentTypes.`application/json`)

        val response = entityAs[JsValue]

        (response \\ "name").head should ===(JsString("kakaoFavorites"))
        (response \\ "status").head should ===(JsString("ok"))
      }
    }

    "return service if present (GET /getService/{serviceName})" in {
      val request = HttpRequest(uri = s"/getService/$serviceName")

      request ~> routes ~> check {
        status should ===(StatusCodes.OK)
        contentType should ===(ContentTypes.`application/json`)

        val response = entityAs[JsValue]

        (response \\ "name").head should ===(JsString("kakaoFavorites"))
      }
    }

    "be able to create serviceColumn (POST /createServiceColumn)" in {
      val serviceColumnParam = Json.obj(
        "serviceName" -> serviceName,
        "columnName" -> columnName,
        "columnType" -> "string",
        "props" -> Json.toJson(
          Seq(
            Json.obj("name" -> "age", "defaultValue" -> "-1", "dataType" -> "integer")
          )
        )
      )

      val serviceColumnEntity = Marshal(serviceColumnParam).to[MessageEntity].futureValue
      val request = Post("/createServiceColumn").withEntity(serviceColumnEntity)

      request ~> routes ~> check {
        status should ===(StatusCodes.Created)
        contentType should ===(ContentTypes.`application/json`)

        val response = entityAs[JsValue]

        (response \\ "serviceName").head should ===(JsString("kakaoFavorites"))
        (response \\ "columnName").head should ===(JsString("userName"))
        (response \\ "status").head should ===(JsString("ok"))
      }
    }
  }
} 
Example 50
Source File: DexExtensionGrpcConnector.scala    From matcher   with MIT License 5 votes vote down vote up
package com.wavesplatform.dex.tool.connectors

import cats.instances.future._
import cats.instances.list._
import cats.syntax.either._
import cats.syntax.traverse._
import ch.qos.logback.classic.{Level, Logger}
import com.wavesplatform.dex.cli.ErrorOr
import com.wavesplatform.dex.domain.account.Address
import com.wavesplatform.dex.domain.asset.Asset
import com.wavesplatform.dex.domain.asset.Asset.{IssuedAsset, Waves}
import com.wavesplatform.dex.grpc.integration.WavesBlockchainClientBuilder
import com.wavesplatform.dex.grpc.integration.clients.WavesBlockchainClient
import com.wavesplatform.dex.grpc.integration.dto.BriefAssetDescription
import com.wavesplatform.dex.grpc.integration.settings.GrpcClientSettings.ChannelOptionsSettings
import com.wavesplatform.dex.grpc.integration.settings.{GrpcClientSettings, WavesBlockchainClientSettings}
import monix.execution.Scheduler.Implicits.{global => monixScheduler}
import org.slf4j.LoggerFactory

import scala.concurrent.ExecutionContext.Implicits.{global => executionContext}
import scala.concurrent.duration._
import scala.concurrent.{Await, Awaitable, Future}
import scala.util.Try

case class DexExtensionGrpcConnector private (target: String, grpcAsyncClient: WavesBlockchainClient[Future]) extends Connector {

  import DexExtensionGrpcConnector._

  private def sync[A](f: Awaitable[A]): A = Await.result(f, requestTimeout)

  private def getDetailedBalance(asset: Asset, balance: Long): Future[(Asset, (BriefAssetDescription, Long))] = asset match {
    case Waves           => Future.successful(asset -> (BriefAssetDescription.wavesDescription -> balance))
    case ia: IssuedAsset => grpcAsyncClient.assetDescription(ia).map(maybeDesc => ia -> (maybeDesc.get -> balance))
  }

  def matcherBalanceAsync(address: Address): Future[DetailedBalance] =
    for {
      balances                <- grpcAsyncClient.allAssetsSpendableBalance(address)
      balancesWithDescription <- balances.toList.traverse { case (a, b) => getDetailedBalance(a, b) }
    } yield balancesWithDescription.toMap

  def matcherBalanceSync(address: Address): DetailedBalance = sync { matcherBalanceAsync(address) }

  override def close(): Unit = Await.result(grpcAsyncClient.close(), 3.seconds)
}

object DexExtensionGrpcConnector {

  val requestTimeout: FiniteDuration = 10.seconds

  type DetailedBalance = Map[Asset, (BriefAssetDescription, Long)]

  def create(target: String): ErrorOr[DexExtensionGrpcConnector] =
    Try {
      LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME).asInstanceOf[Logger].setLevel(Level.OFF)
      val grpcSettings   = GrpcClientSettings(target, 5, 5, true, 2.seconds, 5.seconds, 1.minute, ChannelOptionsSettings(5.seconds))
      val clientSettings = WavesBlockchainClientSettings(grpcSettings, 100.milliseconds, 100)
      WavesBlockchainClientBuilder.async(clientSettings, monixScheduler, executionContext)
    }.toEither
      .bimap(ex => s"Cannot establish gRPC connection to DEX Extension! $ex", client => DexExtensionGrpcConnector(target, client))
} 
Example 51
Source File: ScorexLogging.scala    From matcher   with MIT License 5 votes vote down vote up
package com.wavesplatform.dex.domain.utils

import monix.eval.Task
import monix.execution.{CancelableFuture, Scheduler}
import org.slf4j.{Logger, LoggerFactory}

case class LoggerFacade(logger: Logger) {

  def trace(message: => String): Unit                       = if (logger.isTraceEnabled) logger.trace(message)
  def debug(message: => String, arg: Any): Unit             = if (logger.isDebugEnabled) logger.debug(message, arg)
  def debug(message: => String): Unit                       = if (logger.isDebugEnabled) logger.debug(message)
  def info(message: => String): Unit                        = if (logger.isInfoEnabled) logger.info(message)
  def info(message: => String, arg: Any): Unit              = if (logger.isInfoEnabled) logger.info(message, arg)
  def info(message: => String, throwable: Throwable): Unit  = if (logger.isInfoEnabled) logger.info(message, throwable)
  def warn(message: => String): Unit                        = if (logger.isWarnEnabled) logger.warn(message)
  def warn(message: => String, throwable: Throwable): Unit  = if (logger.isWarnEnabled) logger.warn(message, throwable)
  def error(message: => String): Unit                       = if (logger.isErrorEnabled) logger.error(message)
  def error(message: => String, throwable: Throwable): Unit = if (logger.isErrorEnabled) logger.error(message, throwable)
}

trait ScorexLogging {

  protected lazy val log: LoggerFacade = LoggerFacade(LoggerFactory.getLogger(this.getClass))

  implicit class TaskExt[A](t: Task[A]) {

    def runAsyncLogErr(implicit s: Scheduler): CancelableFuture[A] = logErr.runToFuture(s)

    def logErr: Task[A] = t.onErrorHandleWith { ex =>
      log.error(s"Error executing task", ex)
      Task.raiseError[A](ex)
    }
  }
} 
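A minimal usage sketch for the trait above (the OrderBookWatcher class and its messages are made up for illustration); mixing in ScorexLogging provides a lazily created LoggerFacade, and because the message parameters are by-name, the interpolated strings below are only built when the corresponding log level is enabled.

import com.wavesplatform.dex.domain.utils.ScorexLogging

class OrderBookWatcher extends ScorexLogging {
  def onUpdate(pair: String, depth: Int): Unit = {
    // evaluated only if DEBUG is enabled, thanks to the by-name message parameter
    log.debug(s"Order book $pair updated, depth=$depth")
    if (depth == 0) log.warn(s"Order book $pair is empty")
  }
}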
Example 52
Source File: MyJournalSpec.scala    From akka-tools   with MIT License 5 votes vote down vote up
package no.nextgentel.oss.akkatools.persistence.jdbcjournal

import akka.persistence.CapabilityFlag
import akka.persistence.journal.JournalSpec
import akka.persistence.snapshot.SnapshotStoreSpec
import com.typesafe.config.ConfigFactory
import org.scalatest.BeforeAndAfter
import org.slf4j.LoggerFactory

class MyJournalSpec extends JournalSpec (
  config = ConfigFactory.parseString(
    s"""
       |akka.persistence.query.jdbc-read-journal.configName = MyJournalSpec
       |jdbc-journal.configName = MyJournalSpec
       |jdbc-snapshot-store.configName = MyJournalSpec
     """.stripMargin).withFallback(ConfigFactory.load("application-test.conf"))) {

  val log = LoggerFactory.getLogger(getClass)

  val errorHandler = new JdbcJournalErrorHandler {
    override def onError(e: Exception): Unit = log.error("JdbcJournalErrorHandler.onError", e)
  }

  JdbcJournalConfig.setConfig("MyJournalSpec", JdbcJournalConfig(DataSourceUtil.createDataSource("MyJournalSpec"), Some(errorHandler), StorageRepoConfig(), new PersistenceIdParserImpl('-')))

  override protected def supportsRejectingNonSerializableObjects: CapabilityFlag = false
}

class MySnapshotStoreSpec extends SnapshotStoreSpec (
  config = ConfigFactory.parseString(
    s"""
       |akka.persistence.query.jdbc-read-journal.configName = MySnapshotStoreSpec
       |jdbc-journal.configName = MySnapshotStoreSpec
       |jdbc-snapshot-store.configName = MySnapshotStoreSpec
     """.stripMargin).withFallback(ConfigFactory.load("application-test.conf"))) with BeforeAndAfter {

  val log = LoggerFactory.getLogger(getClass)

  val errorHandler = new JdbcJournalErrorHandler {
    override def onError(e: Exception): Unit = log.error("JdbcJournalErrorHandler.onError", e)
  }

  JdbcJournalConfig.setConfig("MySnapshotStoreSpec", JdbcJournalConfig(DataSourceUtil.createDataSource("MySnapshotStoreSpec"), None, StorageRepoConfig(), new PersistenceIdParserImpl('-')))

} 
Example 53
Source File: SeedNodesListOrderingResolver.scala    From akka-tools   with MIT License 5 votes vote down vote up
package no.nextgentel.oss.akkatools.cluster

import java.util.concurrent.TimeUnit

import org.slf4j.LoggerFactory

import scala.concurrent.duration.FiniteDuration

// Must be used together with ClusterListener
object SeedNodesListOrderingResolver {
  val log = LoggerFactory.getLogger(getClass)
  def resolveSeedNodesList(repo:ClusterNodeRepo, clusterConfig:AkkaClusterConfig, maxAliveAge:FiniteDuration = FiniteDuration(20, TimeUnit.SECONDS)):AkkaClusterConfig = {

    val ourNode = clusterConfig.thisHostnameAndPort()

    // Since we're starting up, just make sure that we do not find info about ourselves from our last run
    log.debug(s"removeClusterNodeAlive for $ourNode")
    repo.removeClusterNodeAlive(ourNode)

    val allSeedNodes = clusterConfig.seedNodes

    val weAreSeedNode = allSeedNodes.contains(ourNode)
    if ( !weAreSeedNode) {
      log.info("We are NOT a seedNode")
    }

    val aliveNodes = repo.findAliveClusterNodes(maxAliveAge, onlyJoined = false).map {
      node =>
        // alive nodes are listed in this form:
        //    akka.tcp://SomeAkkaSystem@host1:9999
        // We must remove everything before hostname:port
        val index = node.indexOf('@')
        if ( index >= 0) node.substring(index+1) else node
    }

    val seedNodeListToUse = if ( aliveNodes.isEmpty ) {
      if (weAreSeedNode) {
        val allNodesExceptOur = allSeedNodes.filter(n => n != ourNode)
        val list = List(ourNode) ++ allNodesExceptOur

        log.info("No other clusterNodes found as alive - We must be first seed node - seedNodeListToUse: " + list)
        list
      } else {
        log.info("No other clusterNodes found as alive - Since we're not a seedNode, we're using the list as is - seedNodeListToUse: " + allSeedNodes)
        allSeedNodes
      }
    } else {

      if (weAreSeedNode) {
        val allNodesExceptOurAndAliveOnes = allSeedNodes.filter(n => n != ourNode && !aliveNodes.contains(n))

        val list = aliveNodes ++ List(ourNode) ++ allNodesExceptOurAndAliveOnes

        log.info("Found other alive clusterNodes - we should not be first seed node. Alive cluster nodes: " + aliveNodes.mkString(",") + " - seedNodeListToUse: " + list)
        list
      } else {
        val allNodesExceptAliveOnes = allSeedNodes.filter(n => !aliveNodes.contains(n))

        val list = aliveNodes ++ allNodesExceptAliveOnes

        log.info("Found other alive clusterNodes - Alive cluster nodes: " + aliveNodes.mkString(",") + " - seedNodeListToUse: " + list)
        list

      }
    }

    clusterConfig.withSeedNodeList(seedNodeListToUse)
  }
} 
Example 54
Source File: ClusterSingletonHelperTest.scala    From akka-tools   with MIT License 5 votes vote down vote up
package no.nextgentel.oss.akkatools.cluster

import akka.actor.{Actor, ActorRef, ActorSystem, Props}
import akka.testkit.{TestKit, TestProbe}
import com.typesafe.config.ConfigFactory
import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuiteLike, Matchers}
import org.slf4j.LoggerFactory

import scala.util.Random

object ClusterSingletonHelperTest {
  val port = 20000 + Random.nextInt(20000)
}

class ClusterSingletonHelperTest (_system:ActorSystem) extends TestKit(_system) with FunSuiteLike with Matchers with BeforeAndAfterAll with BeforeAndAfter {

  def this() = this(ActorSystem("test-actor-system", ConfigFactory.parseString(
      s"""akka.actor.provider = "akka.cluster.ClusterActorRefProvider"
          |akka.remote.enabled-transports = ["akka.remote.netty.tcp"]
          |akka.remote.netty.tcp.hostname="localhost"
          |akka.remote.netty.tcp.port=${ClusterSingletonHelperTest.port}
          |akka.cluster.seed-nodes = ["akka.tcp://test-actor-system@localhost:${ClusterSingletonHelperTest.port}"]
    """.stripMargin
    ).withFallback(ConfigFactory.load("application-test.conf"))))

  override def afterAll {
    TestKit.shutdownActorSystem(system)
  }

  val log = LoggerFactory.getLogger(getClass)


  test("start and communicate with cluster-singleton") {


    val started = TestProbe()
    val proxy = ClusterSingletonHelper.startClusterSingleton(system, Props(new OurClusterSingleton(started.ref)), "ocl")
    started.expectMsg("started")
    val sender = TestProbe()
    sender.send(proxy, "ping")
    sender.expectMsg("pong")

  }
}

class OurClusterSingleton(started:ActorRef) extends Actor {

  started ! "started"
  def receive = {
    case "ping" => sender ! "pong"
  }
} 
Example 55
Source File: AggregateCmdMessageExtractor.scala    From akka-tools   with MIT License 5 votes vote down vote up
package no.nextgentel.oss.akkatools.aggregate

import akka.cluster.sharding.ShardRegion.HashCodeMessageExtractor
import akka.persistence.{SaveSnapshotFailure, SaveSnapshotSuccess}
import no.nextgentel.oss.akkatools.persistence.{DurableMessage, DurableMessageReceived}
import org.slf4j.LoggerFactory

class AggregateCmdMessageExtractor(val maxNumberOfNodes:Int = 2, val shardsPrNode:Int = 10) extends HashCodeMessageExtractor(maxNumberOfNodes * shardsPrNode) {
  val log = LoggerFactory.getLogger(getClass)

  private def extractId(x:AnyRef):String = {
    x match {
      case a:AggregateCmd =>
        if (a.id == null) {
          log.warn("id() returned null in message: " + x)
        }
        a.id
      case q:AnyRef =>
        log.error("Do not know how to extract entryId for message of type " + x.getClass + ": " + x)
        null
    }
  }

  override def entityId(rawMessage: Any): String = {
    rawMessage match {
      case dm:DurableMessage => extractId(dm.payload)
      case dmr:DurableMessageReceived =>
        dmr.confirmationRoutingInfo.map(_.toString).getOrElse {
          log.warn("DurableMessageReceived.getConfirmationRoutingInfo() returned null in message: " + rawMessage)
          null
        }
      case x:SaveSnapshotSuccess =>
        // Ignoring this message to mitigate Akka-bug https://github.com/akka/akka/issues/19893
        log.debug(s"Ignoring $x  to mitigate akka issue 19893")
        null
      case x:SaveSnapshotFailure =>
        // Ignoring this message to mitigate Akka-bug https://github.com/akka/akka/issues/19893
        log.debug(s"Ignoring $x to mitigate akka issue 19893")
        null
      case x:AnyRef => extractId(x)
    }
  }


} 
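Since the extractor above is a ShardRegion.HashCodeMessageExtractor, it is intended to be handed to Cluster Sharding when the aggregate's shard region is started. A minimal wiring sketch, assuming an ActorSystem and the aggregate's Props are already available; the "booking" type name and the helper object are hypothetical:

import akka.actor.{ActorRef, ActorSystem, Props}
import akka.cluster.sharding.{ClusterSharding, ClusterShardingSettings}

object AggregateShardingWiring {
  // starts a shard region whose messages (AggregateCmd, DurableMessage, ...) are routed via the extractor above
  def startRegion(system: ActorSystem, aggregateProps: Props): ActorRef =
    ClusterSharding(system).start(
      typeName = "booking",
      entityProps = aggregateProps,
      settings = ClusterShardingSettings(system),
      messageExtractor = new AggregateCmdMessageExtractor(maxNumberOfNodes = 2, shardsPrNode = 10)
    )
}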
Example 56
Source File: GeneralAggregateWithShardingTest.scala    From akka-tools   with MIT License 5 votes vote down vote up
package no.nextgentel.oss.akkatools.aggregate

import java.util.{Arrays, UUID}

import akka.actor.ActorSystem
import akka.actor.Status.Failure
import akka.testkit.{TestKit, TestProbe}
import com.typesafe.config.ConfigFactory
import no.nextgentel.oss.akkatools.aggregate.testAggregate.StateName._
import no.nextgentel.oss.akkatools.aggregate.testAggregate.{StateName, _}
import no.nextgentel.oss.akkatools.testing.AggregateTesting
import org.scalatest._
import org.slf4j.LoggerFactory

import scala.util.Random

object GeneralAggregateWithShardingTest {
  val port = 20000 + Random.nextInt(20000)
}


class GeneralAggregateWithShardingTest(_system:ActorSystem) extends TestKit(_system) with FunSuiteLike with Matchers with BeforeAndAfterAll with BeforeAndAfter {

  def this() = this(ActorSystem("test-actor-system", ConfigFactory.parseString(
    s"""akka.actor.provider = "akka.cluster.ClusterActorRefProvider"
        |akka.remote.enabled-transports = ["akka.remote.netty.tcp"]
        |akka.remote.netty.tcp.hostname="localhost"
        |akka.remote.netty.tcp.port=${GeneralAggregateWithShardingTest.port}
        |akka.cluster.seed-nodes = ["akka.tcp://test-actor-system@localhost:${GeneralAggregateWithShardingTest.port}"]
    """.stripMargin
  ).withFallback(ConfigFactory.load("application-test.conf"))))

  override def afterAll {
    TestKit.shutdownActorSystem(system)
  }

  val log = LoggerFactory.getLogger(getClass)
  private def generateId() = UUID.randomUUID().toString

  val seatIds = List("s1","id-used-in-Failed-in-onAfterValidationSuccess", "s2", "s3-This-id-is-going-to-be-discarded", "s4")

  trait TestEnv extends AggregateTesting[BookingState] {
    val id = generateId()
    val printShop = TestProbe()
    val cinema = TestProbe()
    val onSuccessDmForwardReceiver = TestProbe()

    val starter = new AggregateStarterSimple("booking", system).withAggregatePropsCreator {
      dmSelf =>
        BookingAggregate.props(dmSelf, dmForwardAndConfirm(printShop.ref).path, dmForwardAndConfirm(cinema.ref).path, seatIds, dmForwardAndConfirm(onSuccessDmForwardReceiver.ref).path)
    }

    val main = starter.dispatcher
    starter.start()

    def assertState(correctState:BookingState): Unit = {
      assert(getState(id) == correctState)
    }

  }




  test("normal flow") {

    new TestEnv {

      // Make sure we start with empty state
      assertState(BookingState.empty())

      val maxSeats = 2
      val sender = TestProbe()
      // Open the booking
      println("1")
      sendDMBlocking(main, OpenBookingCmd(id, maxSeats), sender.ref)
      println("2")
      assertState(BookingState(OPEN, maxSeats, Set()))

    }
  }
} 
Example 57
package no.nextgentel.oss.akkatools.aggregate.aggregateTest_usingAggregateStateBase

import java.util.UUID

import akka.actor.{ActorPath, ActorSystem, Props}
import akka.persistence.{DeleteMessagesFailure, DeleteMessagesSuccess, SaveSnapshotFailure, SaveSnapshotSuccess, SnapshotMetadata, SnapshotOffer}
import akka.testkit.{TestKit, TestProbe}
import com.typesafe.config.ConfigFactory
import no.nextgentel.oss.akkatools.aggregate._
import no.nextgentel.oss.akkatools.testing.AggregateTesting
import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuiteLike, Matchers}
import org.slf4j.LoggerFactory



  override def onSnapshotOffer(offer: SnapshotOffer): Unit = {
    state = offer.snapshot.asInstanceOf[StringState]
  }

  override def acceptSnapshotRequest(req: SaveSnapshotOfCurrentState): Boolean = {
    if (state == StringState("WAT")) {
      state = StringState("SAVED")
      true
    }
    else {
      state = StringState("WAT") //So it works second time
      false
    }
  }

  override def onSnapshotSuccess(success: SaveSnapshotSuccess): Unit = {
    state = StringState("SUCCESS_SNAP")
  }

  override def onSnapshotFailure(failure: SaveSnapshotFailure): Unit = {
    state = StringState("FAIL_SNAP")
  }

  override def onDeleteMessagesSuccess(success: DeleteMessagesSuccess): Unit = {
    state = StringState("SUCCESS_MSG")
  }

  override def onDeleteMessagesFailure(failure: DeleteMessagesFailure): Unit = {
    state = StringState("FAIL_MSG")
  }

  // Used as prefix/base when constructing the persistenceId to use - the unique ID is extracted at runtime from the actorPath, which is constructed by the sharding coordinator
  override def persistenceIdBase(): String = "/x/"
}

case class StringEv(data: String)

case class StringState(data:String) extends AggregateStateBase[StringEv, StringState] {
  override def transitionState(event: StringEv): StateTransition[StringEv, StringState] =
    StateTransition(StringState(event.data))
} 
Example 58
Source File: HomeControllerSpec.scala    From phantom-activator-template   with Apache License 2.0 5 votes vote down vote up
package controllers

import org.scalatest.{BeforeAndAfterAll, MustMatchers, WordSpec}
import org.scalatestplus.play.guice.GuiceOneAppPerTest
import org.slf4j.LoggerFactory
import play.api.test.Helpers._
import play.api.test._


class HomeControllerSpec extends WordSpec with GuiceOneAppPerTest with MustMatchers with BeforeAndAfterAll {

  private val logger = LoggerFactory.getLogger("embedded-cassandra")

  override protected def beforeAll(): Unit = {
    EmbeddedCassandra.start(logger)
  }

  override protected def afterAll(): Unit = {
    EmbeddedCassandra.cleanup(logger)
  }

  "Application" should {

    "render the index page" in {
      val result = route(app, FakeRequest(GET, "/")).get
      status(result) must equal(OK)
      contentAsString(result) must include("Spring Bud")
    }
  }

} 
Example 59
Source File: OntologyHubClientTest.scala    From daf-semantics   with Apache License 2.0 5 votes vote down vote up
package clients

import java.nio.file.Paths
import org.junit.After
import org.junit.Assert
import org.junit.Assume
import org.junit.Before
import org.junit.BeforeClass
import org.junit.Test
import org.slf4j.LoggerFactory
import play.Logger
import utilities.Adapters.AwaitFuture
import clients.HTTPClient


object OntologyHubClientTest {

  val logger = LoggerFactory.getLogger(this.getClass)

  @BeforeClass
  def check_before() {
    Assume.assumeTrue(ontonethub_is_running)
    logger.info("Ontonethub is UP! [TESTING...]")
  }

  private def ontonethub_is_running = {
    val client = HTTPClient
    client.start()
    val ontonethub = new OntonetHubClient(client.ws)
    val check = ontonethub.status().await
    client.stop()
    check
  }

} 
Example 60
Source File: CatalogStandardizationService.scala    From daf-semantics   with Apache License 2.0 5 votes vote down vote up
package it.almawave.kb.http.endpoints

import javax.inject.Singleton
import javax.ws.rs.Path
import org.slf4j.LoggerFactory
import it.almawave.kb.http.models.OntologyMetaModel
import com.typesafe.config.ConfigFactory
import java.nio.file.Paths
import it.almawave.linkeddata.kb.catalog.CatalogBox
import it.almawave.linkeddata.kb.utils.JSONHelper
import it.almawave.daf.standardization.refactoring.CatalogStandardizer

@Singleton
@Path("conf://api-catalog-config")
class CatalogStandardizationService {

  private val logger = LoggerFactory.getLogger(this.getClass)

  val conf = ConfigFactory.parseFile(Paths.get("./conf/catalog.conf").normalize().toFile())
  val catalog = new CatalogBox(conf)
  catalog.start()

  val _standardizer = CatalogStandardizer(catalog)
  _standardizer.start

  def standardizer = _standardizer

  //  TODO: STOP?

} 
Example 61
Source File: Status.scala    From daf-semantics   with Apache License 2.0 5 votes vote down vote up
package it.almawave.kb.http.endpoints

import java.time.LocalTime
import io.swagger.annotations.Api
import javax.ws.rs.Path
import javax.ws.rs.GET
import javax.ws.rs.Produces
import io.swagger.annotations.ApiOperation
import javax.ws.rs.core.MediaType
import org.slf4j.LoggerFactory
import javax.ws.rs.core.Context
import javax.ws.rs.core.UriInfo
import javax.ws.rs.core.Request
import it.almawave.linkeddata.kb.utils.JSONHelper
import java.time.LocalDateTime
import java.time.ZonedDateTime
import java.time.format.DateTimeFormatter
import java.util.Locale
import java.time.ZoneId

@Api(tags = Array("catalog"))
@Path("/status")
class Status {

  private val logger = LoggerFactory.getLogger(this.getClass)

  @Context
  var uriInfo: UriInfo = null

  @GET
  @Produces(Array(MediaType.APPLICATION_JSON))
  @ApiOperation(nickname = "status", value = "endpoint status")
  def status() = {

    val base_uri = uriInfo.getBaseUri
    val msg = s"the service is running at ${base_uri}"
    logger.info(msg)

    val _now = now()
    StatusMsg(_now._1, _now._2, msg)

  }

  def now() = {

    val zdt = ZonedDateTime.now(ZoneId.of("+1"))
    val dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSSZ")

    (zdt.format(dtf), zdt)

  }

}

case class StatusMsg(
  now:      String,
  dateTime: ZonedDateTime,
  msg:      String
) 
Example 62
Source File: StandardizationQueryV1.scala    From daf-semantics   with Apache License 2.0 5 votes vote down vote up
package it.almawave.daf.standardization.v1

import com.typesafe.config.Config
import java.nio.file.Paths
import java.nio.file.Files
import it.almawave.linkeddata.kb.catalog.VocabularyBox
import java.io.FileFilter
import java.io.File
import java.nio.file.Path
import org.slf4j.LoggerFactory


  def details(voc_box: VocabularyBox, level: Int, uri: String, lang: String) = {

    val onto_id = detect_ontology(voc_box)

    val query_path: Path = detailsQueryFile(onto_id)
      .map(_.toPath())
      .getOrElse(default_query_details)

    // disabled (too verbose): logger.debug(s"daf.standardization> try ${voc_box.id} with details query: ${query_path}")

    val query = new String(Files.readAllBytes(query_path))
    query
      .replace("${vocabularyID}", voc_box.id)
      .replace("${level}", level.toString())
      .replace("${uri}", uri)
      .replace("${lang}", lang)

  }

} 
Example 63
Source File: MainSingleStandardization.scala    From daf-semantics   with Apache License 2.0 5 votes vote down vote up
package it.almawave.daf.standardization.refactoring

import org.slf4j.LoggerFactory
import java.nio.file.Paths
import it.almawave.linkeddata.kb.catalog.CatalogBox
import com.typesafe.config.ConfigFactory
import it.almawave.linkeddata.kb.utils.JSONHelper

import it.almawave.linkeddata.kb.catalog.VocabularyBox

object MainSingleStandardization extends App {

  private val logger = LoggerFactory.getLogger(this.getClass)

  val conf = ConfigFactory.parseFile(Paths.get("./conf/catalog.conf").normalize().toFile())

  val catalog = new CatalogBox(conf)
  catalog.start()

  //  val vocID = "legal-status"
  //  val vocID = "theme-subtheme-mapping"
  val vocID = "licences"
  val std: VocabularyStandardizer = CatalogStandardizer(catalog).getVocabularyStandardizerByID(vocID).get
  std.start

  //  println("\n\nCSV")
  //  std.toCSV()(System.out)
  //
  //  println("\n\nTREE")
  val tree = std.toJSONTree()
  val json_tree = JSONHelper.writeToString(tree)
  println(json_tree)

  println("\n\nMETA")
  val meta = std.getMetadata()
  val json_meta = JSONHelper.writeToString(meta)
  println(json_meta)

  std.stop
  catalog.stop()

  // TODO: verify the closing of all active connections

}

object MainStandardizationAll extends App {

  private val logger = LoggerFactory.getLogger(this.getClass)
  val conf = ConfigFactory.parseFile(Paths.get("./conf/catalog.conf").normalize().toFile())

  val catalog = new CatalogBox(conf)
  catalog.start()

  val std = CatalogStandardizer(catalog)
  std.start

  val list = std.getVocabularyStandardizersList()

  list.foreach { vstd =>
    //    println(s"\n\nCSV for ${vstd.vbox}")
    vstd.toCSV()(System.out)
  }

  std.stop
  catalog.stop()

  System.exit(0)
} 
Example 64
Source File: NO_MainAllStandardization.scala    From daf-semantics   with Apache License 2.0 5 votes vote down vote up
package it.almawave.daf.standardization.refactoring

import org.slf4j.LoggerFactory
import java.nio.file.Paths
import com.typesafe.config.ConfigFactory
import it.almawave.linkeddata.kb.catalog.CatalogBox
import scala.util.Try

object NO_MainAllStandardization extends App {

  private val logger = LoggerFactory.getLogger(this.getClass)

  val conf = ConfigFactory.parseFile(Paths.get("./conf/catalog.conf").normalize().toFile())

  val catalog = new CatalogBox(conf)
  catalog.start()

  CatalogStandardizer(catalog).getVocabularyStandardizersList()
    .zipWithIndex
    .slice(1, 2)
    .toList
    .foreach {
      case (std, i) =>

        Try {
          println(s"""\n\n$i: ${std.vbox}""")
          println("\n\nCSV_______________________________________")
          std.toCSV()(System.out)
          println("\n\n__________________________________________")
        }

    }

  catalog.stop()

} 
Example 65
Source File: KafkaSink.scala    From spark-kafka-sink   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.metrics.sink

import java.util.{ Properties, Locale }
import java.util.concurrent.TimeUnit

import org.slf4j.Logger
import org.slf4j.LoggerFactory

import com.codahale.metrics.MetricRegistry
import org.apache.spark.SecurityManager

import com.manyangled.kafkasink.KafkaReporter

class KafkaSink(val properties: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends org.apache.spark.metrics.sink.Sink {

  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  private def popt(prop: String): Option[String] =
    Option(properties.getProperty(prop))

  // These are non-negotiable
  val broker = popt("broker").get
  val topic = popt("topic").get

  lazy val reporter = new KafkaReporter(registry, broker, topic, properties)

  def start(): Unit = {
    logger.info(s"Starting Kafka metric reporter at $broker, topic $topic")
    val period = popt("period").getOrElse("10").toLong
    val tstr = popt("unit").getOrElse("seconds").toUpperCase(Locale.ROOT)
    val tunit = TimeUnit.valueOf(tstr)
    reporter.start(period, tunit)
  }

  def stop(): Unit = {
    logger.info(s"Stopping Kafka metric reporter at $broker, topic $topic")
    reporter.stop()
  }

  def report(): Unit = {
    logger.info(s"Reporting metrics to Kafka reporter at $broker, topic $topic")
    reporter.report()
  }
} 
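The keys read through popt above (broker, topic, period, unit) hint at how the sink is configured through Spark's metrics system. A sketch of such a metrics.properties fragment, kept in a Scala string for illustration; the sink name "kafka" and the broker/topic values are assumptions, not taken from this source:

// conventional Spark metrics.properties entries for registering a custom sink (values are placeholders)
val kafkaSinkMetricsConf: String =
  """*.sink.kafka.class=org.apache.spark.metrics.sink.KafkaSink
    |*.sink.kafka.broker=localhost:9092
    |*.sink.kafka.topic=spark-metrics
    |*.sink.kafka.period=10
    |*.sink.kafka.unit=seconds
    |""".stripMargin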
Example 66
Source File: Demo2iConfig.scala    From spark-riak-connector   with Apache License 2.0 5 votes vote down vote up
package com.basho.riak.spark.examples.demos.fbl
import com.basho.riak.client.core.RiakNode
import com.basho.riak.client.core.query.Namespace
import com.basho.riak.spark.rdd._
import com.basho.riak.client.core.query.indexes.LongIntIndex
import com.basho.riak.spark.rdd.connector.RiakConnectorConf
import com.basho.riak.spark.rdd.{RiakFunctions, BucketDef}
import com.basho.riak.spark.util.RiakObjectConversionUtil
import com.basho.riak.spark.writer.{WriteDataMapperFactory, WriteDataMapper}
import org.slf4j.{LoggerFactory, Logger}
import com.basho.riak.spark._
import com.basho.riak.client.core.query.{RiakObject, Namespace}
import com.basho.riak.client.api.annotations.{RiakKey, RiakIndex}
import org.apache.spark.{SparkConf, SparkContext}

case class Demo2iConfig(riakConf: RiakConnectorConf, index: String, bucket: String, from: Long, to: Long, name:String){

  def riakNodeBuilder(minConnections:Int = 2):RiakNode.Builder = {
    val firstTheWinner = riakConf.hosts.iterator.next()

    new RiakNode.Builder()
      .withMinConnections(minConnections)
      .withRemoteAddress(firstTheWinner.getHost)
      .withRemotePort(firstTheWinner.getPort)
  }
}

object Demo2iConfig{

  val DEFAULT_INDEX_NAME = "creationNo"
  val DEFAULT_BUCKET_NAME = "test-bucket"
  val DEFAULT_FROM = 1
  val DEFAULT_TO = 4

  def apply(sparkConf: SparkConf):Demo2iConfig = {

    Demo2iConfig(
      riakConf = RiakConnectorConf(sparkConf),
      index = sparkConf.get("spark.riak.demo.index", DEFAULT_INDEX_NAME),
      bucket = sparkConf.get("spark.riak.demo.bucket", DEFAULT_BUCKET_NAME),
      from = sparkConf.get("spark.riak.demo.from", DEFAULT_FROM.toString).toLong,
      to = sparkConf.get("spark.riak.demo.to", DEFAULT_TO.toString).toLong,
      name = sparkConf.get("spark.app.name", "")
    )
  }
} 
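A minimal sketch of building the demo configuration from a SparkConf; the spark.riak.demo.* keys come from Demo2iConfig.apply above, while spark.riak.connection.host is assumed to be the connector's host property and all values are placeholders:

import org.apache.spark.SparkConf

object Demo2iConfigExample {
  val sparkConf = new SparkConf()
    .setAppName("riak-2i-demo")
    .set("spark.riak.connection.host", "127.0.0.1:8087") // assumed connector property
    .set("spark.riak.demo.index", "creationNo")
    .set("spark.riak.demo.bucket", "test-bucket")
    .set("spark.riak.demo.from", "1")
    .set("spark.riak.demo.to", "4")

  // any key left unset falls back to the DEFAULT_* values defined above
  val demoConfig: Demo2iConfig = Demo2iConfig(sparkConf)
}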
Example 67
Source File: ClientHandler.scala    From spark-riak-connector   with Apache License 2.0 5 votes vote down vote up
package com.basho.riak.stub

import java.nio.ByteBuffer
import java.nio.channels.{AsynchronousSocketChannel, Channel, CompletionHandler}

import com.basho.riak.client.core.netty.RiakMessageCodec
import com.basho.riak.stub.ClientHandler._
import org.slf4j.LoggerFactory

class ClientHandler(val messageHandler: RiakMessageHandler) extends RiakMessageCodec
  with CompletionHandler[Integer, (AsynchronousSocketChannel, ByteBuffer)] {

  override def completed(result: Integer, attachment: (AsynchronousSocketChannel, ByteBuffer)): Unit = attachment match {
    case (channel, buffer) if result > 0 =>
      logger.info(s"Message received ${SocketUtils.serverConnectionAsStr(channel)} ($result bytes).")
      RiakMessageEncoder.decode(buffer.rewind().asInstanceOf[ByteBuffer]) match {
        case Some(m) if channel.isOpen =>
          val msgs = messageHandler.handle(new Context(channel), m)
          val encoded = RiakMessageEncoder.encode(msgs.toSeq: _*)
          val bytes = channel.write(encoded).get
          assert(bytes == encoded.position())
          logger.info(s"Response sent ${SocketUtils.clientConnectionAsStr(channel)} ($bytes bytes).")
          messageHandler.onRespond(m, msgs)
        case Some(m) if !channel.isOpen =>
          logger.warn("Impossible to write message to channel: channel has been already closed")
        case None => // TODO: handle case with no message
      }
      buffer.clear()
      channel.read(buffer, (channel, buffer), this)
    case _ =>
  }

  override def failed(exc: Throwable, attachment: (AsynchronousSocketChannel, ByteBuffer)): Unit = attachment match {
    case (channel, _) if channel.isOpen =>
      logger.error(s"Something went wrong with client ${SocketUtils.serverConnectionAsStr(channel)}", exc)
      disconnectClient(channel)
    case _ => // channel is already closed - do nothing
  }

  def disconnectClient(client: AsynchronousSocketChannel): Unit = this.synchronized {
    client.isOpen match {
      case true =>
        val connectionString = SocketUtils.serverConnectionAsStr(client)
        client.shutdownInput()
        client.shutdownOutput()
        client.close()
        logger.info(s"Client $connectionString was gracefully disconnected")
      case false => // client is already closed - do nothing
    }
  }
}

object ClientHandler {
  val logger = LoggerFactory.getLogger(classOf[ClientHandler])

  
  class Context(val channel: Channel)

} 
Example 68
Source File: RiakNodeStub.scala    From spark-riak-connector   with Apache License 2.0 5 votes vote down vote up
package com.basho.riak.stub

import java.net.InetSocketAddress
import java.nio.ByteBuffer
import java.nio.channels.{AsynchronousCloseException, AsynchronousServerSocketChannel, AsynchronousSocketChannel, CompletionHandler}

import com.basho.riak.client.core.util.HostAndPort
import com.basho.riak.stub.RiakNodeStub._
import org.slf4j.LoggerFactory

class RiakNodeStub(val host: String, val port: Int, messageHandler: RiakMessageHandler) {

  private final val localAddress = new InetSocketAddress(host, port)
  private final val clientHandler = new ClientHandler(messageHandler)

  private var serverChannel: AsynchronousServerSocketChannel = _
  private var clients: List[AsynchronousSocketChannel] = Nil

  def start(): HostAndPort = {
    serverChannel = AsynchronousServerSocketChannel.open()
    require(serverChannel.isOpen)

    serverChannel.bind(localAddress)
    serverChannel.accept(serverChannel, new CompletionHandler[AsynchronousSocketChannel, AsynchronousServerSocketChannel]() {
      override def completed(client: AsynchronousSocketChannel, server: AsynchronousServerSocketChannel): Unit = {
        logger.info(s"Incoming connection: ${SocketUtils.serverConnectionAsStr(client)}")
        this.synchronized {
          clients = client :: clients
        }

        val buffer = ByteBuffer.allocateDirect(1024) // scalastyle:ignore
        client.read(buffer, (client, buffer), clientHandler)

        server.accept(server, this)
      }

      override def failed(exc: Throwable, serverChannel: AsynchronousServerSocketChannel): Unit = exc match {
        case _: AsynchronousCloseException =>
        case _ => logger.error(s"Something went wrong:  ${serverChannel.toString}", exc);
      }
    })

    HostAndPort.fromParts(
      serverChannel.getLocalAddress.asInstanceOf[InetSocketAddress].getHostString,
      serverChannel.getLocalAddress.asInstanceOf[InetSocketAddress].getPort)
  }

  def stop(): Unit = this.synchronized {
    Option(serverChannel).foreach(_.close)
    clients.foreach(clientHandler.disconnectClient)
  }
}

object RiakNodeStub {
  val logger = LoggerFactory.getLogger(classOf[RiakNodeStub])
  final val DEFAULT_HOST = "localhost"

  def apply(host: String, port: Int, messageHandler: RiakMessageHandler): RiakNodeStub = new RiakNodeStub(host, port, messageHandler)

  def apply(port: Int, messageHandler: RiakMessageHandler): RiakNodeStub = RiakNodeStub(DEFAULT_HOST, port, messageHandler)

  def apply(messageHandler: RiakMessageHandler): RiakNodeStub = RiakNodeStub(DEFAULT_HOST, 0, messageHandler)
} 
Example 69
Source File: AbstractRiakTest.scala    From spark-riak-connector   with Apache License 2.0 5 votes vote down vote up
package com.basho.riak.spark.rdd

import com.basho.riak.JsonTestFunctions
import com.basho.riak.client.core.RiakNode
import com.basho.riak.client.core.query.Namespace
import org.junit._
import org.junit.rules.TestWatcher
import org.junit.runner.Description
import org.slf4j.{Logger, LoggerFactory}

abstract class AbstractRiakTest extends RiakFunctions with JsonTestFunctions {

  private final val logger: Logger = LoggerFactory.getLogger(this.getClass)

  protected val DEFAULT_NAMESPACE = new Namespace("default","test-bucket")
  protected val DEFAULT_NAMESPACE_4STORE = new Namespace("default", "test-bucket-4store")

  protected override val numberOfParallelRequests: Int = 4
  protected override val nodeBuilder: RiakNode.Builder = new RiakNode.Builder().withMinConnections(numberOfParallelRequests)

  protected val jsonData: Option[String] = None

  @Rule
  def watchman: TestWatcher = new TestWatcher() {
    override def starting(description: Description): Unit = {
      super.starting(description)
      logger.info(
        "\n----------------------------------------\n" +
          "  [TEST STARTED]  {}\n" +
          "----------------------------------------\n",
        description.getDisplayName)
    }

    override def finished(description: Description): Unit = {
      super.finished(description)
      logger.info(
        "\n----------------------------------------\n" +
          "  [TEST FINISHED]  {}\n" +
          "----------------------------------------\n",
        description.getDisplayName)
    }
  }

  @Before
  protected def initialize(): Unit = setupData()

  protected def setupData(): Unit = {
    // Purge data: data might not only have been created, but also changed, during the previous test case execution
    //
    // For manual check: curl -v http://localhost:10018/buckets/test-bucket/keys?keys=true
    List(DEFAULT_NAMESPACE, DEFAULT_NAMESPACE_4STORE) foreach resetAndEmptyBucket

    withRiakDo(session => jsonData.foreach(createValues(session, DEFAULT_NAMESPACE, _)))
  }
} 
Example 70
Source File: UnorderedParallelParquetSink.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s

import java.util.UUID

import akka.Done
import akka.stream.scaladsl.{Flow, Keep, Sink}
import org.apache.hadoop.fs.Path
import org.apache.parquet.schema.MessageType
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

private[parquet4s] object UnorderedParallelParquetSink extends IOOps {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path,
                                                             parallelism: Int,
                                                             options: ParquetWriter.Options = ParquetWriter.Options()
                                                            ): Sink[T, Future[Done]] = {
    val schema = ParquetSchemaResolver.resolveSchema[T]
    val valueCodecConfiguration = options.toValueCodecConfiguration

    validateWritePath(path, options)

    def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration)

    Flow[T]
      .zipWithIndex
      .groupBy(parallelism, elemAndIndex => Math.floorMod(elemAndIndex._2, parallelism))
      .map(elemAndIndex => encode(elemAndIndex._1))
      .fold(UnorderedChunk(path, schema, options))(_.write(_))
      .map(_.close())
      .async
      .mergeSubstreamsWithParallelism(parallelism)
      .toMat(Sink.ignore)(Keep.right)
  }

  private trait UnorderedChunk {

    def write(record: RowParquetRecord): UnorderedChunk

    def close(): Unit

  }

  private object UnorderedChunk {

    def apply(basePath: Path,
              schema: MessageType,
              options: ParquetWriter.Options): UnorderedChunk = new PendingUnorderedChunk(basePath, schema, options)

    private[UnorderedChunk] class PendingUnorderedChunk(basePath: Path,
                                        schema: MessageType,
                                        options: ParquetWriter.Options) extends UnorderedChunk {
      override def write(record: RowParquetRecord): UnorderedChunk = {
        val chunkPath = Path.mergePaths(basePath, new Path(s"/part-${UUID.randomUUID()}.parquet"))
        val writer = ParquetWriter.internalWriter(chunkPath, schema, options)
        writer.write(record)
        new StartedUnorderedChunk(chunkPath, writer, acc = 1)
      }

      override def close(): Unit = ()
    }

    private[UnorderedChunk] class StartedUnorderedChunk(chunkPath: Path,
                                        writer: ParquetWriter.InternalWriter,
                                        acc: Long
                                       ) extends UnorderedChunk {
      override def write(record: RowParquetRecord): UnorderedChunk = {
        writer.write(record)
        new StartedUnorderedChunk(chunkPath, writer, acc = acc + 1)
      }

      override def close(): Unit = {
        if (logger.isDebugEnabled) logger.debug(s"$acc records were successfully written to $chunkPath")
        writer.close()
      }
    }
  }

} 
Example 71
Source File: IndefiniteStreamParquetSink.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s
import akka.stream.FlowShape
import akka.stream.scaladsl.{Broadcast, Flow, GraphDSL, Keep, Sink, ZipWith}
import com.github.mjakubowski84.parquet4s.ParquetWriter.ParquetWriterFactory
import org.apache.hadoop.fs.Path
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.duration.FiniteDuration


private[parquet4s] object IndefiniteStreamParquetSink extends IOOps {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply[In, ToWrite: ParquetWriterFactory, Mat](path: Path,
                                                    maxChunkSize: Int,
                                                    chunkWriteTimeWindow: FiniteDuration,
                                                    buildChunkPath: ChunkPathBuilder[In] = ChunkPathBuilder.default,
                                                    preWriteTransformation: In => ToWrite = identity[In] _,
                                                    postWriteSink: Sink[Seq[In], Mat] = Sink.ignore,
                                                    options: ParquetWriter.Options = ParquetWriter.Options()
                                            ): Sink[In, Mat] = {
    validateWritePath(path, options)

    val internalFlow = Flow.fromGraph(GraphDSL.create() { implicit b =>
      import GraphDSL.Implicits._
    
      val inChunkFlow = b.add(Flow[In].groupedWithin(maxChunkSize, chunkWriteTimeWindow))
      val broadcastChunks = b.add(Broadcast[Seq[In]](outputPorts = 2))
      val writeFlow = Flow[Seq[In]].map { chunk =>
        val toWrite = chunk.map(preWriteTransformation)
        val chunkPath = buildChunkPath(path, chunk)
        if (logger.isDebugEnabled()) logger.debug(s"Writing ${toWrite.size} records to $chunkPath")
        ParquetWriter.writeAndClose(chunkPath.toString, toWrite, options)
      }
      val zip = b.add(ZipWith[Seq[In], Unit, Seq[In]]((chunk, _) => chunk))
      
      inChunkFlow ~> broadcastChunks ~> writeFlow ~> zip.in1
                     broadcastChunks ~> zip.in0

      FlowShape(inChunkFlow.in, zip.out)               
    })

    internalFlow.toMat(postWriteSink)(Keep.right)
  }

} 
Example 72
Source File: SingleFileParquetSink.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s

import akka.Done
import akka.stream.scaladsl.{Flow, Keep, Sink}
import org.apache.hadoop.fs.Path
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

private[parquet4s] object SingleFileParquetSink {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path,
                                                             options: ParquetWriter.Options = ParquetWriter.Options()
                                                            ): Sink[T, Future[Done]] = {
    val schema = ParquetSchemaResolver.resolveSchema[T]
    val writer = ParquetWriter.internalWriter(path, schema, options)
    val valueCodecConfiguration = options.toValueCodecConfiguration
    val isDebugEnabled = logger.isDebugEnabled

    def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration)

    Flow[T]
      .map(encode)
      .fold(0) { case (acc, record) => writer.write(record); acc + 1}
      .map { count =>
        if (isDebugEnabled) logger.debug(s"$count records were successfully written to $path")
        writer.close()
      }
      .toMat(Sink.ignore)(Keep.right)
  }

} 
Example 73
Source File: SequentialFileSplittingParquetSink.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s

import akka.Done
import akka.stream.scaladsl.{Flow, Keep, Sink}
import org.apache.hadoop.fs.Path
import org.apache.parquet.schema.MessageType
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

private[parquet4s] object SequentialFileSplittingParquetSink extends IOOps {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path,
                                                             maxRecordsPerFile: Long,
                                                             options: ParquetWriter.Options = ParquetWriter.Options()
                                                            ): Sink[T, Future[Done]] = {
    val schema = ParquetSchemaResolver.resolveSchema[T]
    val valueCodecConfiguration = options.toValueCodecConfiguration

    validateWritePath(path, options)

    def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration)

    Flow[T]
      .zipWithIndex
      .map { case (elem, index) => OrderedChunkElem(encode(elem), index) }
      .fold(OrderedChunk(path, schema, maxRecordsPerFile, options))(_.write(_))
      .map(_.close())
      .toMat(Sink.ignore)(Keep.right)
  }

  private case class OrderedChunkElem(record: RowParquetRecord, index: Long) {
    def isSplit(maxRecordsPerFile: Long): Boolean = index % maxRecordsPerFile == 0
  }

  private trait OrderedChunk {
    def write(elem: OrderedChunkElem): OrderedChunk
    def close(): Unit
  }

  private object OrderedChunk {

    def apply(basePath: Path,
              schema: MessageType,
              maxRecordsPerFile: Long,
              options: ParquetWriter.Options): OrderedChunk = new PendingOrderedChunk(basePath, schema, maxRecordsPerFile, options)


    private[OrderedChunk] class PendingOrderedChunk(basePath: Path,
                                                    schema: MessageType,
                                                    maxRecordsPerFile: Long,
                                                    options: ParquetWriter.Options) extends OrderedChunk {
      override def write(elem: OrderedChunkElem): OrderedChunk = {
        val chunkNumber: Int = Math.floorDiv(elem.index, maxRecordsPerFile).toInt
        val chunkPath = Path.mergePaths(basePath, new Path(chunkFileName(chunkNumber)))
        val writer = ParquetWriter.internalWriter(chunkPath, schema, options)
        writer.write(elem.record)
        new StartedOrderedChunk(basePath, schema, maxRecordsPerFile, options, chunkPath, writer, acc = 1)
      }

      override def close(): Unit = ()

      private def chunkFileName(chunkNumber: Int): String = f"/part-$chunkNumber%05d.parquet"
    }

    private[OrderedChunk] class StartedOrderedChunk(basePath: Path,
                                                    schema: MessageType,
                                                    maxRecordsPerFile: Long,
                                                    options: ParquetWriter.Options,
                                                    chunkPath: Path,
                                                    writer: ParquetWriter.InternalWriter,
                                                    acc: Long) extends OrderedChunk {
      override def write(elem: OrderedChunkElem): OrderedChunk = {
        if (elem.isSplit(maxRecordsPerFile)) {
          this.close()
          new PendingOrderedChunk(basePath, schema, maxRecordsPerFile, options).write(elem)
        } else {
          writer.write(elem.record)
          new StartedOrderedChunk(basePath, schema, maxRecordsPerFile, options, chunkPath, writer, acc = acc + 1)
        }
      }

      override def close(): Unit = {
        if (logger.isDebugEnabled) logger.debug(s"$acc records were successfully written to $chunkPath")
        writer.close()
      }
    }
  }

} 
Example 74
Source File: SddfApp.scala    From sddf   with GNU General Public License v3.0 5 votes vote down vote up
package de.unihamburg.vsis.sddf

import org.joda.time.format.PeriodFormatterBuilder
import org.slf4j.Logger
import org.slf4j.LoggerFactory

import de.unihamburg.vsis.sddf.config.Config

import scopt.Read
import scopt.OptionParser

class SddfApp extends App {

  val periodFormatter = (new PeriodFormatterBuilder() minimumPrintedDigits (2) printZeroAlways ()
    appendDays () appendSeparator ("d ")
    appendHours () appendSeparator (":") appendMinutes () appendSuffix (":") appendSeconds ()
    appendSeparator (".")
    minimumPrintedDigits (3) appendMillis () toFormatter)

  @transient var _log: Logger = null
  // Method to get or create the logger for this object
  def log(): Logger = {
    if (_log == null) {
      _log = LoggerFactory.getLogger(getClass.getName)
    }
    _log
  }
  
  @transient var _logLineage: Logger = null
  // Method to get or create the logger for this object
  def logLineage(): Logger = {
    if (_logLineage == null) {
      _logLineage = LoggerFactory.getLogger("lineage")
    }
    _logLineage
  }
  

  // extend Parser to accept the type Option
  implicit val optionRead: Read[Option[String]] = Read.reads(Some(_))
  
  // parsing commandline parameters
  val parser = new OptionParser[Parameters]("sddf") {
    head("SddF", "0.1.0")
    opt[Map[String, String]]('p', "properties") optional() valueName("<property>") action { (x, c) =>
      c.copy(properties = x) } text("set arbitrary properties via command line")
    opt[Option[String]]('c', "config-file") optional() action { (x, c) =>
      c.copy(propertyPath = x) } text("optional path to a property file")
  }
  
  // parser.parse returns Option[C]
  val parameters = parser.parse(args, Parameters())
  var propertiesCommandline: Map[String, String] = Map()
  var propertiesPath: Option[String] = None
   parameters match {
    case Some(config) =>
      propertiesCommandline = config.properties
      propertiesPath = config.propertyPath
    case None =>
      // arguments are bad, error message will have been displayed
  }
  
  val Conf: Config = if(propertiesPath.isDefined) new Config(propertiesPath.get) else new Config()
  
  propertiesCommandline.foreach(props => {
	  Conf.setPropertyCommandline(props._1, props._2)
  })
  
}

case class Parameters(propertyPath: Option[String] = None, properties: Map[String,String] = Map()) 
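A minimal sketch of a concrete entry point built on the class above; the object name is hypothetical and it only touches members that SddfApp itself defines (log, Conf, propertiesPath, propertiesCommandline):

object MyDeduplicationApp extends SddfApp {
  // propertiesPath and propertiesCommandline are filled in by the scopt parser in SddfApp
  log().info("Using config file: " + propertiesPath.getOrElse("<built-in defaults>"))
  log().info("Command-line overrides: " + propertiesCommandline.mkString(", "))
}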
Example 75
Source File: Logging.scala    From sddf   with GNU General Public License v3.0 5 votes vote down vote up
package de.unihamburg.vsis.sddf.logging

import org.slf4j.LoggerFactory

import com.typesafe.scalalogging.slf4j.Logger

trait Logging {

  @transient protected var _log: Logger = null

  // Method to get or create the logger for this object
  protected def log: Logger = {
    if (_log == null) {
      _log = Logger(LoggerFactory.getLogger(getClass))
    }
    _log
  }
} 
Example 76
Source File: MetaCatalogProcessor.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package it.gov.daf.ingestion.metacatalog

import com.typesafe.config.ConfigFactory
import play.api.libs.json._
import it.gov.daf.catalogmanager._
import it.gov.daf.catalogmanager.json._
import org.slf4j.{Logger, LoggerFactory}
import org.apache.commons.lang.StringEscapeUtils

//Get Logical_uri, process MetadataCatalog and get the required info
class MetaCatalogProcessor(metaCatalog: MetaCatalog) {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val sftpDefPrefix = ConfigFactory.load().getString("ingmgr.sftpdef.prefixdir")

  
  def separator() = {
    metaCatalog.operational
      .input_src.sftp
      .flatMap(_.headOption)
      .flatMap(_.param)
      .flatMap(_.split(", ").reverse.headOption)
      .map(_.replace("sep=", ""))
      .getOrElse(",")
  }

  def fileFormatNifi(): String = {
    val inputSftp = metaCatalog.operational.input_src.sftp

    inputSftp match {
      case Some(s) =>
        val sftps: Seq[SourceSftp] = s.filter(x => x.name.equals("sftp_daf"))
        if (sftps.nonEmpty) sftps.head.param.getOrElse("")
        else ""

      case None => ""
    }
  }

  def ingPipelineNifi(): String = {
    ingPipeline.mkString(",")
  }

} 
Example 77
Source File: KuduController.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import org.apache.kudu.spark.kudu._
import org.apache.spark.sql.{ DataFrame, SparkSession }
import org.slf4j.{ Logger, LoggerFactory }

import scala.util.{ Failure, Try }

class KuduController(sparkSession: SparkSession, master: String) {

  val alogger: Logger = LoggerFactory.getLogger(this.getClass)

  def readData(table: String): Try[DataFrame] =  Try{
    sparkSession
      .sqlContext
      .read
      .options(Map("kudu.master" -> master, "kudu.table" -> table)).kudu
  }.recoverWith {
    case ex =>
      alogger.error(s"Exception ${ex.getMessage}\n ${ex.getStackTrace.mkString("\n")} ")
      Failure(ex)
  }
} 
Example 78
Source File: PhysicalDatasetController.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import cats.syntax.show.toShow
import com.typesafe.config.Config
import daf.dataset.{ DatasetParams, FileDatasetParams, KuduDatasetParams }
import daf.filesystem.fileFormatShow
import org.apache.spark.sql.{ DataFrame, SparkSession }
import org.apache.spark.SparkConf
import org.slf4j.{ Logger, LoggerFactory }

class PhysicalDatasetController(sparkSession: SparkSession,
                                kuduMaster: String,
                                defaultLimit: Option[Int] = None,
                                defaultChunkSize: Int = 0) {

  lazy val kuduController = new KuduController(sparkSession, kuduMaster)
  lazy val hdfsController = new HDFSController(sparkSession)

  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  private def addLimit(dataframe: DataFrame, limit: Option[Int]) = (limit, defaultLimit) match {
    case (None, None)                 => dataframe
    case (None, Some(value))          => dataframe.limit { value }
    case (Some(value), None)          => dataframe.limit { value }
    case (Some(value), Some(default)) => dataframe.limit { math.min(value, default) }
  }

  def kudu(params: KuduDatasetParams, limit: Option[Int] = None) = {
    logger.debug { s"Reading data from kudu table [${params.table}]" }
    kuduController.readData(params.table).map { addLimit(_, limit) }
  }

  def hdfs(params: FileDatasetParams, limit: Option[Int] = None) = {
    logger.debug { s"Reading data from hdfs at path [${params.path}]" }
    hdfsController.readData(params.path, params.format.show, params.param("separator")).map { addLimit(_, limit) }
  }

  def get(params: DatasetParams, limit: Option[Int]= None) = params match {
    case kuduParams: KuduDatasetParams => kudu(kuduParams, limit)
    case hdfsParams: FileDatasetParams => hdfs(hdfsParams, limit)
  }

}

object PhysicalDatasetController {

  private def getOptionalString(path: String, underlying: Config) = {
    if (underlying.hasPath(path)) {
      Some(underlying.getString(path))
    } else {
      None
    }
  }

  private def getOptionalInt(path: String, underlying: Config) = {
    if (underlying.hasPath(path)) {
      Some(underlying.getInt(path))
    } else {
      None
    }
  }

  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply(configuration: Config): PhysicalDatasetController = {

    val sparkConfig = new SparkConf()
    sparkConfig.set("spark.driver.memory", configuration.getString("spark.driver.memory"))

    val sparkSession = SparkSession.builder().master("local").config(sparkConfig).getOrCreate()

    val kuduMaster = configuration.getString("kudu.master")

    val defaultLimit = if (configuration hasPath "daf.row_limit") Some {
      configuration.getInt("daf.row_limit")
    } else None

    System.setProperty("sun.security.krb5.debug", "true")

    new PhysicalDatasetController(sparkSession, kuduMaster, defaultLimit)
  }
} 
Example 79
Source File: HDFSController.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import com.databricks.spark.avro._
import org.apache.spark.sql.{ DataFrame, SparkSession }
import org.slf4j.{Logger, LoggerFactory}

import scala.util.{Failure, Try}

class HDFSController(sparkSession: SparkSession) {

  val alogger: Logger = LoggerFactory.getLogger(this.getClass)

  def readData(path: String, format: String, separator: Option[String]): Try[DataFrame] =  format match {
    case "csv" => Try {
      val pathFixAle = path + "/" + path.split("/").last + ".csv"
      alogger.debug(s"this is the path $pathFixAle")
      separator match {
        case None => sparkSession.read.csv(pathFixAle)
        case Some(sep) => sparkSession.read.format("csv")
          .option("sep", sep)
          .option("inferSchema", "true")
          .option("header", "true")
          .load(pathFixAle)
      }
    }
    case "parquet" => Try { sparkSession.read.parquet(path) }
    case "avro"    => Try { sparkSession.read.avro(path) }
    case unknown   => Failure { new IllegalArgumentException(s"Unsupported format [$unknown]") }
  }
} 
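A minimal usage sketch for the controller above; the local SparkSession and the dataset path are assumptions for illustration, not part of the daf project. For the "csv" format the controller resolves <path>/<last segment>.csv before reading.

import controllers.HDFSController
import org.apache.spark.sql.SparkSession

import scala.util.{Failure, Success}

object HDFSControllerUsage {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate() // assumed local session
    val controller = new HDFSController(spark)
    // hypothetical dataset directory: resolves to /data/opendata/sample/sample.csv
    controller.readData("/data/opendata/sample", "csv", Some(";")) match {
      case Success(df) => df.show(10)
      case Failure(ex) => println(s"read failed: ${ex.getMessage}")
    }
    spark.stop()
  }
}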
Example 80
Source File: CatalogManagerClient.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package daf.catalogmanager

import java.net.URLEncoder
import java.security.AccessControlException

import it.gov.daf.common.config.Read
import json._
import org.slf4j.LoggerFactory
import play.api.Configuration
import play.api.libs.json.Json
import scalaj.http.{ Http, HttpResponse }

import scala.util.{ Failure, Try, Success => TrySuccess }

class CatalogManagerClient(serviceUrl: String) {

  val logger = LoggerFactory.getLogger("it.gov.daf.CatalogManager")

  private def callService(authorization: String, catalogId: String) = Try {
    Http(s"$serviceUrl/catalog-manager/v1/catalog-ds/get/${URLEncoder.encode(catalogId,"UTF-8")}")
      .header("Authorization", authorization)
      .asString
  }

  private def parseCatalog(response: HttpResponse[String]) =
    if (response.code == 401)  Failure { new AccessControlException("Unauthorized") }
    else if (response.isError) Failure { new RuntimeException(s"Error retrieving catalog data: [${response.code}] with body [${response.body}]") }
    else Try { Json.parse(response.body).as[MetaCatalog] }

  def getById(authorization: String, catalogId: String): Try[MetaCatalog] = for {
    response <- callService(authorization, catalogId)
    catalog  <- parseCatalog(response)
  } yield catalog

}

object CatalogManagerClient {

  def fromConfig(config: Configuration) = Read.string { "daf.catalog_url" }.!.read(config) match {
    case TrySuccess(baseUrl) => new CatalogManagerClient(baseUrl)
    case Failure(error)      => throw new RuntimeException("Unable to create catalog-manager client", error)
  }

} 
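A short usage sketch; the base URL, token, and catalog id below are placeholders, not values from the project. getById composes the HTTP call and the JSON parsing into a single Try, so one match covers both failure modes.

import daf.catalogmanager.CatalogManagerClient

import scala.util.{Failure, Success}

object CatalogManagerClientUsage {
  def main(args: Array[String]): Unit = {
    val client = new CatalogManagerClient("http://localhost:9000") // placeholder service URL
    client.getById("Bearer <token>", "example-catalog-id") match { // placeholder credentials and id
      case Success(catalog) => println(s"got catalog: $catalog")
      case Failure(error)   => println(s"lookup failed: ${error.getMessage}")
    }
  }
}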
Example 81
Source File: HDFSBase.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package daf.util

import better.files.{ File, _ }
import daf.util.DataFrameClasses.{ Address, Person }
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.hdfs.{ HdfsConfiguration, MiniDFSCluster }
import org.apache.hadoop.test.PathUtils
import org.apache.spark.sql.{ SaveMode, SparkSession }
import org.scalatest.{ BeforeAndAfterAll, FlatSpec, Matchers }
import org.slf4j.LoggerFactory

import scala.util.{ Failure, Random, Try }

abstract class HDFSBase extends FlatSpec with Matchers with BeforeAndAfterAll {

  var miniCluster: Try[MiniDFSCluster] = Failure[MiniDFSCluster](new Exception)

  var fileSystem: Try[FileSystem] = Failure[FileSystem](new Exception)

  val sparkSession: SparkSession = SparkSession.builder().master("local").getOrCreate()

  val alogger = LoggerFactory.getLogger(this.getClass)

  val (testDataPath, confPath) = {
    val testDataPath = s"${PathUtils.getTestDir(this.getClass).getCanonicalPath}/MiniCluster"
    val confPath = s"$testDataPath/conf"
    (
      testDataPath.toFile.createIfNotExists(asDirectory = true, createParents = false),
      confPath.toFile.createIfNotExists(asDirectory = true, createParents = false)
    )
  }

  def pathAvro = "opendata/test.avro"
  def pathParquet = "opendata/test.parquet"
  def pathCsv = "opendata/test.csv"

  def getSparkSession = sparkSession

  override def beforeAll(): Unit = {

    val conf = new HdfsConfiguration()
    conf.setBoolean("dfs.permissions", true)
    System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA)

    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath.pathAsString)
    //FileUtil.fullyDelete(testDataPath.toJava)

    conf.set(s"hadoop.proxyuser.${System.getProperties.get("user.name")}.groups", "*")
    conf.set(s"hadoop.proxyuser.${System.getProperties.get("user.name")}.hosts", "*")

    val builder = new MiniDFSCluster.Builder(conf)
    miniCluster = Try(builder.build())
    fileSystem = miniCluster.map(_.getFileSystem)
    fileSystem.foreach(fs => {
      val confFile: File = confPath / "hdfs-site.xml"
      for { os <- confFile.newOutputStream.autoClosed } fs.getConf.writeXml(os)
    })

    writeDf()
  }

  override def afterAll(): Unit = {
    miniCluster.foreach(_.shutdown(true))
    val _ = testDataPath.parent.parent.delete(true)
    sparkSession.stop()
  }

  
  private def writeDf(): Unit = {
    import sparkSession.implicits._

    alogger.info(s"TestDataPath ${testDataPath.toJava.getAbsolutePath}")
    alogger.info(s"ConfPath ${confPath.toJava.getAbsolutePath}")
    val persons = (1 to 10).map(i => Person(s"Andy$i", Random.nextInt(85), Address("Via Ciccio Cappuccio")))
    val caseClassDS = persons.toDS()
    caseClassDS.write.format("parquet").mode(SaveMode.Overwrite).save(pathParquet)
    caseClassDS.write.format("com.databricks.spark.avro").mode(SaveMode.Overwrite).save(pathAvro)
    //writing the Person dataframe directly generates an exception
    caseClassDS.toDF.select("name", "age").write.format("csv").mode(SaveMode.Overwrite).option("header", "true").save(pathCsv)
  }
}

object DataFrameClasses {

  final case class Address(street: String)

  final case class Person(name: String, age: Int, address: Address)
} 
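A sketch of a concrete spec built on the abstract base above; the class name and assertions are hypothetical. The mini-cluster and the sample datasets are prepared in beforeAll, so a test only needs fileSystem, getSparkSession and the path helpers.

import daf.util.HDFSBase

class HDFSBaseExampleSpec extends HDFSBase {

  "the embedded MiniDFSCluster" should "expose a working FileSystem" in {
    fileSystem.isSuccess shouldBe true
  }

  it should "have written the parquet test dataset" in {
    getSparkSession.read.parquet(pathParquet).count() shouldBe 10L
  }
}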
Example 82
Source File: HiveAddJarsEngineHook.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.hive.hook

import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineExecutorContext, EngineHook}
import com.webank.wedatasphere.linkis.engine.hive.executor.HiveEngineExecutor
import com.webank.wedatasphere.linkis.server.JMap
import org.apache.commons.lang.StringUtils
import org.slf4j.LoggerFactory



class HiveAddJarsEngineHook extends EngineHook {

  private var jars:String = _
  private val JARS = "jars"
  private val logger = LoggerFactory.getLogger(classOf[HiveAddJarsEngineHook])
  private val addSql = "add jar "
  override def beforeCreateEngine(params: JMap[String, String]): JMap[String, String] = {
    import scala.collection.JavaConversions._
//    params foreach {
//      case (k, v) => logger.info(s"params key is $k, value is $v")
//    }
    params foreach {
      case (key,value) => if (JARS.equals(key)) jars = value
    }
    logger.info("jars are {}", jars)
    params
  }

  override def afterCreatedEngine(executor: EngineExecutor): Unit = {
    if (StringUtils.isEmpty(jars)) {
      logger.warn("hive added jars is empty")
      return
    }
    jars.split(",") foreach{
       jar =>
         try{
           logger.info("begin to run hive sql {}", addSql + jar)
           executor.asInstanceOf[HiveEngineExecutor].executeLine(new EngineExecutorContext(executor), addSql + jar)
         }catch{
           case t:Throwable => logger.error(s"run hive sql ${addSql + jar} failed", t)
         }
    }
  }
} 
Example 83
Source File: HiveEngineExecutorFactory.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.hive.executor

import java.io.PrintStream

import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineExecutorFactory}
import com.webank.wedatasphere.linkis.engine.hive.common.HiveUtils
import com.webank.wedatasphere.linkis.engine.hive.exception.HiveSessionStartFailedException
import com.webank.wedatasphere.linkis.server.JMap
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.session.SessionState
import org.apache.hadoop.security.UserGroupInformation
import org.slf4j.LoggerFactory
import org.springframework.stereotype.Component


@Component
class HiveEngineExecutorFactory extends EngineExecutorFactory {
  private val logger = LoggerFactory.getLogger(getClass)
  private val HIVE_QUEUE_NAME:String = "mapreduce.job.queuename"
  private val BDP_QUEUE_NAME:String = "wds.linkis.yarnqueue"
  override def createExecutor(options: JMap[String, String]): EngineExecutor = {
    val hiveConf:HiveConf = HiveUtils.getHiveConf
    hiveConf.setVar(HiveConf.ConfVars.HIVEJAR, HiveUtils.jarOfClass(classOf[Driver])
      .getOrElse(throw HiveSessionStartFailedException(40012 ,"cannot find hive-exec.jar, start session failed!")))
    import scala.collection.JavaConversions._
    options.foreach{ case(k,v) => logger.info(s"key is $k, value is $v")}
    options.filter{case (k,v) => k.startsWith("hive.") || k.startsWith("mapreduce.") || k.startsWith("wds.linkis.")}.foreach{case(k, v) =>
      logger.info(s"key is $k, value is $v")
      if (BDP_QUEUE_NAME.equals(k)) hiveConf.set(HIVE_QUEUE_NAME, v) else hiveConf.set(k, v)}
    val sessionState:SessionState = new SessionState(hiveConf)
    sessionState.out = new PrintStream(System.out, true, "utf-8")
    sessionState.info = new PrintStream(System.out, true, "utf-8")
    sessionState.err = new PrintStream(System.out, true, "utf-8")
    SessionState.start(sessionState)
    val ugi = UserGroupInformation.getCurrentUser
    new HiveEngineExecutor(5000, sessionState, ugi, hiveConf)
  }

} 
Example 84
Source File: HiveEngineSpringConfiguration.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.hive

import com.webank.wedatasphere.linkis.engine.execute.hook._
import com.webank.wedatasphere.linkis.engine.execute.{CodeParser, EngineHook, SQLCodeParser}
import com.webank.wedatasphere.linkis.engine.hive.hook.HiveAddJarsEngineHook
import org.slf4j.LoggerFactory
import org.springframework.context.annotation.{Bean, Configuration}


@Configuration
class HiveEngineSpringConfiguration {

  private val LOG = LoggerFactory.getLogger(getClass)

  @Bean(Array("codeParser"))
  def generateCodeParser:CodeParser = {
    LOG.info("code Parser is set in hive")
    new SQLCodeParser()
  }

  @Bean(Array("engineHooks"))
  def generateEngineHooks:Array[EngineHook] = {
    LOG.info("engineHooks are set in hive.")
    Array(new ReleaseEngineHook, new MaxExecuteNumEngineHook, new HiveAddJarsEngineHook, new JarUdfEngineHook)
  }
} 
Example 85
Source File: HiveQLProcessBuilder.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.enginemanager.hive.process

import java.nio.file.Paths

import com.webank.wedatasphere.linkis.common.conf.Configuration
import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration.{DEFAULT_JAVA_OPTS, JAVA_HOME, engineGCLogPath}
import com.webank.wedatasphere.linkis.enginemanager.hive.conf.HiveEngineConfiguration
import com.webank.wedatasphere.linkis.enginemanager.impl.UserEngineResource
import com.webank.wedatasphere.linkis.enginemanager.process.JavaProcessEngineBuilder
import com.webank.wedatasphere.linkis.enginemanager.{AbstractEngineCreator, EngineResource}
import com.webank.wedatasphere.linkis.protocol.engine.RequestEngine
import org.apache.commons.lang.StringUtils
import org.slf4j.LoggerFactory

import scala.collection.mutable.ArrayBuffer


  override protected def classpathCheck(jarOrFiles: Array[String]): Unit = {
    for(jarOrFile <- jarOrFiles){
      checkJarOrFile(jarOrFile)
    }
  }
  //TODO: check the jars on the classpath
  private def checkJarOrFile(jarOrFile:String):Unit = {

  }


  override def build(engineRequest: EngineResource, request: RequestEngine): Unit = {
    this.request = request
    userEngineResource = engineRequest.asInstanceOf[UserEngineResource]
    val javaHome = JAVA_HOME.getValue(request.properties)
    if(StringUtils.isEmpty(javaHome)) {
      warn("We cannot find the java home, will use the java command on PATH to start the engine.")
      commandLine += "java"
    } else {
      commandLine += Paths.get(javaHome, "bin/java").toAbsolutePath.toFile.getAbsolutePath
    }
    if (request.properties.containsKey(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key)){
      val settingClientMemory = request.properties.get(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key)
      if (!settingClientMemory.toLowerCase().endsWith("g")){
        request.properties.put(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key, settingClientMemory + "g")
      }
      //request.properties.put(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key, request.properties.get(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key)+"g")
    }
    val clientMemory = HiveEngineConfiguration.HIVE_CLIENT_MEMORY.getValue(request.properties).toString
    if (clientMemory.toLowerCase().endsWith("g")){
      commandLine += ("-Xmx" + clientMemory.toLowerCase())
      commandLine += ("-Xms" + clientMemory.toLowerCase())
    }else{
      commandLine += ("-Xmx" + clientMemory + "g")
      commandLine += ("-Xms" + clientMemory + "g")
    }
    val javaOPTS = getExtractJavaOpts
    val alias = getAlias(request)
    if(StringUtils.isNotEmpty(DEFAULT_JAVA_OPTS.getValue))
      DEFAULT_JAVA_OPTS.getValue.format(engineGCLogPath(port, userEngineResource.getUser, alias)).split("\\s+").foreach(commandLine += _)
    if(StringUtils.isNotEmpty(javaOPTS)) javaOPTS.split("\\s+").foreach(commandLine += _)
    //engineLogJavaOpts(port, alias).trim.split(" ").foreach(commandLine += _)
    if(Configuration.IS_TEST_MODE.getValue) {
      val port = AbstractEngineCreator.getNewPort
      info(s"$toString opens debug mode with port $port.")
      commandLine += s"-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$port"
    }
    var classpath = getClasspath(request.properties, getExtractClasspath)
    classpath = classpath ++ request.properties.get("jars").split(",")
    classpathCheck(classpath)
    commandLine += "-Djava.library.path=/appcom/Install/hadoop/lib/native"
    commandLine += "-cp"
    commandLine += classpath.mkString(":")
    commandLine += "com.webank.wedatasphere.linkis.engine.DataWorkCloudEngineApplication"
  }


//  override def build(engineRequest: EngineResource, request: RequestEngine): Unit = {
//    import scala.collection.JavaConversions._
//    request.properties foreach {case (k, v) => LOG.info(s"request key is $k, value is $v")}
//    this.request = request
//    super.build(engineRequest, request)
//
//  }

  override protected val addApacheConfigPath: Boolean = true
} 
Example 86
Source File: HiveEngineManagerSpringConfiguration.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.enginemanager.hive.conf

import com.webank.wedatasphere.linkis.enginemanager.EngineHook
import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration
import com.webank.wedatasphere.linkis.enginemanager.hook.{ConsoleConfigurationEngineHook, JarLoaderEngineHook}
import com.webank.wedatasphere.linkis.resourcemanager.domain.ModuleInfo
import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, ResourceRequestPolicy}
import com.webank.wedatasphere.linkis.rpc.Sender
import org.slf4j.{Logger, LoggerFactory}
import org.springframework.context.annotation.{Bean, Configuration}


@Configuration
class HiveEngineManagerSpringConfiguration {

  private val logger:Logger = LoggerFactory.getLogger(getClass)

  @Bean(Array("resources"))
  def createResource(): ModuleInfo = {
    val totalresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_MAX_MEMORY_AVAILABLE.getValue.toLong ,
        EnvConfiguration.ENGINE_MANAGER_MAX_CORES_AVAILABLE.getValue, EnvConfiguration.ENGINE_MANAGER_MAX_CREATE_INSTANCES.getValue)


    val protectresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_PROTECTED_MEMORY.getValue.toLong,
        EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue, EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue)
    logger.info("create resource for hive")
    ModuleInfo(Sender.getThisServiceInstance, totalresource, protectresource, ResourceRequestPolicy.LoadInstance)
  }

  @Bean(name = Array("hooks"))
  def createEngineHook(): Array[EngineHook] = {
    Array(new ConsoleConfigurationEngineHook, new JarLoaderEngineHook)// TODO
  }


} 
Example 87
Source File: PipeLineManagerSpringConfiguration.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.enginemanager.pipeline

import com.webank.wedatasphere.linkis.enginemanager.EngineCreator
import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration
import com.webank.wedatasphere.linkis.resourcemanager.domain.ModuleInfo
import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, ResourceRequestPolicy}
import com.webank.wedatasphere.linkis.rpc.Sender
import org.slf4j.{Logger, LoggerFactory}
import org.springframework.context.annotation.{Bean, Configuration}


@Configuration
class PipeLineManagerSpringConfiguration {
  private val logger:Logger = LoggerFactory.getLogger(getClass)

  @Bean(Array("engineCreator"))
  def createEngineCreator(): EngineCreator = new PipeLineDefaultEngineCreator

  @Bean(Array("resources"))
  def createResource(): ModuleInfo = {
    val totalresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_MAX_MEMORY_AVAILABLE.getValue.toLong ,
      EnvConfiguration.ENGINE_MANAGER_MAX_CORES_AVAILABLE.getValue, EnvConfiguration.ENGINE_MANAGER_MAX_CREATE_INSTANCES.getValue)


    val protectresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_PROTECTED_MEMORY.getValue.toLong,
      EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue, EnvConfiguration.ENGINE_MANAGER_PROTECTED_INSTANCES.getValue)
    logger.info("create resource for pipeline")
    ModuleInfo(Sender.getThisServiceInstance, totalresource, protectresource, ResourceRequestPolicy.LoadInstance)
  }
} 
Example 88
Source File: PythonEngineSpringConfiguration.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine

import com.webank.wedatasphere.linkis.engine.execute.hook.{MaxExecuteNumEngineHook, ReleaseEngineHook}
import com.webank.wedatasphere.linkis.engine.execute.{CodeParser, EngineHook, PythonCodeParser}
import org.slf4j.LoggerFactory
import org.springframework.context.annotation.Bean
import org.springframework.stereotype.Component


@Component
class PythonEngineSpringConfiguration {
  private val LOG = LoggerFactory.getLogger(getClass)

  @Bean(Array("codeParser"))
  def generateCodeParser:CodeParser = {
    LOG.info("code Parser is set in python")
    new PythonCodeParser()
  }

  @Bean(Array("engineHooks"))
  def generateEngineHooks:Array[EngineHook] = {
    LOG.info("engineHooks are set in python.")
    Array(new ReleaseEngineHook, new MaxExecuteNumEngineHook)
  }
} 
Example 89
Source File: CommonEntranceParser.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.entrance.parser

import java.util
import java.util.Date

import com.webank.wedatasphere.linkis.entrance.conf.EntranceConfiguration
import com.webank.wedatasphere.linkis.entrance.exception.EntranceIllegalParamException
import com.webank.wedatasphere.linkis.protocol.constants.TaskConstant
import com.webank.wedatasphere.linkis.protocol.query.RequestPersistTask
import com.webank.wedatasphere.linkis.protocol.task.Task
import com.webank.wedatasphere.linkis.rpc.Sender
import com.webank.wedatasphere.linkis.scheduler.queue.SchedulerEventState
import org.apache.commons.lang.StringUtils
import org.slf4j.LoggerFactory


    if (EntranceConfiguration.DEFAULT_REQUEST_APPLICATION_NAME.getValue.equals(creator) && StringUtils.isEmpty(source.get(TaskConstant.SCRIPTPATH)) &&
      StringUtils.isEmpty(executionCode))
      throw new EntranceIllegalParamException(20007, "param executionCode and scriptPath can not be empty at the same time")
    var runType:String = null
    if (StringUtils.isNotEmpty(executionCode)) {
      runType = params.get(TaskConstant.RUNTYPE).asInstanceOf[String]
      if (StringUtils.isEmpty(runType)) runType = EntranceConfiguration.DEFAULT_RUN_TYPE.getValue
      //If formatCode is not empty, we need to format it
      if (formatCode) executionCode = format(executionCode)
      task.setExecutionCode(executionCode)
    }
    task.setSource(source)
    task.setEngineType(runType)
    //For code compatibility, keep engineType and runType set to the same value
    task.setRunType(runType)
    task.setExecuteApplicationName(executeApplicationName)
    task.setRequestApplicationName(creator)
    task.setStatus(SchedulerEventState.Inited.toString)
    task
  }
  //todo to format code using proper way
  private def format(code:String):String = code

} 
Example 90
Source File: CommentInterceptor.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.entrance.interceptor.impl

import java.lang
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.entrance.interceptor.EntranceInterceptor
import com.webank.wedatasphere.linkis.protocol.query.RequestPersistTask
import com.webank.wedatasphere.linkis.protocol.task.Task
import org.slf4j.{Logger, LoggerFactory}

import scala.util.matching.Regex

  override def dealComment(code: String): String = {
    val p = Pattern.compile(scalaCommentPattern)
    p.matcher(code).replaceAll("$1")
  }
}


object CommentMain{
  def main(args: Array[String]): Unit = {
    val sqlCode = "select * from default.user;--你好;show tables"
    val sqlCode1 = "select * from default.user--你好;show tables"
    println(SQLCommentHelper.dealComment(sqlCode))
  }
} 
Example 91
Source File: EntranceGroupFactory.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.entrance.scheduler

import com.webank.wedatasphere.linkis.entrance.conf.EntranceConfiguration
import com.webank.wedatasphere.linkis.entrance.execute.EntranceJob
import com.webank.wedatasphere.linkis.entrance.persistence.HaPersistenceTask
import com.webank.wedatasphere.linkis.protocol.config.{RequestQueryAppConfig, ResponseQueryConfig}
import com.webank.wedatasphere.linkis.rpc.Sender
import com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue.ParallelGroup
import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, SchedulerEvent}
import com.webank.wedatasphere.linkis.server.JMap
import org.apache.commons.lang.StringUtils
import org.slf4j.{Logger, LoggerFactory}


class EntranceGroupFactory extends GroupFactory {

  private val groupNameToGroups = new JMap[String, Group]
  private val logger:Logger = LoggerFactory.getLogger(classOf[EntranceGroupFactory])
  override def getOrCreateGroup(groupName: String): Group = {
    if(!groupNameToGroups.containsKey(groupName)) synchronized{
      //TODO Query the database and get initCapacity, maxCapacity, maxRunningJobs, maxAskExecutorTimes
      val initCapacity = 100
      val maxCapacity = 100
      var maxRunningJobs =  EntranceConfiguration.WDS_LINKIS_INSTANCE.getValue
      val maxAskExecutorTimes = EntranceConfiguration.MAX_ASK_EXECUTOR_TIME.getValue.toLong
      if (groupName.split("_").length < 2){
        logger.warn(s"name style of group: $groupName is not correct, we will set default value for the group")
      }else{
        val sender:Sender = Sender.getSender(EntranceConfiguration.CLOUD_CONSOLE_CONFIGURATION_SPRING_APPLICATION_NAME.getValue)
        val creator = groupName.split("_")(0)
        val username = groupName.split("_")(1)
        val engineName = EntranceConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue
        val engineType = if (engineName.trim().toLowerCase().contains("engine")) engineName.substring(0, engineName.length - "engine".length) else "spark"
        logger.info(s"Getting parameters for $groupName, username: $username, creator: $creator, engineType: $engineType")
        val keyAndValue = sender.ask(RequestQueryAppConfig(username, creator, engineType)).asInstanceOf[ResponseQueryConfig].getKeyAndValue
        try{
          maxRunningJobs = Integer.parseInt(keyAndValue.get(EntranceConfiguration.WDS_LINKIS_INSTANCE.key))
        }catch{
          case t:Throwable => logger.warn("Get maxRunningJobs from configuration server failed! Next use the default value to continue.",t)
        }
      }
      logger.info("groupName: {} =>  maxRunningJobs is {}", groupName, maxRunningJobs)
      val group = new ParallelGroup(groupName, initCapacity, maxCapacity)
      group.setMaxRunningJobs(maxRunningJobs)
      group.setMaxAskExecutorTimes(maxAskExecutorTimes)
      if(!groupNameToGroups.containsKey(groupName)) groupNameToGroups.put(groupName, group)
    }
    groupNameToGroups.get(groupName)
  }


  override def getGroupNameByEvent(event: SchedulerEvent): String = event match {
    case job: EntranceJob =>
      job.getTask match {
        case HaPersistenceTask(task) =>
          "HA"
        case _ =>EntranceGroupFactory.getGroupName(job.getCreator, job.getUser)
      }
  }
}
object EntranceGroupFactory {
  def getGroupName(creator: String, user: String): String = {
    if (StringUtils.isNotEmpty(creator)) creator + "_" + user
    else EntranceConfiguration.DEFAULT_REQUEST_APPLICATION_NAME.getValue + "_" + user
  }
} 
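A small illustration of the group-name convention implemented above; the creator and user values are made up. getOrCreateGroup later splits the name on "_" to recover the creator and the user, which is why the format matters.

import com.webank.wedatasphere.linkis.entrance.scheduler.EntranceGroupFactory

object GroupNameExample {
  def main(args: Array[String]): Unit = {
    val name = EntranceGroupFactory.getGroupName("IDE", "hadoop")
    println(name)               // prints "IDE_hadoop"
    println(name.split("_")(0)) // creator: "IDE"
    println(name.split("_")(1)) // user: "hadoop"
  }
}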
Example 92
Source File: DataWorkCloudEngineApplication.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine

import java.text.SimpleDateFormat
import java.util.Date

import com.webank.wedatasphere.linkis.DataWorkCloudApplication
import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser
import com.webank.wedatasphere.linkis.common.utils.Utils
import com.webank.wedatasphere.linkis.engine.conf.EngineConfiguration
import com.webank.wedatasphere.linkis.server.conf.ServerConfiguration
import org.apache.commons.lang.StringUtils
import org.slf4j.LoggerFactory


object DataWorkCloudEngineApplication {

  val userName:String = System.getProperty("user.name")
  val hostName:String = Utils.getComputerName
  val appName:String = EngineConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue
  val prefixName:String = EngineConfiguration.ENGINE_LOG_PREFIX.getValue
  val timeStamp:Long = System.currentTimeMillis()
  private val timeFormat = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss")
  private val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
  val time:String = timeFormat.format(new Date(timeStamp))
  val date:String = dateFormat.format(new Date(timeStamp))

  val isTimeStampSuffix:Boolean = "true".equalsIgnoreCase(EngineConfiguration.ENGINE_LOG_TIME_STAMP_SUFFIX.getValue)
  val shortLogFile:String =
    if (isTimeStampSuffix) appName + "_" + hostName + "_" + userName + "_"  + time + ".log"
    else appName + "_" + hostName + "_" + userName + ".log"
  val logName:String =
    if(isTimeStampSuffix) prefixName + "/" + userName + "/" + shortLogFile
    else prefixName + "/" + shortLogFile
  System.setProperty("engineLogFile", logName)
  System.setProperty("shortEngineLogFile", shortLogFile)
//  System.setProperty("engineLogFile", logName)
//  val context:LoggerContext = LogManager.getContext(false).asInstanceOf[LoggerContext]
//  val path:String = getClass.getResource("/").getPath
//  val log4j2XMLFile:File = new File(path + "/log4j2-engine.xml")
//  val configUri:URI = log4j2XMLFile.toURI
//  context.setConfigLocation(configUri)
  private val logger = LoggerFactory.getLogger(getClass)
  logger.info(s"Now log4j2 Rolling File is set to be $logName")
  logger.info(s"Now shortLogFile is set to be $shortLogFile")
  def main(args: Array[String]): Unit = {
    val parser = DWCArgumentsParser.parse(args)
    DWCArgumentsParser.setDWCOptionMap(parser.getDWCConfMap)
    val existsExcludePackages = ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.getValue
    if(StringUtils.isEmpty(existsExcludePackages))
      DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, "com.webank.wedatasphere.linkis.enginemanager")
    else
      DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, existsExcludePackages + ",com.webank.wedatasphere.linkis.enginemanager")
    DataWorkCloudApplication.main(DWCArgumentsParser.formatSpringOptions(parser.getSpringConfMap))
  }
} 
Example 93
Source File: Logging.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.common.utils

import org.slf4j.LoggerFactory


trait Logging {

  protected lazy implicit val logger = LoggerFactory.getLogger(getClass)

  def trace(message: => String): Unit = {
    if (logger.isTraceEnabled) {
      logger.trace(message.toString)
    }
  }

  def debug(message: => String): Unit = {
    if (logger.isDebugEnabled) {
      logger.debug(message.toString)
    }
  }

  def info(message: => String): Unit = {
    if (logger.isInfoEnabled) {
      logger.info(message.toString)
    }
  }

  def info(message: => String, t: Throwable): Unit = {
    logger.info(message.toString, t)
  }

  def warn(message: => String): Unit = {
    logger.warn(message.toString)
  }

  def warn(message: => String, t: Throwable): Unit = {
    logger.warn(message.toString, t)
  }

  def error(message: => String, t: Throwable): Unit = {
    logger.error(message.toString, t)
  }

  def error(message: => String): Unit = {
    logger.error(message.toString)
  }
} 
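A minimal usage sketch with a hypothetical class: because every message parameter above is by-name (=> String), an interpolated message is only built when the corresponding level is enabled, which is the main benefit of this wrapper over calling the SLF4J logger directly.

import com.webank.wedatasphere.linkis.common.utils.Logging

class PriceFetcher extends Logging {
  def fetch(id: String): Unit = {
    // the interpolated string is never constructed unless DEBUG is enabled for this logger
    debug(s"fetching price for id $id")
    info(s"price fetched for id $id")
  }
}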
Example 94
Source File: LyftConnector.scala    From scala-spark-cab-rides-predictions   with MIT License 5 votes vote down vote up
package rides.connector

import actors.CabRideSystem
import com.lyft.networking.apiObjects.CostEstimateResponse
import com.lyft.networking.apis.LyftPublicApi
import com.lyft.networking.{ApiConfig, LyftApiFactory}
import org.slf4j.LoggerFactory
import rides.connector.LyftConnectorConfig.rideService

import scala.concurrent.Future
import scala.util.Properties.envOrElse

class LyftConnector extends RidesConnector[CostEstimateResponse] {

  
  // (price-estimate implementation elided in this excerpt)
}

private object LyftConnectorConfig {
  private val log = LoggerFactory.getLogger(LyftConnectorConfig.getClass)
  private val apiConfig: ApiConfig = new ApiConfig.Builder()
    .setClientId(envOrElse("lyft_clientID", "NOT_DEFINED"))
    .setClientToken(envOrElse("lyft_client_token", "NOT_DEFINED"))
    .build
  val rideService: LyftPublicApi = new LyftApiFactory(apiConfig).getLyftPublicApi

  log.info("Starting Lyft Ride Service")
} 
Example 95
Source File: UberConnector.scala    From scala-spark-cab-rides-predictions   with MIT License 5 votes vote down vote up
package rides.connector

import actors.CabRideSystem
import com.uber.sdk.core.client.{ServerTokenSession, SessionConfiguration}
import com.uber.sdk.rides.client.UberRidesApi
import com.uber.sdk.rides.client.model.PriceEstimatesResponse
import com.uber.sdk.rides.client.services.RidesService
import org.slf4j.LoggerFactory
import rides.connector.UberConnectorConfig.rideService

import scala.concurrent.Future
import scala.util.Properties.envOrElse

class UberConnector extends RidesConnector[PriceEstimatesResponse] {
  
  // (price-estimate implementation elided in this excerpt)
}

private object UberConnectorConfig {
  private val log = LoggerFactory.getLogger(UberConnectorConfig.getClass)
  private val config: SessionConfiguration = new SessionConfiguration.Builder()
    .setClientId(envOrElse("uber_clientId", "NOT_DEFINED"))
    .setServerToken(envOrElse("uber_token", "NOT_DEFINED"))
    .build
  private val session: ServerTokenSession = new ServerTokenSession(config)
  val rideService: RidesService = UberRidesApi.`with`(session).build.createService

  log.info("Starting Uber Ride Service")


} 
Example 96
Source File: RidesAPI.scala    From scala-spark-cab-rides-predictions   with MIT License 5 votes vote down vote up
package rides

import com.lyft.networking.apiObjects.CostEstimateResponse
import com.uber.sdk.rides.client.model.PriceEstimatesResponse
import models.{CabPrice, Location, LyftPriceModel, UberPriceModel}
import org.slf4j.LoggerFactory
import rides.connector.{LyftConnector, RidesConnector, UberConnector}

import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.concurrent.{Await, Future}


    override def getPrices(source: Location, destination: Location): Set[CabPrice] = {
      // future wrapped price estimate from lyft api

      val cef: Future[CostEstimateResponse] = ridesConnector.getPriceEstimates(source.latitude, source.longitude, destination.latitude, destination.longitude)

      //process data in sync
      val result: CostEstimateResponse = Await.result(cef, 30 seconds)
      result match {
        case cer: CostEstimateResponse => {
          cer.cost_estimates
            .asScala.map(LyftPriceModel(_, source, destination))
            .toSet
        }
        // in case of failure just send blank set to avoid failures
        case q => log.error("Failed to fetch lyft records. Got " + q + " instead of CostEstimateResponse"); Set()
      }

    }

  }

} 
Example 97
Source File: HostsStatuses.scala    From algoliasearch-client-scala   with MIT License 5 votes vote down vote up
package algolia

import java.util.concurrent.ConcurrentHashMap

import org.slf4j.{Logger, LoggerFactory}

case class HostsStatuses(
    configuration: AlgoliaClientConfiguration,
    utils: AlgoliaUtils,
    queryHosts: Seq[String],
    indexingHosts: Seq[String]
) {

  private[algolia] val hostStatuses: ConcurrentHashMap[String, HostStatus] =
    new ConcurrentHashMap[String, HostStatus](5)

  val logger: Logger = LoggerFactory.getLogger("algoliasearch")

  def markHostAsUp(host: String): Unit = {
    logger.debug("Marking {} as `up`", host)
    hostStatuses.put(host, HostStatus.up(utils.now()))
  }

  def markHostAsDown(host: String): Unit = {
    logger.debug("Marking {} as `down`", host)
    hostStatuses.put(host, HostStatus.down(utils.now()))
  }

  def indexingHostsThatAreUp(): Seq[String] = hostsThatAreUp(indexingHosts)

  def queryHostsThatAreUp(): Seq[String] = hostsThatAreUp(queryHosts)

  private def hostsThatAreUp(hosts: Seq[String]): Seq[String] = {
    val filteredHosts = hosts.filter(h => isUpOrCouldBeRetried(getHostStatus(h)))
    if (filteredHosts.isEmpty) {
      hosts
    } else {
      filteredHosts
    }
  }

  def isUpOrCouldBeRetried(hostStatus: HostStatus): Boolean =
    hostStatus.up || (utils
      .now() - hostStatus.updatedAt) >= configuration.hostDownTimeoutMs

  private def getHostStatus(host: String): HostStatus =
    hostStatuses.getOrDefault(host, HostStatus.up(utils.now()))
}

private case class HostStatus(up: Boolean, updatedAt: Long)

private object HostStatus {

  def up(now: Long) = HostStatus(up = true, now)

  def down(now: Long) = HostStatus(up = false, now)

} 
Example 98
Source File: AsciidoctorJgitIncludeProcessor.scala    From gitbucket-asciidoctor-plugin   with Apache License 2.0 5 votes vote down vote up
package tobiasroeser.gitbucket.asciidoctor

import java.io.File
import java.net.URI
import java.util

import gitbucket.core.service.{AccountService, RepositoryService}
import gitbucket.core.service.RepositoryService.RepositoryInfo
import gitbucket.core.util.{JGitUtil, StringUtil}
import gitbucket.core.util.Directory._
import gitbucket.core.util.SyntaxSugars._
import org.asciidoctor.ast.DocumentRuby
import org.asciidoctor.extension.{IncludeProcessor, PreprocessorReader}
import org.eclipse.jgit.api.Git
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._

class AsciidoctorJgitIncludeProcessor(config: java.util.Map[String, Object]) extends IncludeProcessor(config)
  with RepositoryService with AccountService{
  val logger = LoggerFactory.getLogger(getClass)

  override def handles(target: String): Boolean = {
    true
  }

  override def process(document: DocumentRuby, reader: PreprocessorReader, target: String, attributes: util.Map[String, AnyRef]): Unit = {
    val documentPath = URI.create(document.getAttr("gitbucket-path").toString)
    val repository = document.getAttr("gitbucket-repository").asInstanceOf[RepositoryInfo]
    val branch = document.getAttr("gitbucket-branch").toString
    val targetPath = documentPath.resolve(target)

    using(Git.open(getRepositoryDir(repository.owner, repository.name))) { git =>
      val revCommit = JGitUtil.getRevCommitFromId(git, git.getRepository.resolve(branch))
      JGitUtil.getContentFromPath(git, revCommit.getTree, targetPath.toString, true).map{ bytes =>
        val content = StringUtil.convertFromByteArray(bytes)
        val embed = if(attributes.asScala.contains("lines")){
          val lines = attributes.get("lines").toString
          val linesRe = """(\d+)\.\.(\d+)""".r
          lines match {
            case linesRe(start, end) =>
              content.split("""\r?\n""").slice(start.toInt - 1, end.toInt).mkString("\n")
          }
        }else{
          content
        }
        reader.push_include(embed, target, target, 1, attributes)
      }
    }
  }
} 
Example 99
Source File: InitialProcessing.scala    From iodb   with Creative Commons Zero v1.0 Universal 5 votes vote down vote up
package io.iohk.iodb.bench

import java.io.File

import ch.qos.logback.classic.LoggerContext
import io.iohk.iodb.{ByteArrayWrapper, ShardedStore, Store, TestUtils}
import org.slf4j.LoggerFactory


object InitialProcessing extends Benchmark {
  val Milestones = Seq(1000, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000)

  val Inputs = 5500 //average number of inputs per block
  val Outputs = 6000 //average number of outputs per block

  def bench(store: Store, dir: File): Unit = {
    println(s"Store: $store")

    Milestones.foldLeft((0, 0L, Seq[ByteArrayWrapper]())) {
      case ((prevMilestone, prevTime, prevCache), milestone) =>
        val (time, newCache) = TestUtils.runningTime {
          (prevMilestone + 1 to milestone).foldLeft(prevCache) { case (cache, version) =>
            processBlock(version, store, Inputs, Outputs, cache).get.take(Inputs * 100)
          }
        }
        val newTime = prevTime + time
        println(s"Time to get to $milestone: $time")
        (milestone, newTime, newCache)
    }

    store.close()
    TestUtils.deleteRecur(dir)
  }

  def main(args: Array[String]): Unit = {
    //switching off logging
    val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext]
    context.stop()

    var dir = TestUtils.tempDir()
    bench(new ShardedStore(dir, keySize = KeySize), dir)

    System.gc()
    Thread.sleep(15000)
    println("======================================")

    dir = TestUtils.tempDir()
    bench(new RocksStore(dir), dir)
  }
} 
Example 100
Source File: SparkLog4jExample.scala    From pulse   with Apache License 2.0 5 votes vote down vote up
package io.phdata.pams.example

import org.apache.spark.{ SparkConf, SparkContext }
import org.slf4j.LoggerFactory


object SparkLog4jExample {

  private val log = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    log.info("Starting up the spark logging example")
    val conf = new SparkConf().setAppName("Pulse Spark Logging Example")
    val sc   = SparkContext.getOrCreate(conf)

    try {
      run(sc, numEvents = 10000)
    } finally {
      sc.stop()
    }
  }

  def run(sc: SparkContext, numEvents: Int): Unit = {
    val testData = 1 to numEvents
    val testRdd  = sc.parallelize(testData)

    testRdd.foreach { num =>
      if (num % 10000 == 0) {
        log.error(s"XXXXX error! num: " + num)
      } else if (num % 5000 == 0) {
        log.warn(s"XXXXX warning! num: " + num)
      } else {
        log.info(s"XXXXX found: " + num)
      }
    }

    log.info("Shutting down the spark logging example")
  }

} 
Example 101
Source File: AbstractAlertTrigger.scala    From pulse   with Apache License 2.0 5 votes vote down vote up
package io.phdata.pulse.alertengine.trigger

import com.typesafe.scalalogging.Logger
import io.phdata.pulse.alertengine.{ AlertRule, AlertsDb, TriggeredAlert }
import org.slf4j.LoggerFactory


  def query(applicationName: String, alertRule: AlertRule): Seq[Map[String, Any]]

  override final def check(applicationName: String, alertRule: AlertRule): Option[TriggeredAlert] =
    if (AlertsDb.shouldCheck(applicationName, alertRule)) {
      try {
        val results = query(applicationName, alertRule)
        processResults(applicationName, alertRule, results)
      } catch {
        case e: Exception =>
          e.printStackTrace()
          logger.error(s"Error running query for $applicationName with alert $alertRule", e)
          None
      }
    } else {
      None
    }

  private def processResults(applicationName: String,
                             alertRule: AlertRule,
                             results: Seq[Map[String, Any]]): Option[TriggeredAlert] = {
    val numFound  = results.size
    val threshold = alertRule.resultThreshold.getOrElse(0)
    if (threshold == -1 && results.isEmpty) {
      logger.info(
        s"Alert triggered for $applicationName on alert $alertRule at no results found condition")
      AlertsDb.markTriggered(applicationName, alertRule)
      Some(TriggeredAlert(alertRule, applicationName, results, 0))
    } else if (results.lengthCompare(threshold) > 0) {
      logger.info(s"Alert triggered for $applicationName on alert $alertRule")
      AlertsDb.markTriggered(applicationName, alertRule)
      Some(TriggeredAlert(alertRule, applicationName, results, numFound))
    } else {
      logger.info(s"No alert needed for $applicationName with alert $alertRule")
      None
    }
  }
} 
Example 102
Source File: DeltaRecordReaderWrapper.scala    From connectors   with Apache License 2.0 5 votes vote down vote up
package io.delta.hive

import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.Writable
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.Reporter
import org.apache.parquet.hadoop.ParquetInputFormat
import org.slf4j.LoggerFactory


  private def insertPartitionValues(value: ArrayWritable): Unit = {
    val valueArray = value.get()
    var i = 0
    val n = partitionValues.length
    // Using while loop for better performance since this method is called for each row.
    while (i < n) {
      val partition = partitionValues(i)
      // The schema of `valueArray` is the Hive schema, and it's the same as the Delta
      // schema since we have verified it in `DeltaInputFormat`. Hence, the position of a partition
      // column in `valueArray` is the same as its position in Delta schema.
      valueArray(partition._1) = partition._2
      i += 1
    }
  }
} 
Example 103
Source File: ReliableHttpProxyFactory.scala    From reliable-http-client   with Apache License 2.0 5 votes vote down vote up
package rhttpc.akkahttp.proxy

import akka.NotUsed
import akka.actor._
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.{HttpEntity, HttpRequest, HttpResponse}
import akka.stream.Materializer
import akka.stream.scaladsl.{Flow, Sink, Source}
import org.slf4j.LoggerFactory
import rhttpc.client.protocol.{Correlated, Request}
import rhttpc.client.proxy._

import scala.concurrent.duration._
import scala.concurrent.{ExecutionContext, Future}
import scala.util.control.NonFatal
import scala.util.{Failure, Success}

object ReliableHttpProxyFactory {

  private lazy val logger = LoggerFactory.getLogger(getClass)

  def send(successRecognizer: SuccessHttpResponseRecognizer, batchSize: Int, parallelConsumers: Int)
          (request: Request[HttpRequest])
          (implicit actorSystem: ActorSystem, materialize: Materializer): Future[HttpResponse] = {
    import actorSystem.dispatcher
    send(prepareHttpFlow(batchSize * parallelConsumers), successRecognizer)(request.correlated)
  }

  private def prepareHttpFlow(parallelism: Int)
                             (implicit actorSystem: ActorSystem, materialize: Materializer):
    Flow[(HttpRequest, String), HttpResponse, NotUsed] = {

    import actorSystem.dispatcher
    Http().superPool[String]().mapAsync(parallelism) {
      case (tryResponse, id) =>
        tryResponse match {
          case Success(response) =>
            response.toStrict(1 minute)
          case Failure(ex) =>
            Future.failed(ex)
        }
    }
  }

  private def send(httpFlow: Flow[(HttpRequest, String), HttpResponse, Any], successRecognizer: SuccessHttpResponseRecognizer)
                  (corr: Correlated[HttpRequest])
                  (implicit ec: ExecutionContext, materialize: Materializer): Future[HttpResponse] = {
    import collection.JavaConverters._
    logger.debug(
      s"""Sending request for ${corr.correlationId} to ${corr.msg.getUri()}. Headers:
         |${corr.msg.getHeaders().asScala.toSeq.map(h => "  " + h.name() + ": " + h.value()).mkString("\n")}
         |Body:
         |${corr.msg.entity.asInstanceOf[HttpEntity.Strict].data.utf8String}""".stripMargin
    )
    val logResp = logResponse(corr) _
    val responseFuture = Source.single((corr.msg, corr.correlationId)).via(httpFlow).runWith(Sink.head)
    responseFuture.onComplete {
      case Failure(ex) =>
        logger.error(s"Got failure for ${corr.correlationId} to ${corr.msg.getUri()}", ex)
      case Success(_) =>
    }
    for {
      response <- responseFuture
      transformedToFailureIfNeed <- {
        if (successRecognizer.isSuccess(response)) {
          logResp(response, "success response")
          Future.successful(response)
        } else {
          logResp(response, "response recognized as non-success")
          Future.failed(NonSuccessResponse)
        }
      }
    } yield transformedToFailureIfNeed
  }

  private def logResponse(corr: Correlated[HttpRequest])
                         (response: HttpResponse, additionalInfo: String): Unit = {
    import collection.JavaConverters._
    logger.debug(
      s"""Got $additionalInfo for ${corr.correlationId} to ${corr.msg.getUri()}. Status: ${response.status.value}. Headers:
         |${response.getHeaders().asScala.toSeq.map(h => "  " + h.name() + ": " + h.value()).mkString("\n")}
         |Body:
         |${response.entity.asInstanceOf[HttpEntity.Strict].data.utf8String}""".stripMargin
    )
  }

} 
Example 104
Source File: AmqpPublisher.scala    From reliable-http-client   with Apache License 2.0 5 votes vote down vote up
package rhttpc.transport.amqp

import java.io._

import akka.agent.Agent
import com.rabbitmq.client._
import org.slf4j.LoggerFactory
import rhttpc.transport.SerializingPublisher.SerializedMessage
import rhttpc.transport.{Message, Publisher, Serializer, SerializingPublisher}
import rhttpc.utils.Recovered._

import scala.concurrent.{ExecutionContext, Future, Promise}

private[amqp] class AmqpPublisher[PubMsg](channel: Channel,
                                          queueName: String,
                                          exchangeName: String,
                                          protected val serializer: Serializer[PubMsg],
                                          prepareProperties: PartialFunction[SerializedMessage, AMQP.BasicProperties])
                                         (implicit ec: ExecutionContext)
  extends SerializingPublisher[PubMsg] with ConfirmListener {

  private lazy val logger = LoggerFactory.getLogger(getClass)

  private val seqNoOnAckPromiseAgent = Agent[Map[Long, Promise[Unit]]](Map.empty)

  override private[rhttpc] def publishSerialized(msg: SerializedMessage): Future[Unit] = {
    val properties = prepareProperties.applyOrElse(
      msg,
      (_: SerializedMessage) => throw new IllegalArgumentException(s"Not supported message type: $msg")
    )
    val ackPromise = Promise[Unit]()
    for {
      _ <- seqNoOnAckPromiseAgent.alter { curr =>
        val publishSeqNo = channel.getNextPublishSeqNo
        logger.debug(s"PUBLISH: $publishSeqNo")
        channel.basicPublish(exchangeName, queueName, properties, msg.content)
        curr + (publishSeqNo -> ackPromise)
      }
      ack <- ackPromise.future
    } yield ack
  }

  override def handleAck(deliveryTag: Long, multiple: Boolean): Unit = {
    logger.debug(s"ACK: $deliveryTag, multiple = $multiple")
    confirm(deliveryTag, multiple)(_.success(Unit))
  }

  override def handleNack(deliveryTag: Long, multiple: Boolean): Unit = {
    logger.debug(s"NACK: $deliveryTag, multiple = $multiple")
    confirm(deliveryTag, multiple)(_.failure(NoPubMsgAckException))
  }

  private def confirm(deliveryTag: Long, multiple: Boolean)
                     (complete: Promise[Unit] => Unit): Unit = {
    seqNoOnAckPromiseAgent.alter { curr =>
      val (toAck, rest) = curr.partition {
        case (seqNo, ackPromise) =>
          seqNo == deliveryTag || multiple && seqNo <= deliveryTag
      }
      toAck.foreach {
        case (seqNo, ackPromise) => complete(ackPromise)
      }
      rest
    }
  }

  override def start(): Unit = {}

  override def stop(): Future[Unit] = {
    recoveredFuture("completing publishing", currentPublishingFuturesComplete)
      .map(_ => recovered("channel closing", channel.close()))
  }

  private def currentPublishingFuturesComplete: Future[Unit] =
    seqNoOnAckPromiseAgent.future()
      .flatMap(map => Future.sequence(map.values.map(_.future)))
      .map(_ => Unit)
}

case object NoPubMsgAckException extends Exception(s"No acknowledgement for published message") 
Example 105
Source File: AmqpJdbcScheduler.scala    From reliable-http-client   with Apache License 2.0 5 votes vote down vote up
package rhttpc.transport.amqpjdbc

import akka.actor.{Cancellable, Scheduler}
import org.slf4j.LoggerFactory
import rhttpc.transport.SerializingPublisher.SerializedMessage
import rhttpc.transport._

import scala.concurrent.duration.FiniteDuration
import scala.concurrent.{ExecutionContext, Future}
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}

private[amqpjdbc] trait AmqpJdbcScheduler[PubMsg] {

  def schedule(msg: Message[PubMsg], delay: FiniteDuration): Future[Unit]

  def start(): Unit

  def stop(): Future[Unit]

}

private[amqpjdbc] class AmqpJdbcSchedulerImpl[PubMsg](scheduler: Scheduler,
                                                      checkInterval: FiniteDuration,
                                                      repo: ScheduledMessagesRepository,
                                                      queueName: String,
                                                      batchSize: Int,
                                                      publisher: SerializingPublisher[PubMsg])
                                                     (implicit ec: ExecutionContext,
                                                      serializer: Serializer[PubMsg]) extends AmqpJdbcScheduler[PubMsg] {
  private val logger = LoggerFactory.getLogger(getClass)

  private var ran: Boolean = false
  private var scheduledCheck: Option[Cancellable] = None
  private var currentPublishedFetchedFuture: Future[Int] = Future.successful(0)

  override def schedule(msg: Message[PubMsg], delay: FiniteDuration): Future[Unit] = {
    val serialized = serializer.serialize(msg.content)
    repo.save(MessageToSchedule(queueName, serialized, msg.properties, delay))
  }

  override def start(): Unit = {
    synchronized {
      if (!ran) {
        ran = true
        publishFetchedMessagesThanReschedule()
      }
    }
  }

  private def publishFetchedMessagesThanReschedule(): Unit = {
    synchronized {
      if (ran) {
        val publishedFetchedFuture = repo.fetchMessagesShouldByRun(queueName, batchSize)(publish)
        currentPublishedFetchedFuture = publishedFetchedFuture
        publishedFetchedFuture onComplete handlePublicationResult
      }
    }
  }

  private def publish(messages: Seq[ScheduledMessage]): Future[Seq[Unit]] = {
    if (messages.nonEmpty) {
      logger.debug(s"Fetched ${messages.size}, publishing")
    }
    val handlingFutures = messages.map { message =>
      publisher.publishSerialized(SerializedMessage(message.content.getBytes(), message.properties))
    }
    Future.sequence(handlingFutures)
  }

  private def handlePublicationResult(tryResult: Try[Int]): Unit = {
    tryResult match {
      case Failure(ex) =>
        logger.error("Exception while publishing fetched messages", ex)
      case _ =>
    }
    synchronized {
      if (ran) {
        scheduledCheck = Some(scheduler.scheduleOnce(checkInterval)(publishFetchedMessagesThanReschedule()))
      } else {
        logger.debug(s"Scheduler is stopping, next check will be skipped")
      }
    }
  }

  override def stop(): Future[Unit] = {
    synchronized {
      scheduledCheck.foreach(_.cancel())
      ran = false
      currentPublishedFetchedFuture.map(_ => Unit)
    }
  }

} 
Example 106
Source File: Recovered.scala    From reliable-http-client   with Apache License 2.0 5 votes vote down vote up
package rhttpc.utils

import org.slf4j.LoggerFactory

import scala.concurrent.{ExecutionContext, Future}
import scala.util.control.NonFatal

object Recovered {

  private lazy val logger = LoggerFactory.getLogger(getClass)

  def recovered(action: String, run: => Unit): Unit = {
    try {
      run
    } catch {
      case NonFatal(ex) =>
        logger.error(s"Exception while $action", ex)

    }
  }

  def recoveredFuture(action: String, future: => Future[Unit])
                     (implicit ec: ExecutionContext): Future[Unit] = {
    try {
      future.recover {
        case NonFatal(ex) =>
          logger.error(s"Exception while $action", ex)
      }
    } catch {
      case NonFatal(ex) => // while preparing future
        logger.error(s"Exception while $action", ex)
        Future.successful(Unit)
    }
  }

} 
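A short sketch of how these helpers are meant to be used; the action names and the failing steps are made up. Exceptions are logged and swallowed so that a shutdown sequence can continue with its remaining steps.

import rhttpc.utils.Recovered

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

object RecoveredUsage {
  def main(args: Array[String]): Unit = {
    // a failing synchronous cleanup step: the exception is logged, not rethrown
    Recovered.recovered("closing metrics reporter", throw new IllegalStateException("already closed"))

    // a failing asynchronous step: the returned future still completes successfully
    val done: Future[Unit] =
      Recovered.recoveredFuture("flushing buffers", Future.failed(new RuntimeException("broker gone")))
    done.foreach(_ => println("shutdown sequence finished"))
  }
}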
Example 107
Source File: FallbackPublisher.scala    From reliable-http-client   with Apache License 2.0 5 votes vote down vote up
package rhttpc.transport.fallback

import akka.actor.{ActorSystem, Scheduler}
import akka.pattern.CircuitBreaker
import org.slf4j.LoggerFactory
import rhttpc.transport.{Message, Publisher}

import scala.concurrent.Future
import scala.concurrent.duration.FiniteDuration
import scala.util.control.NonFatal

private[fallback] class FallbackPublisher[Msg](main: Publisher[Msg],
                                               fallback: Publisher[Msg])
                                              (maxFailures: Int,
                                               callTimeout: FiniteDuration,
                                               resetTimeout: FiniteDuration)
                                              (implicit system: ActorSystem) extends Publisher[Msg] {

  import system.dispatcher

  private val logger = LoggerFactory.getLogger(getClass)

  private val circuitBreaker = new CircuitBreaker(system.scheduler, maxFailures, callTimeout, resetTimeout)
    .onOpen(logger.debug("Circuit opened"))
    .onHalfOpen(logger.debug("Circuit half-opened"))
    .onClose(logger.debug("Circuit closed"))

  override def publish(msg: Message[Msg]): Future[Unit] = {
    circuitBreaker.withCircuitBreaker(main.publish(msg)).recoverWith {
      case NonFatal(ex) =>
        logger.debug(s"Circuit is opened, sending message [${msg.getClass.getName}] to fallback transport")
        fallback.publish(msg)
    }
  }

  override def start(): Unit = {
    main.start()
    fallback.start()
  }

  override def stop(): Future[Unit] = {
    import rhttpc.utils.Recovered._
    recoveredFuture("stopping main publisher", main.stop())
      .flatMap(_ => recoveredFuture("stopping fallback publisher", fallback.stop()))
  }
} 
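The publisher above relies on Akka's CircuitBreaker to decide when to stop trying the main transport. A standalone sketch of that mechanism, outside the rhttpc Publisher abstraction (the failing primary call is simulated and the thresholds are arbitrary):

import akka.actor.ActorSystem
import akka.pattern.CircuitBreaker

import scala.concurrent.Future
import scala.concurrent.duration._
import scala.util.control.NonFatal

object CircuitBreakerSketch extends App {
  implicit val system: ActorSystem = ActorSystem("cb-sketch")
  import system.dispatcher

  val breaker = new CircuitBreaker(system.scheduler,
    maxFailures = 5, callTimeout = 2.seconds, resetTimeout = 30.seconds)
    .onOpen(println("circuit opened"))

  def primary(): Future[Unit] = Future.failed(new RuntimeException("primary transport down"))
  def fallback(): Future[Unit] = Future.successful(())

  // Route through the primary; when it fails (or the circuit is already open), fall back.
  val result = breaker.withCircuitBreaker(primary()).recoverWith {
    case NonFatal(_) => fallback()
  }
  result.foreach(_ => system.terminate())
}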
Example 108
Source File: Slf4jLogger.scala    From zio-logging   with Apache License 2.0 5 votes vote down vote up
package zio.logging.slf4j

import org.slf4j.{ LoggerFactory, MDC }
import zio.internal.Tracing
import zio.internal.stacktracer.Tracer
import zio.internal.stacktracer.ZTraceElement.{ NoLocation, SourceLocation }
import zio.internal.stacktracer.impl.AkkaLineNumbersTracer
import zio.internal.tracing.TracingConfig
import zio.logging.Logging
import zio.logging._
import zio.{ ZIO, ZLayer }

import scala.jdk.CollectionConverters._
object Slf4jLogger {

  private val tracing = Tracing(Tracer.globallyCached(new AkkaLineNumbersTracer), TracingConfig.enabled)

  private def classNameForLambda(lambda: => AnyRef) =
    tracing.tracer.traceLocation(() => lambda) match {
      case SourceLocation(_, clazz, _, _) => Some(clazz)
      case NoLocation(_)                  => None
    }

  private def logger(name: String) =
    ZIO.effectTotal(
      LoggerFactory.getLogger(
        name
      )
    )

  def make(
    logFormat: (LogContext, => String) => String,
    rootLoggerName: Option[String] = None
  ): ZLayer[Any, Nothing, Logging] =
    Logging.make(
      logger = { (context, line) =>
        val loggerName = context.get(LogAnnotation.Name) match {
          case Nil   => classNameForLambda(line).getOrElse("ZIO.defaultLogger")
          case names => LogAnnotation.Name.render(names)
        }
        logger(loggerName).map {
          slf4jLogger =>
            val maybeThrowable = context.get(LogAnnotation.Throwable).orNull
            context.get(LogAnnotation.Level).level match {
              case LogLevel.Off.level   => ()
              case LogLevel.Debug.level => slf4jLogger.debug(logFormat(context, line), maybeThrowable)
              case LogLevel.Trace.level => slf4jLogger.trace(logFormat(context, line), maybeThrowable)
              case LogLevel.Info.level  => slf4jLogger.info(logFormat(context, line), maybeThrowable)
              case LogLevel.Warn.level  => slf4jLogger.warn(logFormat(context, line), maybeThrowable)
              case LogLevel.Error.level => slf4jLogger.error(logFormat(context, line), maybeThrowable)
              case LogLevel.Fatal.level => slf4jLogger.error(logFormat(context, line), maybeThrowable)
            }
        }
      },
      rootLoggerName = rootLoggerName
    )

  
  def makeWithAnnotationsAsMdc(
    mdcAnnotations: List[LogAnnotation[_]],
    logFormat: (LogContext, => String) => String = (_, s) => s,
    rootLoggerName: Option[String] = None
  ): ZLayer[Any, Nothing, Logging] = {
    val annotationNames = mdcAnnotations.map(_.name)

    Logging.make(
      (context, line) => {
        val loggerName = context.get(LogAnnotation.Name) match {
          case Nil   => classNameForLambda(line).getOrElse("ZIO.defaultLogger")
          case names => LogAnnotation.Name.render(names)
        }
        logger(loggerName).map {
          slf4jLogger =>
            val maybeThrowable = context.get(LogAnnotation.Throwable).orNull

            val mdc: Map[String, String] = context.renderContext.filter {
              case (k, _) => annotationNames.contains(k)
            }
            MDC.setContextMap(mdc.asJava)
            context.get(LogAnnotation.Level).level match {
              case LogLevel.Off.level   => ()
              case LogLevel.Debug.level => slf4jLogger.debug(logFormat(context, line), maybeThrowable)
              case LogLevel.Trace.level => slf4jLogger.trace(logFormat(context, line), maybeThrowable)
              case LogLevel.Info.level  => slf4jLogger.info(logFormat(context, line), maybeThrowable)
              case LogLevel.Warn.level  => slf4jLogger.warn(logFormat(context, line), maybeThrowable)
              case LogLevel.Error.level => slf4jLogger.error(logFormat(context, line), maybeThrowable)
              case LogLevel.Fatal.level => slf4jLogger.error(logFormat(context, line), maybeThrowable)
            }
            MDC.clear()
        }

      },
      rootLoggerName = rootLoggerName
    )
  }
} 
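A minimal usage sketch for the layer built above, assuming a ZIO 1.x setup in which Runtime.default, provideLayer and the zio.logging.log accessors are available (the format function simply passes the message through):

import zio.Runtime
import zio.logging.log
import zio.logging.slf4j.Slf4jLogger

object Slf4jLoggerUsage {
  def main(args: Array[String]): Unit = {
    // Route ZIO log lines to SLF4J, rendering the message unchanged.
    val loggingLayer = Slf4jLogger.make((_, message) => message)

    Runtime.default.unsafeRun(
      log.info("hello from zio-logging via SLF4J").provideLayer(loggingLayer)
    )
  }
}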
Example 109
Source File: SampleRoutes.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package akkahttp

import java.io.File

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.Route
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.sys.process.Process
import scala.util.{Failure, Success}


object SampleRoutes extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("SampleRoutes")
  implicit val executionContext = system.dispatcher


  def getFromBrowsableDir: Route = {
    val dirToBrowse = File.separator + "tmp"

    // pathPrefix allows loading dirs and files recursively
    pathPrefix("entries") {
      getFromBrowseableDirectory(dirToBrowse)
    }
  }

  def parseFormData: Route = path("post") {
    formFields('color, 'age.as[Int]) { (color, age) =>
      complete(s"The color is '$color' and the age is $age")
    }
  }

  def routes: Route = {
    getFromBrowsableDir ~ parseFormData
  }

  val bindingFuture = Http().bindAndHandle(routes, "127.0.0.1", 8000)

  bindingFuture.onComplete {
    case Success(b) =>
      println("Server started, listening on: " + b.localAddress)
    case Failure(e) =>
      println(s"Server could not bind to... Exception message: ${e.getMessage}")
      system.terminate()
  }

  def browserClient() = {
    val os = System.getProperty("os.name").toLowerCase
    if (os == "mac os x") Process("open ./src/main/resources/SampleRoutes.html").!
  }

  browserClient()

  sys.addShutdownHook {
    println("About to shutdown...")
    val fut = bindingFuture.map(serverBinding => serverBinding.terminate(hardDeadline = 3.seconds))
    println("Waiting for connections to terminate...")
    val onceAllConnectionsTerminated = Await.result(fut, 10.seconds)
    println("Connections terminated")
    onceAllConnectionsTerminated.flatMap { _ => system.terminate()
    }
  }
} 
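A small sketch in the same directive style, showing how one more endpoint could be defined and bound on its own (the object name, path and port are illustrative; bindAndHandle matches the akka-http API used above):

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.Route

object HealthRouteSketch extends App {
  implicit val system: ActorSystem = ActorSystem("HealthRouteSketch")

  // One extra endpoint; it could be appended to the composition above via ~
  val health: Route = path("health") {
    get {
      complete("OK")
    }
  }

  Http().bindAndHandle(health, "127.0.0.1", 8001)
}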
Example 110
Source File: PublishToSourceQueueFromMultipleThreads.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream

import akka.actor.ActorSystem
import akka.stream.Supervision.Decider
import akka.stream._
import akka.stream.scaladsl.{Flow, Sink, Source, SourceQueueWithComplete}
import akka.{Done, NotUsed}
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future
import scala.concurrent.duration._
import scala.util.{Failure, Success}


object PublishToSourceQueueFromMultipleThreads extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("PublishToSourceQueueFromMultipleThreads")
  implicit val ec = system.dispatcher

  val bufferSize = 100
  // As of akka 2.6.x there is a thread safe implementation for SourceQueue
  val maxConcurrentOffers = 1000
  val numberOfPublishingClients = 1000

  val slowSink: Sink[Seq[Int], NotUsed] =
    Flow[Seq[Int]]
      .delay(2.seconds, DelayOverflowStrategy.backpressure)
      .to(Sink.foreach(e => logger.info(s"Reached sink: $e")))

  val sourceQueue: SourceQueueWithComplete[Int] = Source
    .queue[Int](bufferSize, OverflowStrategy.backpressure, maxConcurrentOffers)
    .groupedWithin(10, 1.seconds)
    .to(slowSink)
    .run

  val doneConsuming: Future[Done] = sourceQueue.watchCompletion()
  signalWhen(doneConsuming, "consuming") //never completes

  simulatePublishingFromMultipleThreads()

  // Before 2.6.x a stream had to be used to throttle and control the backpressure
  //simulatePublishingClientsFromStream()

  // Decide on the stream level, because the OverflowStrategy.backpressure
  // on the sourceQueue causes an IllegalStateException
  // Handling this on the stream level allows the stream to be restarted
  private def simulatePublishingClientsFromStream() = {

    val decider: Decider = {
      case _: IllegalStateException => println("Got backpressure signal for offered element, restart..."); Supervision.Restart
      case _ => Supervision.Stop
    }

    val donePublishing: Future[Done] = Source(1 to numberOfPublishingClients)
      .mapAsync(10)(offerToSourceQueue) //throttle
      .withAttributes(ActorAttributes.supervisionStrategy(decider))
      .runWith(Sink.ignore)
    signalWhen(donePublishing, "publishing")
  }

  private def simulatePublishingFromMultipleThreads() = (1 to numberOfPublishingClients).par.foreach(offerToSourceQueue)

  private def offerToSourceQueue(each: Int) = {
    sourceQueue.offer(each).map {
      case QueueOfferResult.Enqueued => logger.info(s"enqueued $each")
      case QueueOfferResult.Dropped => logger.info(s"dropped $each")
      case QueueOfferResult.Failure(ex) => logger.info(s"Offer failed: $ex")
      case QueueOfferResult.QueueClosed => logger.info("Source Queue closed")
    }
  }

  private def signalWhen(done: Future[Done], operation: String) = {
    done.onComplete {
      case Success(b) =>
        logger.info(s"Finished: $operation")
      case Failure(e) =>
        logger.info(s"Failure: $e About to terminate...")
        system.terminate()
    }
  }
} 
Example 111
Source File: TweetExample.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream

import java.time.{Instant, ZoneId}

import akka.NotUsed
import akka.actor.{ActorSystem, Cancellable}
import akka.stream.DelayOverflowStrategy
import akka.stream.scaladsl.{Flow, MergePrioritized, Sink, Source}
import org.apache.commons.lang3.exception.ExceptionUtils
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.duration._
import scala.util.{Failure, Success}



object TweetExample extends App {
  implicit val system = ActorSystem("TweetExample")
  implicit val ec = system.dispatcher
  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  final case class Author(handle: String)

  final case class Hashtag(name: String)

  final case class Tweet(author: Author, timestamp: Long, body: String) {
    def hashtags: Set[Hashtag] =
      body.split(" ").collect { case t if t.startsWith("#") => Hashtag(t) }.toSet

    override def toString = {
      val localDateTime = Instant.ofEpochMilli(timestamp).atZone(ZoneId.systemDefault()).toLocalDateTime
      s"$localDateTime - ${author.handle} tweeted: ${body.take(5)}..."
    }
  }

  val akkaTag = Hashtag("#akka")

  val tweetsLowPrio: Source[Tweet, Cancellable] = Source.tick(1.second, 200.millis, NotUsed).map(_ => Tweet(Author("LowPrio"), System.currentTimeMillis, "#other #akka aBody"))
  val tweetsHighPrio: Source[Tweet, Cancellable] = Source.tick(2.second, 1.second, NotUsed).map(_ => Tweet(Author("HighPrio"), System.currentTimeMillis, "#akka #other aBody"))
  val tweetsVeryHighPrio: Source[Tweet, Cancellable] = Source.tick(2.second, 1.second, NotUsed).map(_ => Tweet(Author("VeryHighPrio"), System.currentTimeMillis, "#akka #other aBody"))

  val limitedTweets: Source[Tweet, NotUsed] = Source.combine(tweetsLowPrio, tweetsHighPrio, tweetsVeryHighPrio)(_ => MergePrioritized(List(1, 10, 100))).take(20)

  val processingFlow = Flow[Tweet]
    .filter(_.hashtags.contains(akkaTag))
    .wireTap(each => logger.info(s"$each"))

  val slowDownstream =
    Flow[Tweet]
      .delay(5.seconds, DelayOverflowStrategy.backpressure)

  val processedTweets =
    limitedTweets
      .via(processingFlow)
      .via(slowDownstream)
      .runWith(Sink.seq)

  processedTweets.onComplete {
    case Success(results) =>
      logger.info(s"Successfully processed: ${results.size} tweets")
      system.terminate()
    case Failure(exception) =>
      logger.info(s"The stream failed with: ${ExceptionUtils.getRootCause(exception)}")
      system.terminate()
  }
} 
Example 112
Source File: AsyncExecution.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream

import akka.Done
import akka.actor.ActorSystem
import akka.stream.ActorAttributes
import akka.stream.scaladsl.{Flow, Sink, Source}
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future
import scala.util.{Failure, Success}


object AsyncExecution extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("AsyncExecution")
  implicit val ec = system.dispatcher

  def stage(name: String) =
    Flow[Int]
      .wireTap(index => logger.info(s"Stage $name processing element $index by ${Thread.currentThread().getName}"))

  def stageBlocking(name: String) =
    Flow[Int]
      .wireTap(index => logger.info(s"Stage $name processing element $index by ${Thread.currentThread().getName}"))
      .wireTap(_ => Thread.sleep(5000))
      .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking"))

  def sinkBlocking: Sink[Int, Future[Done]] =
    Sink.foreach { index: Int =>
      Thread.sleep(2000)
      logger.info(s"Slow sink processing element $index by ${Thread.currentThread().getName}")
     }
      //Adding a custom dispatcher creates an async boundary
      //see discussion in: https://discuss.lightbend.com/t/how-can-i-make-sure-that-fileio-frompath-is-picking-up-my-dispatcher/6528/4
      .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking"))


  val done = Source(1 to 10)
    .via(stage("A")).async
    //When activated instead of alsoTo(sinkBlocking): elements for stage C are held up by stage B
    //.via(stageBlocking("B")).async
    .alsoTo(sinkBlocking).async
    .via(stage("C")).async
    .runWith(Sink.ignore)

  //With alsoTo(sinkBlocking) the stages A and C signal "done" too early and thus would terminate the whole stream
  //The reason for this is the custom dispatcher in sinkBlocking
  //terminateWhen(done)

  def terminateWhen(done: Future[_]) = {
    done.onComplete {
      case Success(_) =>
        println("Flow Success. About to terminate...")
        system.terminate()
      case Failure(e) =>
        println(s"Flow Failure: $e. About to terminate...")
        system.terminate()
    }
  }
} 
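Both this example and the next one select a dispatcher named custom-dispatcher-for-blocking. A sketch of what such a dispatcher definition could look like, declared inline via ConfigFactory.parseString purely for illustration (in the real project it would normally live in application.conf, and the pool size shown is an assumption):

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory

object CustomDispatcherSketch extends App {
  val config = ConfigFactory.parseString(
    """
      |custom-dispatcher-for-blocking {
      |  type = Dispatcher
      |  executor = "thread-pool-executor"
      |  thread-pool-executor {
      |    fixed-pool-size = 16
      |  }
      |  throughput = 1
      |}
    """.stripMargin).withFallback(ConfigFactory.load())

  // Streams started from this system can now select the dispatcher via
  // ActorAttributes.dispatcher("custom-dispatcher-for-blocking").
  implicit val system: ActorSystem = ActorSystem("CustomDispatcherSketch", config)
}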
Example 113
Source File: WaitForThreeFlowsToComplete.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream

import java.nio.file.Paths

import akka.actor.ActorSystem
import akka.stream._
import akka.stream.scaladsl._
import akka.util.ByteString
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent._
import scala.concurrent.duration._


object WaitForThreeFlowsToComplete extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("WaitForThreeFlowsToComplete")
  implicit val ec = system.dispatcher

  def lineSink(filename: String): Sink[String, Future[IOResult]] =
    Flow[String]
      .map(s => ByteString(s + "\n"))
      .wireTap(_ => logger.info(s"Add line to file: $filename"))
      .toMat(FileIO.toPath(Paths.get(filename)))(Keep.right) //keep the materialized Future[IOResult]
      .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking"))

  val origSource = Source(1 to 10)
  //scan (= transform) the source
  val factorialsSource = origSource.scan(BigInt(1))((acc, next) => acc * next)

  val fastFlow = origSource.runForeach(i => logger.info(s"Reached sink: $i"))

  val slowFlow1 = factorialsSource
    .map(_.toString)
    .runWith(lineSink("factorial1.txt"))

  val slowFlow2 = factorialsSource
    .zipWith(Source(0 to 10))((num, idx) => s"$idx! = $num")
    .throttle(1, 1.second, 1, ThrottleMode.shaping)
    .runWith(lineSink("factorial2.txt"))

  val allDone = for {
    fastFlowDone <- fastFlow
    slowFlow1Done <- slowFlow1
    slowFlow2Done <- slowFlow2
  } yield (fastFlowDone, slowFlow1Done, slowFlow2Done)

  allDone.onComplete { results =>
    logger.info(s"Resulting futures from flows: $results - about to terminate")
    system.terminate()
  }
} 
Example 114
Source File: DistributedShellClient.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.examples.distributedshell

import java.util.concurrent.TimeUnit
import scala.concurrent.Await
import scala.concurrent.duration.Duration

import akka.pattern.ask
import org.slf4j.{Logger, LoggerFactory}

import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption}
import org.apache.gearpump.examples.distributedshell.DistShellAppMaster.ShellCommand
import org.apache.gearpump.util.{AkkaApp, Constants}


object DistributedShellClient extends AkkaApp with ArgumentsParser {
  implicit val timeout = Constants.FUTURE_TIMEOUT
  private val LOG: Logger = LoggerFactory.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "appid" -> CLIOption[Int]("<the distributed shell appid>", required = true),
    "command" -> CLIOption[String]("<shell command>", required = true)
  )

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    implicit val system = context.system
    implicit val dispatcher = system.dispatcher
    val appid = config.getInt("appid")
    val command = config.getString("command")
    val appMaster = context.resolveAppID(appid)
    LOG.info(s"Resolved appMaster $appid address $appMaster, sending command $command")
    val future = (appMaster ? ShellCommand(command)).map { result =>
      LOG.info(s"Result: \n$result")
      context.close()
    }
    Await.ready(future, Duration(60, TimeUnit.SECONDS))
  }
} 
Example 115
Source File: CGroupProcessLauncher.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.cluster.worker

import java.io.File
import scala.sys.process.Process

import com.typesafe.config.Config
import org.slf4j.{Logger, LoggerFactory}

import org.apache.gearpump.cluster.scheduler.Resource
import org.apache.gearpump.util.{ProcessLogRedirector, RichProcess}


class CGroupProcessLauncher(val config: Config) extends ExecutorProcessLauncher {
  private val APP_MASTER = -1
  private val cgroupManager: Option[CGroupManager] = CGroupManager.getInstance(config)
  private val LOG: Logger = LoggerFactory.getLogger(getClass)

  override def cleanProcess(appId: Int, executorId: Int): Unit = {
    if (executorId != APP_MASTER) {
      cgroupManager.foreach(_.shutDownExecutor(appId, executorId))
    }
  }

  override def createProcess(
      appId: Int, executorId: Int, resource: Resource, appConfig: Config, options: Array[String],
      classPath: Array[String], mainClass: String, arguments: Array[String]): RichProcess = {
    val cgroupCommand = if (executorId != APP_MASTER) {
      cgroupManager.map(_.startNewExecutor(appConfig, resource.slots, appId,
        executorId)).getOrElse(List.empty)
    } else List.empty
    LOG.info(s"Launch executor $executorId with CGroup ${cgroupCommand.mkString(" ")}, " +
      s"classpath: ${classPath.mkString(File.pathSeparator)}")

    val java = System.getProperty("java.home") + "/bin/java"
    val command = cgroupCommand ++ List(java) ++ options ++ List("-cp", classPath
      .mkString(File.pathSeparator), mainClass) ++ arguments
    LOG.info(s"Starting executor process java $mainClass ${arguments.mkString(" ")}; " +
      s"options: ${options.mkString(" ")}")
    val logger = new ProcessLogRedirector()
    val process = Process(command).run(logger)
    new RichProcess(process, logger)
  }
} 
Example 116
Source File: ProcessLogRedirector.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.util

import java.io.{Closeable, Flushable}
import scala.sys.process.ProcessLogger

import org.slf4j.LoggerFactory


class ProcessLogRedirector extends ProcessLogger with Closeable with Flushable with ConsoleOutput {
  private val LOG = LoggerFactory.getLogger("redirect")

  // We only capture the first 1K chars
  private final val LENGTH = 1000
  private var _error: String = ""
  private var _output: String = ""

  def error: String = _error
  def output: String = _output

  def out(s: => String): Unit = {
    if (_output.length <= LENGTH) {
      _output += "\n" + s
    }
    LOG.info(s)
  }
  def err(s: => String): Unit = {
    if (_error.length <= LENGTH) {
      _error += "\n" + s
    }
    LOG.error(s)
  }
  def buffer[T](f: => T): T = f
  def close(): Unit = ()
  def flush(): Unit = ()
} 
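A hypothetical usage of the redirector above with scala.sys.process (assuming the gearpump util classes are on the classpath and that an echo binary exists on the host):

import scala.sys.process.Process

import org.apache.gearpump.util.ProcessLogRedirector

object ProcessLogRedirectorUsage extends App {
  val redirector = new ProcessLogRedirector()

  // stdout/stderr of the child process flow through out()/err() above, which
  // log each line and keep roughly the first 1K characters in memory.
  val exitCode = Process(Seq("echo", "hello world")).run(redirector).exitValue()

  println(s"exit code: $exitCode, captured output:${redirector.output}")
}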
Example 117
Source File: VT.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.assoc

import breeze.linalg._
import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM}
import org.dizhang.seqspark.stat.{Resampling, ScoreTest}
import org.dizhang.seqspark.util.General.RichDouble
import org.slf4j.LoggerFactory

import scala.language.existentials


@SerialVersionUID(7727880001L)
trait VT extends AssocMethod {
  def nullModel: NM
  def x: Encode.VT
  def result: AssocMethod.Result
}

object VT {

  val logger = LoggerFactory.getLogger(getClass)

  def apply(nullModel: NM,
            x: Encode.Coding): VT with AssocMethod.AnalyticTest = {
    val nmf = nullModel match {
      case NM.Simple(y, b) => NM.Fit(y, b)
      case NM.Mutiple(y, c, b) => NM.Fit(y, c, b)
      case nm: NM.Fitted => nm
    }
    AnalyticScoreTest(nmf, x.asInstanceOf[Encode.VT])
  }

  def apply(ref: Double, min: Int, max: Int,
            nullModel: NM.Fitted,
            x: Encode.Coding): ResamplingTest = {
    ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.VT])
  }

  def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = {
    //println(s"scores: ${st.score.toArray.mkString(",")}")
    //println(s"variances: ${diag(st.variance).toArray.mkString(",")}")
    val m = x.asInstanceOf[Encode.VT].coding
    val ts = m.map{sv =>
      val st = ScoreTest(nm, sv)
      st.score(0)/st.variance(0, 0).sqrt
    }
    //val ts = st.score :/ diag(st.variance).map(x => x.sqrt)
    max(ts)
  }

  @SerialVersionUID(7727880101L)
  final case class AnalyticScoreTest(nullModel: NM.Fitted,
                                     x: Encode.VT)
    extends VT with AssocMethod.AnalyticTest
  {

    val statistic = getStatistic(nullModel, x)
    val pValue = None
    def result: AssocMethod.VTAnalytic = {
      val info = s"MAFs=${x.coding.length}"
      AssocMethod.VTAnalytic(x.vars, x.size, statistic, pValue, info)
    }
  }

  @SerialVersionUID(7727880201L)
  final case class ResamplingTest(refStatistic: Double,
                                  min: Int,
                                  max: Int,
                                  nullModel: NM.Fitted,
                                  x: Encode.VT)
    extends VT with AssocMethod.ResamplingTest
  {
    def pCount = {
      Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount
    }
    def result: AssocMethod.VTResampling =
      AssocMethod.VTResampling(x.vars, x.size, refStatistic, pCount)
  }

} 
Example 118
Source File: LCCSLiu.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.stat


import breeze.linalg.{sum, DenseVector => DV}
import breeze.numerics.pow
import org.dizhang.seqspark.stat.LCCSLiu._
import org.dizhang.seqspark.stat.{LinearCombinationChiSquare => LCCS}
import org.dizhang.seqspark.util.General.RichDouble
import org.slf4j.LoggerFactory


object LCCSLiu {

  val logger = LoggerFactory.getLogger(getClass)

  case class CDFLiu(pvalue: Double, ifault: Int) extends LCCS.CDF {
    def trace = Array(0.0)
    override def toString = "Pvalue:   %10f".format(pvalue)
  }

  trait CentralOneDF extends LinearCombinationChiSquare {
    def degreeOfFreedom = DV.ones[Double](size)
    def nonCentrality = DV.zeros[Double](size)
  }

  trait Old extends LCCSLiu {
    def a = if (squareOfS1LargerThanS2) 1.0/(s1 - (s1.square - s2).sqrt) else 1.0/s1
    def df = if (squareOfS1LargerThanS2) a.square - 2 * delta else c2.cube/c3.square
  }
  trait New extends LCCSLiu {
    def a = if (squareOfS1LargerThanS2) 1.0/(s1 - (s1.square - s2).sqrt) else 1.0/s2.sqrt
    def df = if (squareOfS1LargerThanS2) a.square - 2 * delta else 1.0/s2
  }
  @SerialVersionUID(7778550101L)
  case class Simple(lambda: DV[Double]) extends LCCSLiu with CentralOneDF with Old {
    val c1 = ck(1)
    val c2 = ck(2)
    val c3 = ck(3)
    val c4 = ck(4)
  }
  @SerialVersionUID(7778550201L)
  case class Modified(lambda: DV[Double]) extends LCCSLiu with CentralOneDF with New {
    val c1 = ck(1)
    val c2 = ck(2)
    val c3 = ck(3)
    val c4 = ck(4)
  }
  case class SimpleMoments(cs: IndexedSeq[Double]) extends LCCSLiu with CentralOneDF with Old {
    def lambda = DV.zeros[Double](0)
    override val c1 = cs(0)
    override val c2 = cs(1)
    override val c3 = cs(2)
    override val c4 = cs(3)
  }
  case class ModifiedMoments(cs: IndexedSeq[Double]) extends LCCSLiu with CentralOneDF with New {
    def lambda = DV.zeros[Double](0)
    override val c1 = cs(0)
    override val c2 = cs(1)
    override val c3 = cs(2)
    override val c4 = cs(3)
  }
}
@SerialVersionUID(7778550001L)
trait LCCSLiu extends LinearCombinationChiSquare {

  def ck(k: Int): Double = {
    val lbk = pow(lambda, k)
    (lbk dot degreeOfFreedom) + k * (lbk dot nonCentrality)
  }
  def c1: Double
  def c2: Double
  def c3: Double
  def c4: Double
  def s1: Double = c3/c2.cube.sqrt
  def s2: Double = c4/c2.square
  def muQ: Double = c1
  def sigmaQ: Double = (2 * c2).sqrt
  protected lazy val squareOfS1LargerThanS2: Boolean = {
    s1.square > s2
  }
  def a: Double
  def delta: Double = if (squareOfS1LargerThanS2) s1 * a.cube - a.square else 0.0
  def df: Double
  def sigmaX: Double = 2.0.sqrt * a
  def muX: Double = df + delta

  def cdf(cutoff: Double): CDFLiu = {
    //logger.debug(s"muX: $muX sigmaX: $sigmaX muQ: $muQ sigmaQ: $sigmaQ df: $df delta: $delta ")
    val nccs = NonCentralChiSquare(df + delta, delta)
    val norm =  (cutoff - muQ)/sigmaQ
    val norm1 = norm * sigmaX + muX
    val pv = nccs.cdf(norm1)
    if (pv >= 0.0 && pv <= 1.0) {
      CDFLiu(pv, 0)
    } else {
      CDFLiu(pv, 1)
    }
  }
} 
Example 119
Source File: PCA.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.stat

import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
import org.apache.spark.mllib.feature.{PCA => SPCA}
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.rdd.RDD
import org.dizhang.seqspark.ds.{DenseCounter, Genotype, SparseCounter}
import org.dizhang.seqspark.util.General._
import org.dizhang.seqspark.worker.Data
import org.slf4j.LoggerFactory


  }
  def pc(n: Int): BDM[Double] = {
    val model = new SPCA(n)
    val data = this.prepare
    if (data.isEmpty()) {
      new BDM[Double](0, 0)
    } else {
      val res = model.fit(data).pc.values
      new BDM(res.length/n, n, res)
    }
  }
} 
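The pc(n) method above ultimately delegates to Spark MLlib's PCA. A standalone sketch of that underlying call with a tiny in-memory RDD (the local SparkContext setup and the sample vectors are illustrative only):

import org.apache.spark.mllib.feature.{PCA => SPCA}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.{SparkConf, SparkContext}

object PCASketch extends App {
  val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("pca-sketch"))

  val rows = sc.parallelize(Seq(
    Vectors.dense(1.0, 2.0, 3.0),
    Vectors.dense(2.0, 4.1, 6.0),
    Vectors.dense(3.0, 6.2, 9.1)
  ))

  // Fit a 2-component model; model.pc is a (numFeatures x 2) principal-components matrix.
  val model = new SPCA(2).fit(rows)
  println(model.pc)

  sc.stop()
}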
Example 120
Source File: Regions.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.annot

import org.apache.spark.SparkContext
import org.dizhang.seqspark.ds.Region
import org.slf4j.LoggerFactory


class Regions(private val loci: Map[Byte, IntervalTree[Region]]) {

  def count(): Int = {
    loci.map{case (k, v) => IntervalTree.count(v)}.sum
  }

  def overlap(r: Region): Boolean = {
    loci.contains(r.chr) && IntervalTree.overlap(loci(r.chr), r)
  }
  def lookup(r: Region): List[Region] = {
    if (! loci.contains(r.chr)) {
      List[Region]()
    } else {
      IntervalTree.lookup(loci(r.chr), r)
    }
  }
}

object Regions {
  type LOCI = Map[Byte, Array[Region]]

  val logger = LoggerFactory.getLogger(this.getClass)

  def comop(m1: LOCI, m2: LOCI): LOCI = {
    m1 ++ (for ((k, v) <- m2) yield k -> (v ++ m1.getOrElse(k, Array())))
  }

  def apply(raw: Iterator[Region]): Regions = {
    val regArr = raw.toArray

    logger.info(s"${regArr.length} regions to parse")

    val regByChrEle = regArr.map(r => Map(r.chr -> Array(r)))
    //logger.info(s"${regByChrEle.count()} regions after map")
    val regByChr = regByChrEle.reduce((a, b) => comop(a, b))
    //logger.info(s"${regByChr.map{case (k, v) => v.length}.sum} regions after combine")
    val rs = new Regions(regByChr.map{case (k, v) => k -> IntervalTree(v.toIterator)})
    logger.info(s"${rs.count()} regions generated")
    rs
  }

  def makeExome(coordFile: String)(sc: SparkContext): Regions = {

    val locRaw = sc.textFile(coordFile).cache()
    val header = locRaw.first().split("\t")
    val locRdd = locRaw.zipWithUniqueId().filter(_._2 > 0).map(_._1)
    //val iter = scala.io.Source.fromFile(coordFile).getLines()
    val raw = locRdd.filter(l => ! l.split("\t")(2).contains("_"))
      .flatMap(l => RefGene.makeExons(l, header)).toLocalIterator
    apply(raw)
  }
} 
Example 121
Source File: Association.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.worker
import org.dizhang.seqspark.assoc.AssocMaster
import org.dizhang.seqspark.ds.Genotype
import org.dizhang.seqspark.util.SeqContext
import org.slf4j.LoggerFactory

object Association {

  private val logger = LoggerFactory.getLogger(getClass)

  def apply[B: Genotype](input: Data[B])(implicit ssc: SeqContext): Unit = {
    if (ssc.userConfig.pipeline.contains("association"))
      if (input.isEmpty()) {
        logger.warn(s"no variants left. cannot perform association analysis")
      } else {
        new AssocMaster(input).run()
      }
  }
} 
Example 122
Source File: Export.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.worker

import java.net.URI
import java.nio.file.{Files, Path, Paths}

import org.dizhang.seqspark.ds.Genotype
import org.dizhang.seqspark.ds.VCF._
import org.dizhang.seqspark.util.SeqContext
import org.dizhang.seqspark.util.UserConfig.hdfs
import org.apache.hadoop
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
object Export {

  private val logger = LoggerFactory.getLogger(getClass)

  def apply[A: Genotype](data: Data[A])(implicit ssc: SeqContext): Unit = {
    val geno = implicitly[Genotype[A]]
    val conf = ssc.userConfig.output.genotype
    if (conf.export) {
      val path = if (conf.path.isEmpty)
        ssc.userConfig.input.genotype.path + "." + ssc.userConfig.project
      else
        conf.path
      logger.info(s"going to export data to $path")

      if (path.startsWith("file:")) {
        val p = Paths.get(URI.create(path))
        if (Files.exists(p)) {
          Files.walk(p)
            .iterator()
            .asScala
            .toList
            .sorted(Ordering[Path].reverse)
            .foreach(f => Files.delete(f))
        }
      } else {
        val hdPath = new hadoop.fs.Path(path)
        if (hdfs.exists(hdPath)) {
          hdfs.delete(hdPath, true)
        }
      }

      data.samples(conf.samples).saveAsTextFile(path)

    }
    if (conf.save || conf.cache) {
      data.saveAsObjectFile(conf.path)
    }
  }
} 
Example 123
Source File: Annotation.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.worker

import org.apache.spark.SparkContext
import org.dizhang.seqspark.annot._
import org.dizhang.seqspark.annot.VariantAnnotOp._
import org.dizhang.seqspark.ds.{Genotype, Variant}
import org.dizhang.seqspark.util.{Constant, QueryParser, SeqContext}
import org.dizhang.seqspark.util.UserConfig._
import org.dizhang.seqspark.util.ConfigValue._
import org.slf4j.LoggerFactory
import org.apache.hadoop
import org.dizhang.seqspark.worker.Variants.countByFunction

object Annotation {

  private val logger = LoggerFactory.getLogger(getClass)

  private val dbExists = Constant.Variant.dbExists

  def apply[A: Genotype](data: Data[A], a: A)(implicit ssc: SeqContext): Data[A] = {
    logger.info("annotation")

    val conf = ssc.userConfig

    val queryExprs = QueryParser.parse(conf.annotation.addInfo)

    val dbs = QueryParser.dbs(queryExprs.values)

    val assocConf = conf.association

    
      paired.map{
        case (_, (vt, dbmap)) =>
          val res = QueryParser.eval(queryExprs)(dbmap)
          vt.updateInfo(res)
          vt
      }
    }

  }
} 
Example 124
Source File: LogicalParserSpec.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.util

import org.scalatest.{FlatSpec, Matchers}
import org.slf4j.LoggerFactory


class LogicalParserSpec extends FlatSpec with Matchers {

  val logger = LoggerFactory.getLogger(getClass)

  "A LogicalParser" should "be able to constructed" in {
    val lp = LogicalParser.parse("INFO.AN>3800 and INFO.AC>38")
    LogicalParser.parse(List("maf < 0.01 or maf > 0.99", "SS_PASS"))
    LogicalParser.parse(List("maf >= 0.01", "maf <= 0.99", "SS_PASS"))
  }

  "A LogicalParser" should "eval to true" in {
    val lp = LogicalParser.parse("INFO.AN>3800 and INFO.AC>38")
    LogicalParser.eval(lp)(Map("INFO.AN"->"3900", "INFO.AC"->"40")) should be (true)
  }

  "A LogicalParser" should "eval to false" in {
    val lp = LogicalParser.parse("INFO.AN>3800 and INFO.AC>38 and INFO.AC<3750")
    LogicalParser.eval(lp)(Map("INFO.AN"->"3900", "INFO.AC"->"3775")) should be (false)
  }

  "A LogicalParser" should "handle String comparisons" in {
    val lp = LogicalParser.parse("chr != \"X\" and chr != \"Y\"")
    LogicalParser.eval(lp)(Map("chr" -> "11")) should be (true)
  }

  "A LogicalParser" should "handle nested conditions" in {
    val lp = LogicalParser.parse(List("missingRate < 0.1", "batchMissingRate < 0.1", "hwePvalue >= 1e-5"))
    logger.debug(LogicalParser.view(lp))

    LogicalParser.eval(lp)(
      Map("missingRate" -> "0.3", "batchMissingRate" -> "0.4", "hwePvalue" -> "0.001")
    ) should be (false)
  }

  "A LogicalParser" should "parse filter" in {
    val lp = LogicalParser.parse(List("FILTER==\"PASS\"", "INFO.AN>=3468", "INFO.AC>=34", "INFO.AC<=3815"))
    logger.debug(LogicalParser.view(lp))
  }
} 
Example 125
Source File: GenotypeSpec.scala    From seqspark   with Apache License 2.0 5 votes vote down vote up
package org.dizhang.seqspark.ds

import org.scalatest.{FlatSpec, Matchers}
import org.slf4j.LoggerFactory


class GenotypeSpec extends FlatSpec with Matchers {
  val logger = LoggerFactory.getLogger(getClass)

  val raw = {
    Array(
      ".:0",
      "0:4",
      "1:3",
      "./.:2",
      "0/0:12",
      "0/1:2",
      "1/0:1",
      "1/1:0",
      ".|.:1",
      "0|0:8",
      "0|1:7",
      "1|0:9",
      "1|1:3"
    )
  }

  val simple = raw.map(g => Genotype.Raw.toSimpleGenotype(g))


  "A Raw Genotype" should "be able to convert to simple and back" in {
    val s = raw.map(g => Genotype.Raw.toSimpleGenotype(g))
    logger.debug(s"raw to simple: ${s.mkString(",")}")
    val r = s.map(g => Genotype.Simple.toVCF(g))
    logger.debug(s"simple to raw: ${r.mkString(",")}")
    r.map(g => Genotype.Raw.toSimpleGenotype(g)) should be (s)
  }

  "A Raw Genotype" should "give right callRate" in {
    val c = raw.map(g => Genotype.Raw.callRate(g))
    logger.debug(s"raw callrate: ${c.mkString(",")}")
    val cnt = Counter.fromIndexedSeq(c, (1.0, 1.0)).reduce
    logger.debug(s"raw callrate: ${cnt._1/cnt._2}")
  }

  "A Simple Genotype" should "give right callRate" in {
    val c = simple.map(g => Genotype.Simple.callRate(g))
    logger.debug(s"simple callRate: ${c.mkString(",")}")
    val cnt = Counter.fromIndexedSeq(c, (1.0, 1.0)).reduce
    logger.debug(s"simple callrate: ${cnt._1/cnt._2}")
  }

  "A Raw Genotype" should "give right MAF" in {
    val maf = raw.map(g => Genotype.Raw.toAAF(g))
    logger.debug(s"raw maf: ${maf.mkString(",")}")
    val cnt = Counter.fromIndexedSeq(maf, (0.0, 2.0)).reduce
    logger.debug(s"raw maf: ${cnt._1/cnt._2}")
  }

  "A Simple Genotype" should "give right MAF" in {
    val maf = simple.map(g => Genotype.Simple.toAAF(g))
    logger.debug(s"simple maf: ${maf.mkString(",")}")
    val cnt = Counter.fromIndexedSeq(maf, (0.0, 2.0)).reduce
    logger.debug(s"simple maf: ${cnt._1/cnt._2}")
  }

} 
Example 126
Source File: JavaScript.scala    From incubator-toree   with Apache License 2.0 5 votes vote down vote up
package org.apache.toree.magic.builtin

import java.io.PrintStream

import com.google.common.base.Strings
import org.apache.toree.kernel.protocol.v5.MIMEType
import org.apache.toree.magic._
import org.apache.toree.magic.dependencies.IncludeOutputStream
import org.apache.toree.utils.ArgumentParsingSupport
import org.slf4j.LoggerFactory
import org.apache.toree.plugins.annotations.Event

class JavaScript extends CellMagic with ArgumentParsingSupport
  with IncludeOutputStream {

  // Evaluated lazily (as a def) because the outputStream is not provided at construction
  private def printStream = new PrintStream(outputStream)
  
  @Event(name = "javascript")
  override def execute(code: String): CellMagicOutput = {
    def printHelpAndReturn: CellMagicOutput = {
      printHelp(printStream, """%JavaScript <string_code>""")
      CellMagicOutput()
    }

    Strings.isNullOrEmpty(code) match {
      case true => printHelpAndReturn
      case false => CellMagicOutput(MIMEType.ApplicationJavaScript -> code)
    }
  }
} 
Example 127
Source File: InterpreterManager.scala    From incubator-toree   with Apache License 2.0 5 votes vote down vote up
package org.apache.toree.boot.layer

import org.apache.toree.kernel.api.KernelLike
import com.typesafe.config.Config
import org.apache.toree.interpreter._
import scala.collection.JavaConverters._

import org.slf4j.LoggerFactory

case class InterpreterManager(
  default: String = "Scala",
  interpreters: Map[String, Interpreter] = Map[String, Interpreter]()
) {


  def initializeInterpreters(kernel: KernelLike): Unit = {
    interpreters.values.foreach(interpreter =>
      interpreter.init(kernel)
    )
  }

  def addInterpreter(
    name:String,
    interpreter: Interpreter
  ): InterpreterManager = {
    copy(interpreters = interpreters + (name -> interpreter))
  }

  def defaultInterpreter: Option[Interpreter] = {
    interpreters.get(default)
  }
}

object InterpreterManager {

  protected val logger = LoggerFactory.getLogger(this.getClass.getName)

  def apply(config: Config): InterpreterManager = {
    val ip = config.getStringList("interpreter_plugins").asScala ++
      config.getStringList("default_interpreter_plugin").asScala

    val m = ip.foldLeft(Map[String, Interpreter]())( (acc, v) => {

      v.split(":") match {
        case Array(name, className) =>
          try {
            val i = instantiate(className, config)
            acc + (name -> i)
          }
          catch {
            case e:Throwable =>
              logger.error("Error loading interpreter class " + className)
              logger.error(e.getMessage())
              //acc
              throw e
          }
        case _ => acc
      }
    })

    val default = config.getString("default_interpreter")

    InterpreterManager(interpreters = m, default = default)
  }

  
  private def instantiate(className:String, config:Config):Interpreter = {
    try {
      Class
        .forName(className)
        .getConstructor(Class.forName("com.typesafe.config.Config"))
        .newInstance(config).asInstanceOf[Interpreter]
    }
    catch {
      case e: NoSuchMethodException =>
        logger.debug("Using default constructor for class " + className)
        Class
          .forName(className)
          .newInstance().asInstanceOf[Interpreter]
    }

  }

} 
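A sketch of the configuration shape that InterpreterManager.apply reads (the keys come from the code above; the plugin class name is hypothetical and would have to resolve on the classpath for apply() to succeed):

import com.typesafe.config.ConfigFactory

import scala.collection.JavaConverters._

object InterpreterManagerConfigSketch extends App {
  val config = ConfigFactory.parseString(
    """
      |interpreter_plugins = []
      |default_interpreter_plugin = ["Scala:org.example.MyScalaInterpreter"]
      |default_interpreter = "Scala"
    """.stripMargin)

  // Each entry is "name:className"; apply() instantiates className reflectively
  // and registers the resulting interpreter under name.
  config.getStringList("default_interpreter_plugin").asScala.foreach { entry =>
    println(entry.split(":").toList)
  }
}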
Example 128
Source File: SocketConfigSpec.scala    From incubator-toree   with Apache License 2.0 5 votes vote down vote up
package org.apache.toree.kernel.protocol.v5.kernel.socket

import com.typesafe.config.ConfigFactory
import org.scalatest.{FunSpec, Matchers}
import org.slf4j.LoggerFactory
import play.api.data.validation.ValidationError
import play.api.libs.json.{JsPath, JsValue, Json}

class SocketConfigSpec extends FunSpec with Matchers {
  val logger = LoggerFactory.getLogger("jt4")
  //logger.error("WOOT!")

  private val jsonString: String =
    """
    {
      "stdin_port": 10000,
      "control_port": 10001,
      "hb_port": 10002,
      "shell_port": 10003,
      "iopub_port": 10004,
      "ip": "1.2.3.4",
      "transport": "tcp",
      "signature_scheme": "hmac-sha256",
      "key": ""
    }
    """.stripMargin

  val socketConfigJson: JsValue = Json.parse(jsonString)

  val socketConfigFromConfig = SocketConfig.fromConfig(ConfigFactory.parseString(jsonString))

  val socketConfig = SocketConfig(
    10000, 10001, 10002, 10003, 10004, "1.2.3.4", "tcp", "hmac-sha256", ""
  )

  describe("SocketConfig") {
    describe("implicit conversions") {
      it("should implicitly convert from valid json to a SocketConfig instance") {
        // This is the least safe way to convert as an error is thrown if it fails
        socketConfigJson.as[SocketConfig] should be (socketConfig)
      }

      it("should also work with asOpt") {
        // This is safer, but we lose the error information as it returns
        // None if the conversion fails
        val newCompleteRequest = socketConfigJson.asOpt[SocketConfig]

        newCompleteRequest.get should be (socketConfig)
      }

      it("should also work with validate") {
        // This is the safest as it collects all error information (not just first error) and reports it
        val CompleteRequestResults = socketConfigJson.validate[SocketConfig]

        CompleteRequestResults.fold(
          (invalid: Seq[(JsPath, Seq[ValidationError])]) => println("Failed!"),
          (valid: SocketConfig) => valid
        ) should be (socketConfig)
      }

      it("should implicitly convert from a SocketConfig instance to valid json") {
        Json.toJson(socketConfig) should be (socketConfigJson)
      }
    }
    describe("#toConfig") {
      it("should implicitly convert from valid json to a SocketConfig instance") {
        // This is the least safe way to convert as an error is thrown if it fails
        socketConfigFromConfig should be (socketConfig)
      }
      
      it("should convert json file to SocketConfig object") {
        socketConfigFromConfig.stdin_port should be (10000)
      }
    }
  }
} 
Example 129
Source File: MagicManager.scala    From incubator-toree   with Apache License 2.0 5 votes vote down vote up
package org.apache.toree.magic

import org.apache.toree.plugins.{Plugin, PluginMethodResult, PluginManager}
import org.slf4j.LoggerFactory

import scala.annotation.tailrec
import scala.language.dynamics
import scala.runtime.BoxedUnit
import scala.util.{Try, Failure, Success}

class MagicManager(private val pluginManager: PluginManager) extends Dynamic {
  protected val logger = LoggerFactory.getLogger(this.getClass.getName)
  
  @throws[MagicNotFoundException]
  def findMagic(name: String): Magic = {
    @tailrec def inheritsMagic(klass: Class[_]): Boolean = {
      if (klass == null) false
      else if (klass.getInterfaces.exists(classOf[Magic].isAssignableFrom)) true
      else inheritsMagic(klass.getSuperclass)
    }

    val magics = pluginManager.plugins
      .filter(p => inheritsMagic(p.getClass))
      .filter(_.simpleName.split("\\.").last.toLowerCase == name.toLowerCase)

    if (magics.size <= 0){
      logger.error(s"No magic found for $name!")
      throw new MagicNotFoundException(name)
    } else if (magics.size > 1) {
      logger.warn(s"More than one magic found for $name!")
    }

    magics.head.asInstanceOf[Magic]
  }

  @throws[MagicNotFoundException]
  def applyDynamic(name: String)(args: Any*): MagicOutput = {
    val arg = args.headOption.map(_.toString).getOrElse("")

    import org.apache.toree.plugins.Implicits._
    val result = pluginManager.fireEventFirstResult(
      name.toLowerCase(),
      "input" -> arg
    )

    result match {
      case Some(r: PluginMethodResult) => handleMagicResult(name, r.toTry)
      case None => throw new MagicNotFoundException(name)
    }
  }

  private def handleMagicResult(name: String, result: Try[Any]): MagicOutput = result match {
    case Success(magicOutput) => magicOutput match {
      case out: MagicOutput => out
      case null | _: BoxedUnit => MagicOutput()
      case cmo: Map[_, _]
        if cmo.keys.forall(_.isInstanceOf[String]) &&
          cmo.values.forall(_.isInstanceOf[String]) =>
        MagicOutput(cmo.asInstanceOf[Map[String, String]].toSeq:_*)
      case unknown =>
        val message =
          s"""Magic $name did not return proper magic output
             |type. Expected ${classOf[MagicOutput].getName}, but found
             |type of ${unknown.getClass.getName}.""".trim.stripMargin
        logger.warn(message)
        MagicOutput("text/plain" -> message)
    }
    case Failure(t) =>
      val message =  s"Magic $name failed to execute with error: \n${t.getMessage}"
      logger.warn(message, t)
      MagicOutput("text/plain" -> message)
  }
} 
Example 130
Source File: SignatureHashTestCaseProtocol.scala    From bitcoin-s   with MIT License 5 votes vote down vote up
package org.bitcoins.core.protocol.script.testprotocol

import org.bitcoins.core.number.{Int32, UInt32}
import org.bitcoins.core.protocol.script.ScriptPubKey
import org.bitcoins.core.protocol.transaction.Transaction
import org.bitcoins.core.script.crypto.HashType
import org.bitcoins.core.serializers.script.ScriptParser
import org.bitcoins.crypto.DoubleSha256Digest
import org.slf4j.LoggerFactory
import spray.json._


object SignatureHashTestCaseProtocol extends DefaultJsonProtocol {
  private val logger = LoggerFactory.getLogger(this.getClass)

  implicit object SignatureTestCaseProtocol
      extends RootJsonFormat[SignatureHashTestCase] {

    override def read(value: JsValue): SignatureHashTestCase = {
      val jsArray: JsArray = value match {
        case array: JsArray => array
        case _: JsValue =>
          throw new RuntimeException(
            "Script signature hash test case must be in jsarray format")
      }
      val elements: Vector[JsValue] = jsArray.elements
      val transaction: Transaction = Transaction(
        elements.head.convertTo[String])
      val asm = ScriptParser.fromHex(elements.apply(1).convertTo[String])
      val script: ScriptPubKey = ScriptPubKey(asm)
      val inputIndex: UInt32 = UInt32(elements(2).convertTo[Int])
      val hashTypeNum: Int32 = Int32(elements(3).convertTo[Int])
      val hashType: HashType = HashType(hashTypeNum)
      val hash: DoubleSha256Digest = DoubleSha256Digest(
        elements.last.convertTo[String])
      SignatureHashTestCaseImpl(transaction,
                                script,
                                inputIndex,
                                hashTypeNum,
                                hashType,
                                hash)
    }
    override def write(testCase: SignatureHashTestCase): JsValue = ???
  }
} 
Example 131
Source File: UInt5Test.scala    From bitcoin-s   with MIT License 5 votes vote down vote up
package org.bitcoins.core.number

import org.bitcoins.testkit.core.gen.NumberGenerator
import org.bitcoins.testkit.util.BitcoinSUnitTest
import org.slf4j.LoggerFactory

class UInt5Test extends BitcoinSUnitTest {

  behavior of "UInt5"

  it must "convert a byte to a UInt5 correctly" in {
    UInt5.fromByte(0.toByte) must be(UInt5.zero)
    UInt5(1.toByte) must be(UInt5.one)

    UInt5(31.toByte) must be(UInt5.max)
  }

  it must "not allow negative numbers" in {
    intercept[IllegalArgumentException] {
      UInt5(-1)
    }
  }

  it must "not allow numbers more than 31" in {
    intercept[IllegalArgumentException] {
      UInt5(32)
    }
  }

  it must "have serialization symmetry" in {
    forAll(NumberGenerator.uInt5) { u5 =>
      val u52 = UInt5.fromHex(u5.hex)
      u52 == u5
    }
  }

  it must "uint5 -> byte -> uint5" in {
    forAll(NumberGenerator.uInt5) { u5 =>
      val byte = u5.byte
      UInt5.fromByte(byte) == u5
    }
  }

  it must "uint5 -> uint8 -> uint5" in {
    forAll(NumberGenerator.uInt5) { u5 =>
      val u8 = u5.toUInt8
      u8.toUInt5 == u5
    }
  }
} 
Example 132
Source File: ZMQSubscriberTest.scala    From bitcoin-s   with MIT License 5 votes vote down vote up
package org.bitcoins.zmq

import java.net.InetSocketAddress

import org.bitcoins.core.util.BytesUtil
import org.scalatest.flatspec.AsyncFlatSpec
import org.slf4j.LoggerFactory
import org.zeromq.{ZFrame, ZMQ, ZMsg}
import scodec.bits.ByteVector

import scala.concurrent.Promise

class ZMQSubscriberTest extends AsyncFlatSpec {
  private val logger = LoggerFactory.getLogger(this.getClass().toString)

  behavior of "ZMQSubscriber"

  it must "connect to a regtest instance of a daemon and stream txs/blocks from it" in {
    //note: for this unit test to pass, you need to set up a bitcoind instance yourself
    //and configure its bitcoin.conf file to allow
    //zmq connections
    //see: https://github.com/bitcoin/bitcoin/blob/master/doc/zmq.md
    val socket = new InetSocketAddress("tcp://127.0.0.1", 29000)

    val zmqSub =
      new ZMQSubscriber(socket, None, None, rawTxListener, rawBlockListener)
    //this doesn't assert anything for now; you need to look at the log output to verify it is working
    // TODO: In the future this could use the testkit to verify the subscriber by calling generate(1)
    zmqSub.start()
    Thread.sleep(10000) // 10 seconds
    zmqSub.stop

    succeed
  }

  it must "be able to subscribe to a publisher and read a value" in {
    val port = Math.abs(scala.util.Random.nextInt % 14000) + 1000
    val socket = new InetSocketAddress("tcp://127.0.0.1", port)

    val context = ZMQ.context(1)
    val publisher = context.socket(ZMQ.PUB)

    val uri = socket.getHostString + ":" + socket.getPort
    publisher.bind(uri)

    val valuePromise = Promise[String]()
    val fakeBlockListener: Option[ByteVector => Unit] = Some { bytes =>
      val str = new String(bytes.toArray)
      valuePromise.success(str)
      ()
    }

    val sub = new ZMQSubscriber(socket, None, None, None, fakeBlockListener)
    sub.start()
    Thread.sleep(1000)

    val testValue = "sweet, sweet satoshis"

    val msg = new ZMsg()
    msg.add(new ZFrame(RawBlock.topic))
    msg.add(new ZFrame(testValue))

    val sent = msg.send(publisher)
    assert(sent)

    valuePromise.future.map { str =>
      sub.stop
      publisher.close()
      context.term()

      assert(str == testValue)
    }
  }

  val rawBlockListener: Option[ByteVector => Unit] = Some {
    { bytes: ByteVector =>
      val hex = BytesUtil.encodeHex(bytes)
      logger.debug(s"received raw block ${hex}")
    }
  }

  val hashBlockListener: Option[ByteVector => Unit] = Some {
    { bytes: ByteVector =>
      val hex = BytesUtil.encodeHex(bytes)
      logger.debug(s"received raw block hash ${hex}")

    }
  }

  val rawTxListener: Option[ByteVector => Unit] = Some {
    { bytes: ByteVector =>
      val hex = BytesUtil.encodeHex(bytes)
      logger.debug(s"received raw tx ${hex}")
    }
  }
} 
Example 133
Source File: BlockBench.scala    From bitcoin-s   with MIT License 5 votes vote down vote up
package org.bitcoins.bench.core

import org.bitcoins.core.protocol.blockchain.Block
import org.slf4j.LoggerFactory

import scala.io.Source

object BlockBench extends App {
  private def logger = LoggerFactory.getLogger(this.getClass)

  private def timeBlockParsing[R](block: () => R): Long = {
    val t0 = System.currentTimeMillis()
    val _ = block() // invoke the parsing function being timed
    val t1 = System.currentTimeMillis()
    val time = t1 - t0
    logger.info("Elapsed time: " + time + "ms")
    time
  }

  def bench1(): Unit = {
    val fileName =
      "/00000000000000000008513c860373da0484f065983aeb063ebf81c172e81d48.txt"
    val lines = Source.fromURL(getClass.getResource(fileName)).mkString
    val time = timeBlockParsing(() => Block.fromHex(lines))
    require(time <= 15000)
  }

  def bench2(): Unit = {
    val fileName =
      "/000000000000000000050f70113ab1932c195442cb49bcc4ee4d7f426c8a3295.txt"
    val lines = Source.fromURL(getClass.getResource(fileName)).mkString
    val time = timeBlockParsing(() => Block.fromHex(lines))
    require(time <= 15000)
  }

  0.until(10).foreach(_ => bench1())

  //bench2()
} 
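A small, self-contained variant of the timing helper above based on System.nanoTime, which is monotonic and therefore a little better suited to measuring elapsed time (a sketch only, not part of bitcoin-s):

object TimingSketch extends App {
  def time[R](label: String)(body: => R): R = {
    val t0 = System.nanoTime()
    val result = body
    val elapsedMs = (System.nanoTime() - t0) / 1000000
    println(s"$label took ${elapsedMs}ms")
    result
  }

  // Example: time a trivial computation.
  val sum = time("summing 1 to 1000000")((1 to 1000000).map(_.toLong).sum)
  println(sum)
}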
Example 134
Source File: SuspiciousConnects.scala    From oni-ml   with Apache License 2.0 5 votes vote down vote up
package org.opennetworkinsight

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.slf4j.LoggerFactory
import org.opennetworkinsight.SuspiciousConnectsArgumentParser.SuspiciousConnectsConfig
import org.opennetworkinsight.dns.DNSSuspiciousConnects
import org.opennetworkinsight.netflow.FlowSuspiciousConnects
import org.opennetworkinsight.proxy.ProxySuspiciousConnectsAnalysis


object SuspiciousConnects {

  def main(args: Array[String]) {

    val parser = SuspiciousConnectsArgumentParser.parser

    parser.parse(args, SuspiciousConnectsConfig()) match {
      case Some(config) =>
        val logger = LoggerFactory.getLogger(this.getClass)
        Logger.getLogger("org").setLevel(Level.OFF)
        Logger.getLogger("akka").setLevel(Level.OFF)

        val analysis = config.analysis
        val sparkConfig = new SparkConf().setAppName("ONI ML:  " + analysis + " lda")
        val sparkContext = new SparkContext(sparkConfig)
        val sqlContext = new SQLContext(sparkContext)
        implicit val outputDelimiter = OutputDelimiter

        analysis match {
          case "flow" => FlowSuspiciousConnects.run(config, sparkContext, sqlContext, logger)
          case "dns" => DNSSuspiciousConnects.run(config, sparkContext, sqlContext, logger)
          case "proxy" => ProxySuspiciousConnectsAnalysis.run(config, sparkContext, sqlContext, logger)
          case _ => println("ERROR:  unsupported (or misspelled) analysis: " + analysis)
        }

        sparkContext.stop()

      case None => println("Error parsing arguments")
    }

    System.exit(0)
  }


} 
Example 135
Source File: SLF4JSpec.scala    From scribe   with MIT License 5 votes vote down vote up
package spec

import java.util.TimeZone

import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import org.slf4j.{LoggerFactory, MDC}
import scribe.handler.LogHandler
import scribe.output.LogOutput
import scribe.util.Time
import scribe.writer.Writer
import scribe.{Level, LogRecord, Logger}

class SLF4JSpec extends AnyWordSpec with Matchers {
  TimeZone.setDefault(TimeZone.getTimeZone("UTC"))

  private var logs: List[LogRecord[_]] = Nil
  private var logOutput: List[String] = Nil
  private val recordHolder = LogHandler.default.withMinimumLevel(Level.Info).withWriter(new Writer {
    override def write[M](record: LogRecord[M], output: LogOutput): Unit = {
      logs = record :: logs
      logOutput = output.plainText :: logOutput
    }
  })

  "SLF4J" should {
    "set the time to an arbitrary value" in {
      Time.function = () => 1542376191920L
    }
    "remove existing handlers from Root" in {
      Logger.root.clearHandlers().replace()
    }
    "add a testing handler" in {
      Logger.root.withHandler(recordHolder).replace()
    }
    "verify not records are in the RecordHolder" in {
      logs.isEmpty should be(true)
    }
    "log to Scribe" in {
      val logger = LoggerFactory.getLogger(getClass)
      logger.info("Hello World!")
    }
    "verify Scribe received the record" in {
      logs.size should be(1)
      val r = logs.head
      r.level should be(Level.Info)
      r.message.plainText should be("Hello World!")
      r.className should be("spec.SLF4JSpec")
      logs = Nil
    }
    "verify Scribe wrote value" in {
      logOutput.size should be(1)
      val s = logOutput.head
      s should be("2018.11.16 13:49:51 [INFO] spec.SLF4JSpec - Hello World!")
    }
    "use MDC" in {
      MDC.put("name", "John Doe")
      val logger = LoggerFactory.getLogger(getClass)
      logger.info("A generic name")
      logOutput.head should be("2018.11.16 13:49:51 [INFO] spec.SLF4JSpec - A generic name (name: John Doe)")
    }
    "clear MDC" in {
      MDC.clear()
      val logger = LoggerFactory.getLogger(getClass)
      logger.info("MDC cleared")
      logOutput.head should be("2018.11.16 13:49:51 [INFO] spec.SLF4JSpec - MDC cleared")
    }
    "make sure logging nulls doesn't error" in {
      val logger = LoggerFactory.getLogger(getClass)
      logger.error(null)
      logs.length should be(3)
      logOutput.head should be("2018.11.16 13:49:51 [ERROR] spec.SLF4JSpec - null")
    }
  }
} 
Example 137
Source File: Logging.scala    From incubator-livy   with Apache License 2.0 5 votes vote down vote up
package org.apache.livy

import org.slf4j.LoggerFactory

trait Logging {
  lazy val logger = LoggerFactory.getLogger(this.getClass)

  def trace(message: => Any): Unit = {
    if (logger.isTraceEnabled) {
      logger.trace(message.toString)
    }
  }

  def debug(message: => Any): Unit = {
    if (logger.isDebugEnabled) {
      logger.debug(message.toString)
    }
  }

  def info(message: => Any): Unit = {
    if (logger.isInfoEnabled) {
      logger.info(message.toString)
    }
  }

  def warn(message: => Any): Unit = {
    logger.warn(message.toString)
  }

  def warn(message: => Any, t: Throwable): Unit = {
    logger.warn(message.toString, t)
  }

  def error(message: => Any, t: Throwable): Unit = {
    logger.error(message.toString, t)
  }

  def error(message: => Any): Unit = {
    logger.error(message.toString)
  }
} 
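A minimal usage sketch for the trait above (not part of the Livy sources; the JobRunner class and its method are made up for illustration): mixing in Logging gives a class a lazily created SLF4J logger and by-name helpers, so messages are only rendered when the corresponding level is enabled.

import org.apache.livy.Logging

class JobRunner extends Logging {
  def run(jobId: String): Unit = {
    info(s"Starting job $jobId")                    // rendered only if INFO is enabled
    try {
      // ... do the actual work here ...
    } catch {
      case e: Exception => error(s"Job $jobId failed", e)
    }
  }
}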
Example 138
Source File: SequoiadbRDDIterator.scala    From spark-sequoiadb   with Apache License 2.0 5 votes vote down vote up
package com.sequoiadb.spark.rdd


import _root_.com.sequoiadb.spark.SequoiadbConfig
import _root_.com.sequoiadb.spark.io.SequoiadbReader
import org.apache.spark._
import org.apache.spark.sql.sources.Filter
import org.bson.BSONObject
import org.slf4j.{Logger, LoggerFactory}
//import java.io.FileOutputStream;  


class SequoiadbRDDIterator(
  taskContext: TaskContext,
  partition: Partition,
  config: SequoiadbConfig,
  requiredColumns: Array[String],
  filters: Array[Filter],
  queryReturnType: Int = SequoiadbConfig.QUERYRETURNBSON,
  queryLimit: Long = -1)
  extends Iterator[BSONObject] {

  
  
  private var LOG: Logger = LoggerFactory.getLogger(this.getClass.getName())
  protected var finished = false
  private var closed = false
  private var initialized = false

  lazy val reader = {
    initialized = true
    initReader()
  }

  // Register an on-task-completion callback to close the input stream.
  taskContext.addTaskCompletionListener((context: TaskContext) => closeIfNeeded())

  override def hasNext: Boolean = {
    !finished && reader.hasNext
  }

  override def next(): BSONObject = {
    if (!hasNext) {
      throw new NoSuchElementException("End of stream")
    }
    reader.next()
  }

  def closeIfNeeded(): Unit = {
    if (!closed) {
      close()
      closed = true
    }
  }

  protected def close(): Unit = {
    if (initialized) {
      reader.close()
    }
  }

  def initReader() = {
    val reader = new SequoiadbReader(config,requiredColumns,filters, queryReturnType, queryLimit)
    reader.init(partition)
    reader
  }
} 
Example 139
Source File: SequoiadbRDD.scala    From spark-sequoiadb   with Apache License 2.0 5 votes vote down vote up
package com.sequoiadb.spark.rdd

import org.apache.spark.SparkContext
import _root_.com.sequoiadb.spark.SequoiadbConfig
import com.sequoiadb.spark.partitioner._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.Filter
import org.apache.spark.{Partition, TaskContext}
import org.bson.BSONObject
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.mutable.ArrayBuffer
//import java.io.FileOutputStream;  


object SequoiadbRDD {

  def apply (
    sc: SQLContext,
    config: SequoiadbConfig,
    partitioner: Option[SequoiadbPartitioner] = None,
    requiredColumns: Array[String] = Array(),
    filters: Array[Filter] = Array(),
    queryReturnType: Int = SequoiadbConfig.QUERYRETURNBSON,
    queryLimit: Long = -1) = {
    new SequoiadbRDD ( sc.sparkContext, config, partitioner,
      requiredColumns, filters, queryReturnType, queryLimit)
  }
} 
Example 140
Source File: SequoiadbWriter.scala    From spark-sequoiadb   with Apache License 2.0 5 votes vote down vote up
package com.sequoiadb.spark.io


  def save(it: Iterator[Row], schema: StructType): Unit = {
    try {
      ds = Option(new SequoiadbDatasource (
          config[List[String]](SequoiadbConfig.Host),
          config[String](SequoiadbConfig.Username),
          config[String](SequoiadbConfig.Password),
          ConnectionUtil.initConfigOptions,
          ConnectionUtil.initSequoiadbOptions ))
      // pickup a connection
      connection = Option(ds.get.getConnection)
      
      // locate collection
      val cl = connection.get.getCollectionSpace(
          config[String](SequoiadbConfig.CollectionSpace)).getCollection(
              config[String](SequoiadbConfig.Collection))
      LOG.info ("bulksize = " + config[String](SequoiadbConfig.BulkSize))
      // loop through it and perform batch insert
      // batch size is defined in SequoiadbConfig.BulkSize
      val list : ArrayList[BSONObject] = new ArrayList[BSONObject]()
      while ( it.hasNext ) {
        val record = it.next
        val bsonrecord = SequoiadbRowConverter.rowAsDBObject ( record, schema )
        list.add(bsonrecord)
        if ( list.size >= config[String](SequoiadbConfig.BulkSize).toInt ) {
          cl.bulkInsert ( list, 0 )
          list.clear
        }
      }
      // insert rest of the record if there's any
      if ( list.size > 0 ) {
        cl.bulkInsert ( list, 0 )
        list.clear
      }
    } catch {
      case ex: Exception => throw SequoiadbException(ex.getMessage, ex)
    } finally {
      ds.fold(ifEmpty=()) { connectionpool =>
        connection.fold(ifEmpty=()) { conn =>
          connectionpool.close(conn)
        }
        connectionpool.close
      } // ds.fold(ifEmpty=())
    } // finally
  } // def save(it: Iterator[BSONObject]): Unit =
} 
Example 141
Source File: AbstractLoggingServiceRegistryClient.scala    From lagom   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.devmode.internal.registry

import java.net.URI

import org.slf4j.Logger
import org.slf4j.LoggerFactory

import scala.collection.immutable
import scala.concurrent.ExecutionContext
import scala.concurrent.Future
import scala.util.Failure
import scala.util.Success

private[lagom] abstract class AbstractLoggingServiceRegistryClient(implicit ec: ExecutionContext)
    extends ServiceRegistryClient {
  protected val log: Logger = LoggerFactory.getLogger(getClass)

  override def locateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]] = {
    require(
      serviceName != ServiceRegistryClient.ServiceName,
      "The service registry client cannot locate the service registry service itself"
    )
    log.debug("Locating service name=[{}] ...", serviceName)

    val location: Future[immutable.Seq[URI]] = internalLocateAll(serviceName, portName)

    location.onComplete {
      case Success(Nil) =>
        log.warn("serviceName=[{}] was not found. Hint: Maybe it was not started?", serviceName)
      case Success(uris) =>
        log.debug("serviceName=[{}] can be reached at uris=[{}]", serviceName: Any, uris: Any)
      case Failure(e) =>
        log.warn("Service registry replied with an error when looking up serviceName=[{}]", serviceName: Any, e: Any)
    }

    location
  }

  protected def internalLocateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]]
} 
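A hedged sketch of a concrete subclass (StaticServiceRegistryClient is not part of Lagom, and real implementations live inside the lagom package because the base class is package-private): only internalLocateAll needs to be supplied, and the base class above wraps it with the debug/warn logging shown.

import java.net.URI

import scala.collection.immutable
import scala.concurrent.ExecutionContext
import scala.concurrent.Future

class StaticServiceRegistryClient(entries: Map[String, immutable.Seq[URI]])(implicit ec: ExecutionContext)
    extends AbstractLoggingServiceRegistryClient {
  // Resolve purely from the in-memory map; an empty result triggers the base class's warning.
  override protected def internalLocateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]] =
    Future.successful(entries.getOrElse(serviceName, Nil))
}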
Example 142
Source File: HeaderFilters.scala    From lagom   with Apache License 2.0 5 votes vote down vote up
package docs.scaladsl.services.headerfilters

package compose {
  import com.lightbend.lagom.scaladsl.api.transport.HeaderFilter
  import com.lightbend.lagom.scaladsl.api.transport.RequestHeader
  import com.lightbend.lagom.scaladsl.api.transport.ResponseHeader
  import com.lightbend.lagom.scaladsl.api.Service
  import com.lightbend.lagom.scaladsl.api.ServiceCall
  import org.slf4j.LoggerFactory

  //#verbose-filter
  class VerboseFilter(name: String) extends HeaderFilter {
    private val log = LoggerFactory.getLogger(getClass)

    def transformClientRequest(request: RequestHeader) = {
      log.debug(name + " - transforming Client Request")
      request
    }

    def transformServerRequest(request: RequestHeader) = {
      log.debug(name + " - transforming Server Request")
      request
    }

    def transformServerResponse(response: ResponseHeader, request: RequestHeader) = {
      log.debug(name + " - transforming Server Response")
      response
    }

    def transformClientResponse(response: ResponseHeader, request: RequestHeader) = {
      log.debug(name + " - transforming Client Response")
      response
    }
  }
  //#verbose-filter

  trait HelloService extends Service {
    def sayHello: ServiceCall[String, String]

    //#header-filter-composition
    def descriptor = {
      import Service._
      named("hello")
        .withCalls(
          call(sayHello)
        )
        .withHeaderFilter(
          HeaderFilter.composite(
            new VerboseFilter("Foo"),
            new VerboseFilter("Bar")
          )
        )
    }
    //#header-filter-composition
  }
} 
Example 143
Source File: AkkaDiscoveryHelper.scala    From lagom   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.internal.client

import java.net.URI
import java.net.URISyntaxException
import java.util.concurrent.ThreadLocalRandom
import java.util.concurrent.TimeUnit

import akka.discovery.ServiceDiscovery
import akka.discovery.ServiceDiscovery.ResolvedTarget
import com.typesafe.config.Config
import org.slf4j.LoggerFactory

import scala.concurrent.ExecutionContext
import scala.concurrent.Future
import scala.concurrent.duration._


private[lagom] class AkkaDiscoveryHelper(config: Config, serviceDiscovery: ServiceDiscovery)(
    implicit
    ec: ExecutionContext
) {
  private val logger = LoggerFactory.getLogger(this.getClass)

  private val serviceNameMapper = new ServiceNameMapper(config)
  private val lookupTimeout     = config.getDuration("lookup-timeout", TimeUnit.MILLISECONDS).millis

  def locateAll(name: String): Future[Seq[URI]] = {
    val serviceLookup = serviceNameMapper.mapLookupQuery(name)
    serviceDiscovery
      .lookup(serviceLookup.lookup, lookupTimeout)
      .map { resolved =>
        logger.debug("Retrieved addresses: {}", resolved.addresses)
        resolved.addresses.map(target => toURI(target, serviceLookup))
      }
  }

  def locate(name: String): Future[Option[URI]] = locateAll(name).map(selectRandomURI)

  private def toURI(resolvedTarget: ResolvedTarget, lookup: ServiceLookup): URI = {
    val port = resolvedTarget.port.getOrElse(-1)

    val scheme = lookup.scheme.orNull

    try {
      new URI(
        scheme,              // scheme
        null,                // userInfo
        resolvedTarget.host, // host
        port,                // port
        null,                // path
        null,                // query
        null                 // fragment
      )
    } catch {
      case e: URISyntaxException => throw new RuntimeException(e)
    }
  }

  private def selectRandomURI(uris: Seq[URI]) = uris match {
    case Nil      => None
    case Seq(one) => Some(one)
    case many     => Some(many(ThreadLocalRandom.current().nextInt(many.size)))
  }
} 
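A usage sketch under stated assumptions (the config path lagom.akka.discovery and the service name are placeholders, and the class is package-private in Lagom, so this is illustrative rather than a drop-in snippet): the helper is wired to Akka's default ServiceDiscovery and asked to resolve a logical service name.

import akka.actor.ActorSystem
import akka.discovery.Discovery

val system = ActorSystem("discovery-demo")
import system.dispatcher // implicit ExecutionContext for the Futures below

val helper = new AkkaDiscoveryHelper(
  system.settings.config.getConfig("lagom.akka.discovery"), // assumed config scope
  Discovery(system).discovery                               // Akka's default discovery implementation
)

helper.locate("hello-service").foreach(uriOpt => println(s"resolved to: $uriOpt"))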
Example 144
Source File: ServiceNameMapper.scala    From lagom   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.internal.client

import akka.discovery.Lookup
import com.typesafe.config.Config
import com.typesafe.config.ConfigObject
import com.typesafe.config.ConfigValueType
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._

private[lagom] class ServiceNameMapper(config: Config) {
  private val logger = LoggerFactory.getLogger(this.getClass)

  private val defaultPortName     = readConfigValue(config, "defaults.port-name").toOption
  private val defaultPortProtocol = readConfigValue(config, "defaults.port-protocol").toOption
  private val defaultScheme       = readConfigValue(config, "defaults.scheme").toOption

  private sealed trait ConfigValue {
    def toOption =
      this match {
        case NonEmpty(v) => Some(v)
        case _           => None
      }
  }
  private object ConfigValue {
    def apply(value: String) =
      if (value.trim.isEmpty) Empty
      else NonEmpty(value.trim)
  }
  private case object Undefined              extends ConfigValue
  private case object Empty                  extends ConfigValue
  private case class NonEmpty(value: String) extends ConfigValue

  private def readConfigValue(config: Config, name: String): ConfigValue =
    if (config.hasPathOrNull(name)) {
      if (config.getIsNull(name)) Empty
      else ConfigValue(config.getString(name))
    } else Undefined

  
  private def readOptionalConfigValue(config: Config, name: String, defaultValue: Option[String]): Option[String] =
    readConfigValue(config, name) match {
      case Undefined => defaultValue
      // this is the case where the user explicitly set the scheme to an empty string
      case Empty           => None
      case NonEmpty(value) => Option(value)
    }

  private val serviceLookupMapping: Map[String, ServiceLookup] =
    config
      .getObject("service-name-mappings")
      .entrySet()
      .asScala
      .map { entry =>
        if (entry.getValue.valueType != ConfigValueType.OBJECT) {
          throw new IllegalArgumentException(
            s"Illegal value type in service-name-mappings: ${entry.getKey} - ${entry.getValue.valueType}"
          )
        }
        val configEntry = entry.getValue.asInstanceOf[ConfigObject].toConfig

        // read config values for portName, portProtocol and scheme
        // when not explicitly overridden by the user, the default values are used
        val portName     = readOptionalConfigValue(configEntry, "port-name", defaultPortName)
        val portProtocol = readOptionalConfigValue(configEntry, "port-protocol", defaultPortProtocol)
        val scheme       = readOptionalConfigValue(configEntry, "scheme", defaultScheme)

        val lookup: Lookup =
          readConfigValue(configEntry, "lookup").toOption
            .map(name => parseSrv(name, portName, portProtocol))
            .getOrElse(Lookup(entry.getKey, portName, portProtocol))

        entry.getKey -> ServiceLookup(lookup, scheme)
      }
      .toMap

  private def parseSrv(name: String, portName: Option[String], portProtocol: Option[String]) =
    if (Lookup.isValidSrv(name)) Lookup.parseSrv(name)
    else Lookup(name, portName, portProtocol)

  private[lagom] def mapLookupQuery(name: String): ServiceLookup = {
    val serviceLookup = serviceLookupMapping.getOrElse(
      name,
      ServiceLookup(parseSrv(name, defaultPortName, defaultPortProtocol), defaultScheme)
    )
    logger.debug("Lookup service '{}', mapped to {}", name: Any, serviceLookup: Any)
    serviceLookup
  }
}

private[lagom] case class ServiceLookup(lookup: Lookup, scheme: Option[String]) 
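A sketch of how the mapper could be exercised with an inline configuration (the keys mirror the ones read above; the service names and SRV record are invented, and the class is package-private in Lagom, so this is illustration only): explicitly mapped names use their entry, everything else falls back to the defaults.

import com.typesafe.config.ConfigFactory

val config = ConfigFactory.parseString(
  """
    |defaults.port-name = http
    |defaults.port-protocol = tcp
    |defaults.scheme = http
    |service-name-mappings {
    |  hello-service {
    |    lookup = "_http._tcp.hello.svc.cluster.local"
    |    scheme = https
    |  }
    |}
  """.stripMargin
)

val mapper = new ServiceNameMapper(config)
println(mapper.mapLookupQuery("hello-service")) // uses the explicit mapping (SRV lookup, https scheme)
println(mapper.mapLookupQuery("other-service")) // falls back to the configured defaults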
Example 145
Source File: AwaitPersistenceInit.scala    From lagom   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.internal.persistence.testkit

import java.util.concurrent.TimeUnit

import akka.actor.ActorSystem
import akka.actor.Props
import akka.persistence.PersistentActor
import akka.testkit.TestProbe
import org.slf4j.LoggerFactory

import scala.concurrent.duration._

// A copy of akka.persistence.cassandra.CassandraLifecycle's awaitPersistenceInit.
private[lagom] object AwaitPersistenceInit {
  def awaitPersistenceInit(system: ActorSystem): Unit = {
    val probe = TestProbe()(system)
    val log   = LoggerFactory.getLogger(getClass)
    val t0    = System.nanoTime()
    var n     = 0
    probe.within(45.seconds) {
      probe.awaitAssert {
        n += 1
        system.actorOf(Props[AwaitPersistenceInit], "persistenceInit" + n).tell("hello", probe.ref)
        probe.expectMsg(15.seconds, "hello")
        log.debug(
          "awaitPersistenceInit took {} ms {}",
          TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0),
          system.name
        )
      }
    }
  }
}

private[lagom] class AwaitPersistenceInit extends PersistentActor {
  def persistenceId: String = self.path.name

  def receiveRecover: Receive = {
    case _ =>
  }

  def receiveCommand: Receive = {
    case msg =>
      persist(msg) { _ =>
        sender() ! msg
        context.stop(self)
      }
  }
} 
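A minimal test-setup sketch (the ActorSystem name is arbitrary, and the object is package-private in Lagom, so this only illustrates the intended call): block until the persistence plugin has finished journal and snapshot initialization before the real test cases run.

import akka.actor.ActorSystem

import com.lightbend.lagom.internal.persistence.testkit.AwaitPersistenceInit

val system = ActorSystem("persistence-init-spec") // would normally come from the test kit
AwaitPersistenceInit.awaitPersistenceInit(system) // blocks (up to 45 seconds) until persistence is ready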
Example 146
Source File: CassandraReadSideHandler.scala    From lagom   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.internal.scaladsl.persistence.cassandra

import akka.persistence.query.Offset
import akka.stream.ActorAttributes
import akka.stream.scaladsl.Flow
import akka.Done
import akka.NotUsed
import com.datastax.driver.core.BatchStatement
import com.datastax.driver.core.BoundStatement
import com.lightbend.lagom.internal.persistence.cassandra.CassandraOffsetDao
import com.lightbend.lagom.internal.persistence.cassandra.CassandraOffsetStore
import com.lightbend.lagom.scaladsl.persistence.ReadSideProcessor.ReadSideHandler
import com.lightbend.lagom.scaladsl.persistence._
import com.lightbend.lagom.scaladsl.persistence.cassandra.CassandraSession
import org.slf4j.LoggerFactory

import scala.collection.immutable
import scala.concurrent.ExecutionContext
import scala.concurrent.Future
import scala.collection.JavaConverters._


private[cassandra] final class CassandraAutoReadSideHandler[Event <: AggregateEvent[Event]](
    session: CassandraSession,
    offsetStore: CassandraOffsetStore,
    handlers: Map[Class[_ <: Event], CassandraAutoReadSideHandler.Handler[Event]],
    globalPrepareCallback: () => Future[Done],
    prepareCallback: AggregateEventTag[Event] => Future[Done],
    readProcessorId: String,
    dispatcher: String
)(implicit ec: ExecutionContext)
    extends CassandraReadSideHandler[Event, CassandraAutoReadSideHandler.Handler[Event]](
      session,
      handlers,
      dispatcher
    ) {
  import CassandraAutoReadSideHandler.Handler

  @volatile
  private var offsetDao: CassandraOffsetDao = _

  protected override def invoke(
      handler: Handler[Event],
      element: EventStreamElement[Event]
  ): Future[immutable.Seq[BoundStatement]] = {
    for {
      statements <- handler
        .asInstanceOf[EventStreamElement[Event] => Future[immutable.Seq[BoundStatement]]]
        .apply(element)
    } yield statements :+ offsetDao.bindSaveOffset(element.offset)
  }

  protected def offsetStatement(offset: Offset): immutable.Seq[BoundStatement] =
    immutable.Seq(offsetDao.bindSaveOffset(offset))

  override def globalPrepare(): Future[Done] = {
    globalPrepareCallback.apply()
  }

  override def prepare(tag: AggregateEventTag[Event]): Future[Offset] = {
    for {
      _   <- prepareCallback.apply(tag)
      dao <- offsetStore.prepare(readProcessorId, tag.tag)
    } yield {
      offsetDao = dao
      dao.loadedOffset
    }
  }
} 
Example 147
Source File: LivySubmit.scala    From spark-bench   with Apache License 2.0 5 votes vote down vote up
package com.ibm.sparktc.sparkbench.sparklaunch.submission.livy

import com.ibm.sparktc.sparkbench.sparklaunch.confparse.SparkJobConf
import com.ibm.sparktc.sparkbench.sparklaunch.submission.livy.LivySubmit._
import com.ibm.sparktc.sparkbench.sparklaunch.submission.Submitter
import com.ibm.sparktc.sparkbench.utils.SparkBenchException
import com.softwaremill.sttp.{Id, SttpBackend}
import org.slf4j.{Logger, LoggerFactory}

import scala.annotation.tailrec
import scala.sys.ShutdownHookThread

object LivySubmit {
  val log: Logger = LoggerFactory.getLogger(this.getClass)
  val successCode = 200

  import com.softwaremill.sttp._

  val emptyBodyException: SparkBenchException = SparkBenchException("REST call returned empty message body")
  val nonSuccessCodeException: Int => SparkBenchException = (code: Int) => SparkBenchException(s"REST call returned non-success code: $code")

  def apply(): LivySubmit = {
    new LivySubmit()(HttpURLConnectionBackend())
  }

  def cancelAllBatches(livyWithID: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyDelete] = {
    log.info(s"Cancelling batch request id: ${livyWithID.id}")
    val response = livyWithID.deleteRequest.send()
    (response.is200, response.body) match {
      case (true, Right(bod)) => if (bod.msg == "deleted") response else throw SparkBenchException(s"Unexpected status for delete request: ${bod.msg}")
      case (true, Left(b))    => throw emptyBodyException
      case (_, _)             => throw nonSuccessCodeException(response.code)
    }
  }

  def sendPostBatchRequest(conf: SparkJobConf)
                          (implicit backend: SttpBackend[Id, Nothing]):
                            (LivyRequestWithID, Response[ResponseBodyBatch]) = {
    val livyRequest = LivyRequest(conf)
    log.info(s"Sending Livy POST request:\n${livyRequest.postRequest.toString}")
    val response: Id[Response[ResponseBodyBatch]] = livyRequest.postRequest.send()
    (response.isSuccess, response.body) match {
      case (true, Left(_)) => throw emptyBodyException
      case (false, Left(_)) => throw nonSuccessCodeException(response.code)
      case (false, Right(bod)) => throw SparkBenchException(s"POST Request to ${livyRequest.postBatchUrl} failed:\n" +
        s"${bod.log.mkString("\n")}")
      case (_,_) => // no exception thrown
    }
    val livyWithID = LivyRequestWithID(livyRequest, response.body.right.get.id)
    (livyWithID, response)
  }

  private def pollHelper(request: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyState] = {
    Thread.sleep(request.pollSeconds * 1000)
    log.info(s"Sending Livy status GET request:\n${request.statusRequest.toString}")
    val response: Id[Response[ResponseBodyState]] = request.statusRequest.send()
    response
  }

  @tailrec
  def poll(request: LivyRequestWithID, response: Response[ResponseBodyState])
          (implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyState] = (response.isSuccess, response.body) match {
    case (false, _) => throw SparkBenchException(s"Request failed with code ${response.code}")
    case (_, Left(_)) => throw emptyBodyException
    case (true, Right(bod)) => bod.state match {
      case "success" => response
      case "dead" => throw SparkBenchException(s"Poll request failed with state: dead\n" + getLogs(request))
      case "running" => poll(request, pollHelper(request))
      case st => throw SparkBenchException(s"Poll request failed with state: $st")
    }
  }

  def getLogs(request: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): String = {
    val response = request.logRequest.send()
    (response.is200, response.body) match {
      case (true, Right(bod)) => bod.log.mkString("\n")
      case (false, Right(_)) => throw SparkBenchException(s"Log request failed with code: ${response.code}")
      case (_, Left(_)) => throw emptyBodyException
    }
  }
}

class LivySubmit()(implicit val backend: SttpBackend[Id, Nothing]) extends Submitter {
  override def launch(conf: SparkJobConf): Unit = {
    val (livyWithID, postResponse) = sendPostBatchRequest(conf)(backend)
    val shutdownHook: ShutdownHookThread = sys.ShutdownHookThread {
      // interrupt any batches
      cancelAllBatches(livyWithID)(backend)
    }
    val pollResponse = poll(livyWithID, pollHelper(livyWithID))(backend)
    // The request has completed, so we're going to remove the shutdown hook.
    shutdownHook.remove()
  }
} 
Example 148
Source File: CLIKickoff.scala    From spark-bench   with Apache License 2.0 5 votes vote down vote up
package com.ibm.sparktc.sparkbench.cli

import org.slf4j.{Logger, LoggerFactory}
import com.ibm.sparktc.sparkbench.workload.MultipleSuiteKickoff

object CLIKickoff extends App {
  override def main(args: Array[String]): Unit = {
    val log: Logger = LoggerFactory.getLogger(this.getClass)
    log.info(s"args received: ${args.mkString(", ")}")
    if(args.isEmpty) throw new IllegalArgumentException("CLIKickoff received no arguments")
    val oneStr = args.mkString(" ")
    val worksuites = Configurator(oneStr)
    MultipleSuiteKickoff.run(worksuites)
  }
} 
Example 149
Source File: SparkStreamingQueryListener.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

object SparkStreamingQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = {
    new SparkStreamingQueryListener(spark, restart)
  }

}

class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import SparkStreamingQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)


  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  //https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows

      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong)
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong)
    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }

    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
} 
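A hedged registration sketch (the application name and the restartAll body are placeholders): the listener is attached to the session's StreamingQueryManager so that progress and termination events flow through the SLF4J logger and the Kamon histograms above.

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("trend-discovery")
  .getOrCreate()

def restartAll(): Unit = () // placeholder: restart the streaming queries here

spark.streams.addListener(SparkStreamingQueryListener(spark, () => restartAll()))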
Example 150
Source File: EventAggregationSpec.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.util

import com.twilio.open.protocol.Calls.CallEvent
import com.twilio.open.protocol.Metrics
import com.twilio.open.streaming.trend.discovery.streams.EventAggregation
import org.apache.kafka.common.serialization.{Deserializer, Serializer, StringDeserializer, StringSerializer}
import org.apache.spark.sql.streaming.{OutputMode, Trigger}
import org.apache.spark.sql._
import org.apache.spark.sql.kafka010.KafkaTestUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.{Logger, LoggerFactory}

class EventAggregationSpec extends KafkaBackedTest[String, CallEvent] {
  override val testUtils = new KafkaTestUtils[String, CallEvent] {
    override val keySerializer: Serializer[String] = new StringSerializer
    override val keyDeserializer: Deserializer[String] = new StringDeserializer
    override val valueSerializer: Serializer[CallEvent] = new CallEventSerializer
    override val valueDeserializer: Deserializer[CallEvent] = new CallEventDeserializer
  }
  override protected val kafkaTopic = "spark.summit.call.events"
  override protected val partitions = 8

  private val pathToTestScenarios = "src/test/resources/scenarios"

  val log: Logger = LoggerFactory.getLogger(classOf[EventAggregation])

  lazy val session: SparkSession = sparkSql

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("aggregation-test-app")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.shuffle.partitions", "32")
      .set("spark.executor.cores", "4")
      .set("spark.executor.memory", "1g")
      .set("spark.ui.enabled", "false")
      .setJars(SparkContext.jarOfClass(classOf[EventAggregation]).toList)
  }

  test("Should aggregate call events") {
    import session.implicits._
    val appConfig = appConfigForTest()
    val scenario = TestHelper.loadScenario[CallEvent](s"$pathToTestScenarios/pdd_events.json")
    val scenarioIter = scenario.toIterator
    scenario.nonEmpty shouldBe true

    testUtils.createTopic(kafkaTopic, partitions, overwrite = true)
    sendNextMessages(scenarioIter, 30, _.getEventId, _.getLoggedEventTime)

    val trendDiscoveryApp = new TrendDiscoveryApp(appConfigForTest(), session)
    val eventAggregation = EventAggregation(appConfig)

    eventAggregation.process(trendDiscoveryApp.readKafkaStream())(session)
      .writeStream
      .queryName("calleventaggs")
      .format("memory")
      .outputMode(eventAggregation.outputMode)
      .start()
      .processAllAvailable()

    val df = session.sql("select * from calleventaggs")
    df.printSchema()
    df.show

    val res = session
      .sql("select avg(stats.p99) from calleventaggs")
      .collect()
      .map { r =>
        r.getAs[Double](0) }
      .head

    DiscoveryUtils.round(res) shouldEqual 7.13

  }


}

class CallEventSerializer extends Serializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def serialize(topic: String, data: CallEvent): Array[Byte] = data.toByteArray
  override def close(): Unit = {}
}

class CallEventDeserializer extends Deserializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def deserialize(topic: String, data: Array[Byte]): CallEvent = CallEvent.parseFrom(data)
  override def close(): Unit = {}
} 
Example 151
Source File: TestHelper.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.io.{ByteArrayInputStream, InputStream}

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.protobuf.Message
import com.googlecode.protobuf.format.JsonFormat
import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.Seq
import scala.io.Source
import scala.reflect.ClassTag
import scala.reflect.classTag

object TestHelper {
  val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper")
  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
  }

  val jsonFormat: JsonFormat = new JsonFormat

  def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = {
    val fileString = Source.fromFile(file).mkString
    val parsed = mapper.readValue(fileString, classOf[Sceanario])
    parsed.input.map { data =>
      val json = mapper.writeValueAsString(data)
      convert[T](json)
    }
  }

  def convert[T<: Message : ClassTag](json: String): T = {
    val clazz = classTag[T].runtimeClass
    val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder]
    try {
      val input: InputStream = new ByteArrayInputStream(json.getBytes())
      jsonFormat.merge(input, builder)
      builder.build().asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw e
    }
  }

}

@SerialVersionUID(1L)
case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable

case class Sceanario(input: Seq[Any], expected: Option[Any] = None)

trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider {
  self: Suite =>

  @transient var _sparkSql: SparkSession = _
  @transient private var _sc: SparkContext = _

  override def sc: SparkContext = _sc

  def conf: SparkConf

  def sparkSql: SparkSession = _sparkSql

  override def beforeAll() {
    _sparkSql = SparkSession.builder().config(conf).getOrCreate()

    _sc = _sparkSql.sparkContext
    setup(_sc)
    super.beforeAll()
  }

  override def afterAll() {
    try {
      _sparkSql.close()
      _sparkSql = null
      LocalSparkContext.stop(_sc)
      _sc = null
    } finally {
      super.afterAll()
    }
  }

} 
Example 152
Source File: CanalEntry2RowDataInfoMappingFormat4Sda.scala    From estuary   with Apache License 2.0 5 votes vote down vote up
package com.neighborhood.aka.laplace.estuary.mysql.lifecycle.reborn.batch.mappings

import com.neighborhood.aka.laplace.estuary.bean.key.PartitionStrategy
import com.neighborhood.aka.laplace.estuary.mysql.lifecycle
import com.neighborhood.aka.laplace.estuary.mysql.lifecycle.MysqlRowDataInfo
import com.neighborhood.aka.laplace.estuary.mysql.schema.SdaSchemaMappingRule
import com.neighborhood.aka.laplace.estuary.mysql.schema.tablemeta.MysqlTableSchemaHolder
import com.typesafe.config.Config
import org.slf4j.LoggerFactory


final class CanalEntry2RowDataInfoMappingFormat4Sda(
                                                     override val partitionStrategy: PartitionStrategy,
                                                     override val syncTaskId: String,
                                                     override val syncStartTime: Long,
                                                     override val schemaComponentIsOn: Boolean,
                                                     override val isCheckSchema: Boolean,
                                                     override val config: Config,
                                                     override val schemaHolder: Option[MysqlTableSchemaHolder] = None,
                                                     val tableMappingRule: SdaSchemaMappingRule

                                                   ) extends CanalEntry2RowDataInfoMappingFormat {

  override protected lazy val logger = LoggerFactory.getLogger(classOf[CanalEntry2RowDataInfoMappingFormat4Sda])

  override def transform(x: lifecycle.EntryKeyClassifier): MysqlRowDataInfo = {
    val entry = x.entry
    val header = entry.getHeader
    val (dbName, tableName) = tableMappingRule.getMappingName(header.getSchemaName, header.getTableName)
    val dmlType = header.getEventType
    val columnList = x.columnList
    checkAndGetMysqlRowDataInfo(dbName, tableName, dmlType, columnList, entry)
  }


} 
Example 153
Source File: MultipleJsonKeyPartitioner.scala    From estuary   with Apache License 2.0 5 votes vote down vote up
package com.neighborhood.aka.laplace.estuary.bean.key

import java.util

import org.apache.kafka.clients.producer.Partitioner
import org.apache.kafka.common.Cluster
import org.slf4j.LoggerFactory


class MultipleJsonKeyPartitioner extends Partitioner {
  val logger = LoggerFactory.getLogger(classOf[MultipleJsonKeyPartitioner])

  private def partitionByPrimaryKey(key: Any)(implicit partitions: Int): Int = {
    key.hashCode() % partitions
  }

  private def partitionByMod(mod: Long)(implicit partitions: Int): Int = (mod % partitions).toInt

  private def partitionByDbAndTable(db: String, tb: String)(implicit partitions: Int): Int = s"$db-$tb".hashCode % partitions

  override def partition(topic: String, key: Any, keyBytes: Array[Byte], value: Any, valueBytes: Array[Byte], cluster: Cluster): Int = {
    implicit val partitions: Int = cluster.partitionCountForTopic(topic)
    key match {
      case x: BinlogKey => {
        x.getPartitionStrategy match {
          case PartitionStrategy.MOD => math.abs(partitionByMod(x.getSyncTaskSequence))
          case PartitionStrategy.PRIMARY_KEY => math.abs(partitionByPrimaryKey(x.getPrimaryKeyValue))
          case _ => ???
        }
      }
      case x: OplogKey => {
        x.getPartitionStrategy match {
          case PartitionStrategy.PRIMARY_KEY => math.abs(partitionByPrimaryKey(x.getMongoOpsUuid))
          case _ => ???
        }
      }
    }
  }

  override def close(): Unit = {}

  override def configure(configs: util.Map[String, _]): Unit = {}
} 
Example 154
Source File: SupportUtil.scala    From estuary   with Apache License 2.0 5 votes vote down vote up
package com.neighborhood.aka.laplace.estuary.core.util

import com.neighborhood.aka.laplace.estuary.bean.exception.other.TimeoutException
import com.neighborhood.aka.laplace.estuary.core.task.TaskManager
import org.slf4j.LoggerFactory

import scala.annotation.tailrec


object SupportUtil {

  private lazy val logger = LoggerFactory.getLogger(getClass)

  @tailrec
  @throws[TimeoutException]
  def loopWaiting4SendCurrentAllDataFinish(taskManager: TaskManager, timeout: Option[Long] = None, startTs: Long = System.currentTimeMillis()): Unit = {
    lazy val currentTs = System.currentTimeMillis()
    lazy val totalCost = currentTs - startTs
    lazy val isTimeout = timeout.fold(false)(t => totalCost >= t)
    if (isTimeout) {
      logger.warn(s"time has been run out when loopWaiting4SendDataFinish,currentTs:$currentTs,timeOut:$timeout,startTs:$startTs")
      throw new TimeoutException(s"time has been run out when loopWaiting4SendDataFinish,currentTs:$currentTs,timeOut:$timeout,startTs:$startTs")
    } else loopWaiting4SendCurrentAllDataFinish(taskManager, timeout, startTs)
  }
} 
Example 155
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString
  
  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }
  
  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
} 
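A short usage sketch (the resource path is a placeholder): both helpers return a scala.io.BufferedSource, and closing it is left to the caller.

import org.clulab.wm.wmexchanger.utils.Sourcer

val source = Sourcer.sourceFromResource("/org/clulab/wm/example.txt") // hypothetical resource on the classpath
try {
  source.getLines().foreach(println)
} finally {
  source.close()
}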
Example 156
Source File: KafkaConsumer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.wmconsumer

import java.io.File
import java.time.Duration
import java.util.Collections
import java.util.ConcurrentModificationException
import java.util.Properties

import org.apache.kafka.clients.consumer.{KafkaConsumer => ApacheKafkaConsumer}
import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser
import org.clulab.wm.wmexchanger.utils.FileUtils
import org.clulab.wm.wmexchanger.utils.FileEditor
import org.json4s._
import org.slf4j.Logger
import org.slf4j.LoggerFactory

class KafkaConsumer(properties: Properties, closeDuration: Int, topic: String, outputDir: String) {
  import KafkaConsumer._
  implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats

  logger.info("Opening consumer...")

  protected val consumer: ApacheKafkaConsumer[String, String] = {
    val consumer = new ApacheKafkaConsumer[String, String](properties)

    consumer.subscribe(Collections.singletonList(topic))
    consumer
  }

  def poll(duration: Int): Unit = {
    val records = consumer.poll(Duration.ofSeconds(duration))

    logger.info(s"Polling ${records.count} records...")
    records.forEach { record =>
      val key = record.key
      val value = record.value
      // Imply an extension on the file so that it can be replaced.
      val file = FileEditor(new File(key + ".")).setDir(outputDir).setExt("json").get
      logger.info("Consuming " + file.getName)

      FileUtils.printWriterFromFile(file).autoClose { printWriter =>
        printWriter.print(value)
      }
    }
  }

  def close(): Unit = {
    logger.info("Closing consumer...")
    try {
      consumer.close(Duration.ofSeconds(closeDuration))
    }
    catch {
      case _: ConcurrentModificationException => // KafkaConsumer is not safe for multi-threaded access
    }
  }
}

object KafkaConsumer {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
} 
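A hedged construction sketch (broker, group, topic, and output directory are placeholders): the wrapper above only needs standard consumer properties with string deserializers, plus the topic to subscribe to and the directory the JSON payloads are written into.

import java.util.Properties

import org.clulab.wm.wmexchanger.wmconsumer.KafkaConsumer

val properties = new Properties()
properties.put("bootstrap.servers", "localhost:9092")
properties.put("group.id", "wm-exchanger")
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")

val consumer = new KafkaConsumer(properties, closeDuration = 10, topic = "some.topic", outputDir = "/tmp/docs")
consumer.poll(duration = 5) // writes one .json file per record key into outputDir
consumer.close()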
Example 157
Source File: KafkaConsumerApp.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.wmconsumer

import java.util.Properties

import org.clulab.wm.wmexchanger.utils.PropertiesBuilder
import org.clulab.wm.wmexchanger.utils.WmUserApp
import org.clulab.wm.wmexchanger.utils.SafeThread
import org.slf4j.Logger
import org.slf4j.LoggerFactory

class KafkaConsumerApp(args: Array[String]) extends WmUserApp(args,  "/kafkaconsumer.properties") {
  val localKafkaProperties: Properties = {
    // This allows the login to be contained in a file external to the project.
    val loginProperty = appProperties.getProperty("login")
    val loginPropertiesBuilder = PropertiesBuilder.fromFile(loginProperty)

    PropertiesBuilder(kafkaProperties).putAll(loginPropertiesBuilder).get
  }

  val topic: String = appProperties.getProperty("topic")
  val outputDir: String = appProperties.getProperty("outputDir")

  val pollDuration: Int = appProperties.getProperty("poll.duration").toInt
  val waitDuration: Long = appProperties.getProperty("wait.duration").toLong
  val closeDuration: Int = appProperties.getProperty("close.duration").toInt

  val thread: SafeThread = new SafeThread(KafkaConsumerApp.logger) {
    override def runSafely(): Unit = {
      val consumer = new KafkaConsumer(localKafkaProperties, closeDuration, topic, outputDir)

      // autoClose isn't executed if the thread is shot down, so this hook is used instead.
      sys.ShutdownHookThread { consumer.close() }
      while (!isInterrupted)
        consumer.poll(pollDuration)
    }
  }

  if (interactive)
    thread.waitSafely(waitDuration)
}

object KafkaConsumerApp extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  new KafkaConsumerApp(args)
} 
Example 158
Source File: CurlProducerApp.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.wmproducer

import com.typesafe.config.ConfigFactory
import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser
import org.clulab.wm.wmexchanger.utils.FileUtils
import org.clulab.wm.wmexchanger.utils.PropertiesBuilder
import org.clulab.wm.wmexchanger.utils.Sinker
import org.clulab.wm.wmexchanger.utils.StringUtils
import org.slf4j.Logger
import org.slf4j.LoggerFactory

object CurlProducerApp extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val version = "0.2.3"

  val inputDir = args(0)
  val outputFile = args(1)

  val config = ConfigFactory.load("curlproducer")
  val service = config.getString("CurlProducerApp.service")
  val login = config.getString("CurlProducerApp.login")
  val properties = PropertiesBuilder.fromFile(login).get
  val username = properties.getProperty("username")
  val password = properties.getProperty("password")

  val files = FileUtils.findFiles(inputDir, "jsonld")

  Sinker.printWriterFromFile(outputFile).autoClose { printWriter =>
    files.foreach { file =>
      logger.info(s"Processing ${file.getName}")
      val docId = StringUtils.beforeFirst(file.getName, '.')
      try {
        val command = s"""curl
            |--basic
            |--user "$username:$password"
            |-X POST "$service"
            |-H "accept: application/json"
            |-H "Content-Type: multipart/form-data"
            |-F 'metadata={ "identity": "eidos", "version": "$version", "document_id": "$docId" }'
            |-F "file=@${file.getName}"
            |""".stripMargin.replace('\r', ' ').replace('\n', ' ')

        printWriter.print(command)
        printWriter.print("\n")
      }
      catch {
        case exception: Exception =>
          logger.error(s"Exception for file $file", exception)
      }
    }
  }
} 
Example 159
Source File: DocumentFilter.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils

import org.clulab.processors.corenlp.CoreNLPDocument
import org.clulab.processors.shallownlp.ShallowNLPProcessor
import org.clulab.processors.{Document, Processor}
import org.slf4j.{Logger, LoggerFactory}

trait DocumentFilter {
  def whileFiltered(document: Document)(transform: Document => Document): Document
}

class FilterByNothing extends DocumentFilter {

  def whileFiltered(doc: Document)(transform: Document => Document): Document = transform(doc)
}

object FilterByNothing {
  def apply() = new FilterByNothing
}


class FilterByLength(processor: Processor, cutoff: Int = 200) extends DocumentFilter {

  def whileFiltered(doc: Document)(transform: Document => Document): Document = {
    val text = doc.text
    val filteredDoc = filter(doc)
    val transformedDoc = transform(filteredDoc)
    val unfilteredDoc = unfilter(transformedDoc, text)

    unfilteredDoc
  }

  protected def unfilter(doc: Document, textOpt: Option[String]): Document = {
    doc.text = textOpt
    doc
  }

  protected def filter(doc: Document): Document = {
    // Iterate through the sentences, any sentence that is too long (number of tokens), remove
    val sanitizedText = sanitizeText(doc)
    val kept = doc.sentences.filter(s => s.words.length < cutoff)
    val skipped = doc.sentences.length - kept.length
    val newDoc = Document(doc.id, kept, doc.coreferenceChains, doc.discourseTree, sanitizedText)
    val newerDoc = // This is a hack for lack of copy constructor for CoreNLPDocument
      if (doc.isInstanceOf[CoreNLPDocument])
        ShallowNLPProcessor.cluDocToCoreDoc(newDoc, keepText = true)
      else
        newDoc
    if (skipped != 0)
      FilterByLength.logger.info(s"skipping $skipped sentences")
    // Return a new document from these sentences
    newerDoc
  }

  protected def sanitizeText(doc: Document): Option[String] = doc.text.map { text =>
    // Assume that these characters are never parts of words.
    var newText = text.replace('\n', ' ').replace(0x0C.toChar, ' ')
    for (s <- doc.sentences if s.endOffsets.last < newText.size) {
      // Only perform this if it isn't part of a word.  A space is most reliable.
      if (newText(s.endOffsets.last) == ' ')
        newText = newText.updated(s.endOffsets.last, '\n')
    }
    newText
  }
}

object FilterByLength {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply(processor: Processor, cutoff: Int = 200): FilterByLength = new FilterByLength(processor, cutoff)
} 
Example 160
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString
  
  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }
  
  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
} 
Example 161
Source File: Timer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils

import org.slf4j.{Logger, LoggerFactory}

import scala.collection.mutable

class Timer(val description: String) {
  var elapsedTime: Option[Long] = None
  var startTime: Option[Long] = None

  def time[R](block: => R): R = {
    val t0 = System.currentTimeMillis()
    val result: R = block    // call-by-name
    val t1 = System.currentTimeMillis()

    elapsedTime = Some(t1 - t0)
    result
  }

  def start(): Unit = {
    val t0 = System.currentTimeMillis()

    startTime = Some(t0)
  }

  def stop(): Unit = {
    if (startTime.isDefined) {
      val t1 = System.currentTimeMillis()

      elapsedTime = Some(t1 - startTime.get)
    }
  }

  override def toString: String = {
    if (elapsedTime.isDefined)
      s"\tTime\t$description\t${Timer.diffToString(elapsedTime.get)}"
    else if (startTime.isDefined)
      s"\tStart\t$description\t${startTime.get}\tms"
    else
      s"\tTimer\t$description"
  }
}

object Timer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val elapsedTimes: mutable.Map[String, Long] = mutable.Map.empty

  def addTime(key: String, milliseconds: Long): Unit = this synchronized {
    val oldTime = elapsedTimes.getOrElseUpdate(key, 0)
    val newTime = oldTime + milliseconds

    elapsedTimes.update(key, newTime)
  }

  def summarize: Unit = {
    elapsedTimes.toSeq.sorted.foreach { case (key, milliseconds) =>
      logger.info(s"\tTotal\t$key\t$milliseconds")
    }
  }

  def diffToString(diff: Long): String = {
    val  days = (diff / (1000 * 60 * 60 * 24)) / 1
    val hours = (diff % (1000 * 60 * 60 * 24)) / (1000 * 60 * 60)
    val  mins = (diff % (1000 * 60 * 60)) / (1000 * 60)
    val  secs = (diff % (1000 * 60)) / 1000
    val msecs = (diff % (1000 * 1)) / 1

    f"$days:$hours%02d:$mins%02d:$secs%02d.$msecs%03d"
  }

  // See http://biercoff.com/easily-measuring-code-execution-time-in-scala/
  def time[R](description: String, verbose: Boolean = true)(block: => R): R = {
    val t0 = System.currentTimeMillis()
    if (verbose) logger.info(s"\tStart\t$description\t$t0\tms")

    val result: R = block // call-by-name

    val t1 = System.currentTimeMillis()
    if (verbose) logger.info(s"\tStop\t$description\t$t1\tms")

    val diff = t1 - t0
    if (verbose) logger.info(s"\tDiff\t$description\t$diff\tms")
    if (verbose) logger.info(s"\tTime\t$description\t${diffToString(diff)}")
    addTime(description, diff)
    result
  }
} 
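A small usage sketch (the description and the timed block are arbitrary): Timer.time logs start/stop/diff lines through the object's logger, returns the block's result unchanged, and accumulates per-description totals that summarize prints at the end of a run.

import org.clulab.wm.eidos.utils.Timer

object TimerUsageSketch extends App {
  val wordCount = Timer.time("count words") {
    "one two three".split("\\s+").length // any block; its value is returned by time
  }
  println(s"counted $wordCount words")

  Timer.summarize // logs the accumulated total per description
}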
Example 162
Source File: DomainHandler.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.groundings

import java.time.ZonedDateTime

import com.github.clulab.eidos.Version
import com.github.clulab.eidos.Versions
import com.github.worldModelers.ontologies.{Versions => AwayVersions}
import org.clulab.wm.eidos.SentencesExtractor
import org.clulab.wm.eidos.groundings.ontologies.FullTreeDomainOntology.FullTreeDomainOntologyBuilder
import org.clulab.wm.eidos.groundings.OntologyHandler.serializedPath
import org.clulab.wm.eidos.groundings.ontologies.CompactDomainOntology
import org.clulab.wm.eidos.groundings.ontologies.FastDomainOntology
import org.clulab.wm.eidos.groundings.ontologies.HalfTreeDomainOntology.HalfTreeDomainOntologyBuilder
import org.clulab.wm.eidos.utils.Canonicalizer
import org.clulab.wm.eidos.utils.StringUtils
import org.slf4j.Logger
import org.slf4j.LoggerFactory

object DomainHandler {
  protected lazy val logger: Logger = LoggerFactory.getLogger(getClass)

  // The intention is to stop the proliferation of the generated Version class to this single method.
  protected def getVersionOpt(ontologyPath: String): (Option[String], Option[ZonedDateTime]) = {
    // This should work for local ontologies.  Absolute
    val goodVersionOpt = Versions.versions.get(MockVersions.codeDir + ontologyPath)
    // See what might have come from WordModelers/Ontologies
    val bestVersionOpt = goodVersionOpt.getOrElse {
      // These are always stored in top level directory.
      val awayVersionOpt = AwayVersions.versions.get(StringUtils.afterLast(ontologyPath, '/')).getOrElse(None)
      val homeVersionOpt = awayVersionOpt.map { awayVersion => Version(awayVersion.commit, awayVersion.date) }

      homeVersionOpt
    }

    if (bestVersionOpt.isDefined)
      (Some(bestVersionOpt.get.commit), Some(bestVersionOpt.get.date))
    else
      (None, None)
  }

  def apply(ontologyPath: String, serializedPath: String, sentencesExtractor: SentencesExtractor,
      canonicalizer: Canonicalizer, filter: Boolean = true, useCacheForOntologies: Boolean = false,
      includeParents: Boolean = false): DomainOntology = {

    // As coded below, when parents are included, the FullTreeDomainOntology is being used.
    // The faster loading version is the FastDomainOntology.
    // If parents are not included, as had traditionally been the case, the HalfTreeDomainOntology suffices.
    // Being smaller and faster, it is preferred.  The faster loading counterpart is CompactDomainOntology.
    if (includeParents) {
      if (useCacheForOntologies) {
        logger.info(s"Processing cached yml ontology with parents from $serializedPath...")
        FastDomainOntology.load(serializedPath)
      }
      else {
        logger.info(s"Processing yml ontology with parents from $ontologyPath...")
        val (versionOpt, dateOpt) = getVersionOpt(ontologyPath)
        new FullTreeDomainOntologyBuilder(sentencesExtractor, canonicalizer, filter).buildFromPath(ontologyPath, versionOpt, dateOpt)
      }
    }
    else {
      if (useCacheForOntologies) {
        logger.info(s"Processing cached yml ontology without parents from $serializedPath...")
        CompactDomainOntology.load(serializedPath)
      }
      else {
        logger.info(s"Processing yml ontology without parents from $ontologyPath...")
        val (versionOpt, dateOpt) = getVersionOpt(ontologyPath)
        new HalfTreeDomainOntologyBuilder(sentencesExtractor, canonicalizer, filter).buildFromPath(ontologyPath, versionOpt, dateOpt)
      }
    }
  }

  def mkDomainOntology(name: String, ontologyPath: String, sentenceExtractor: SentencesExtractor,
      canonicalizer: Canonicalizer, cacheDir: String, useCacheForOntologies: Boolean,
      includeParents: Boolean): DomainOntology = {
    val ontSerializedPath: String = serializedPath(name, cacheDir, includeParents)

    DomainHandler(ontologyPath, ontSerializedPath, sentenceExtractor, canonicalizer, filter = true,
        useCacheForOntologies = useCacheForOntologies, includeParents = includeParents)
  }
} 
Example 163
Source File: ExtractCluMetaFromDirectory.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.apps.batch

import java.io.File

import org.clulab.serialization.json.stringify
import org.clulab.wm.eidos.EidosSystem
import org.clulab.wm.eidos.groundings.EidosAdjectiveGrounder
import org.clulab.wm.eidos.serialization.jsonld.JLDCorpus
import org.clulab.wm.eidos.utils.Closer.AutoCloser
import org.clulab.wm.eidos.utils.FileEditor
import org.clulab.wm.eidos.utils.FileUtils
import org.clulab.wm.eidos.utils.ThreadUtils
import org.clulab.wm.eidos.utils.Timer
import org.clulab.wm.eidos.utils.meta.CluText
import org.slf4j.Logger
import org.slf4j.LoggerFactory

object ExtractCluMetaFromDirectory extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val inputDir = args(0)
  val metaDir = args(1)
  val outputDir = args(2)
  val timeFile = args(3)
  val threads = args(4).toInt

  val doneDir = inputDir + "/done"
  val textToMeta = CluText.convertTextToMeta _

  val files = FileUtils.findFiles(inputDir, "txt")
  val parFiles = ThreadUtils.parallelize(files, threads)

  Timer.time("Whole thing") {
    val timePrintWriter = FileUtils.appendingPrintWriterFromFile(timeFile)
    timePrintWriter.println("File\tSize\tTime")
    val timer = new Timer("Startup")

    timer.start()
    // Prime it first.  This counts toward the overall time, but should not be attributed
    // to any particular document.
    val config = EidosSystem.defaultConfig
    val reader = new EidosSystem(config)
    val options = EidosSystem.Options()
    // 0. Optionally include adjective grounding
    val adjectiveGrounder = EidosAdjectiveGrounder.fromEidosConfig(config)

    reader.extractFromText("This is a test.")
    timer.stop()

    timePrintWriter.println("Startup\t0\t" + timer.elapsedTime.get)

    parFiles.foreach { file =>
      try {
        // 1. Open corresponding output file
        logger.info(s"Extracting from ${file.getName}")
        val timer = new Timer("Single file in parallel")
        val size = timer.time {
          // 2. Get the input file text and metadata
          val metafile = textToMeta(file, metaDir)
          val eidosText = CluText(reader, file, Some(metafile))
          val text = eidosText.getText
          val metadata = eidosText.getMetadata
          // 3. Extract causal mentions from the text
          val annotatedDocument = reader.extractFromText(text, options, metadata)
          // 4. Convert to JSON
          val corpus = new JLDCorpus(annotatedDocument)
          val mentionsJSONLD = corpus.serialize()
          // 5. Write to output file
          val path = CluText.convertTextToJsonld(file, outputDir)
          FileUtils.printWriterFromFile(path).autoClose { pw =>
            pw.println(stringify(mentionsJSONLD, pretty = true))
          }
          // Now move the file to directory done
          val newFile = FileEditor(file).setDir(doneDir).get
          file.renameTo(newFile)

          text.length
        }
        this.synchronized {
          timePrintWriter.println(file.getName + "\t" + size + "\t" + timer.elapsedTime.get)
        }
      }
      catch {
        case exception: Exception =>
          logger.error(s"Exception for file $file", exception)
      }
    }
    timePrintWriter.close()
  }
} 
Example 164
Source File: ExtractCdrMetaFromDirectory.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.apps.batch

import org.clulab.wm.eidos.EidosSystem
import org.clulab.wm.eidos.serialization.jsonld.JLDCorpus
import org.clulab.wm.eidos.utils.Closer.AutoCloser
import org.clulab.wm.eidos.utils.FileEditor
import org.clulab.wm.eidos.utils.FileUtils
import org.clulab.wm.eidos.utils.ThreadUtils
import org.clulab.wm.eidos.utils.Timer
import org.clulab.wm.eidos.utils.meta.CdrText
import org.slf4j.Logger
import org.slf4j.LoggerFactory

object ExtractCdrMetaFromDirectory extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val inputDir = args(0)
  val outputDir = args(1)
  val timeFile = args(2)
  val threads = args(3).toInt

  val doneDir = inputDir + "/done"

  val files = FileUtils.findFiles(inputDir, "json")
  val parFiles = ThreadUtils.parallelize(files, threads)

  Timer.time("Whole thing") {
    val timePrintWriter = FileUtils.appendingPrintWriterFromFile(timeFile)
    timePrintWriter.println("File\tSize\tTime")
    val timer = new Timer("Startup")

    timer.start()
    // Prime it first.  This counts toward the overall time, but should not be attributed
    // to any particular document.
    val reader = new EidosSystem()
    val options = EidosSystem.Options()

    Timer.time("EidosPrimer") {
      reader.extractFromText("This is a test.")
    }
    timer.stop()
    timePrintWriter.println("Startup\t0\t" + timer.elapsedTime.get)

    parFiles.foreach { file =>
      try {
        // 1. Open corresponding output file
        logger.info(s"Extracting from ${file.getName}")
        val timer = new Timer("Single file in parallel")
        val size = timer.time {
          // 2. Get the input file text and metadata
          val eidosText = CdrText(file)
          val text = eidosText.getText
          val metadata = eidosText.getMetadata
          // 3. Extract causal mentions from the text
          val annotatedDocument = reader.extractFromText(text, options, metadata)
          // 4. Write to output file
          val path = FileEditor(file).setDir(outputDir).setExt("jsonld").get
          FileUtils.printWriterFromFile(path).autoClose { printWriter =>
            new JLDCorpus(annotatedDocument).serialize(printWriter)
          }
          // Now move the file to directory done
          val newFile = FileEditor(file).setDir(doneDir).get
          file.renameTo(newFile)

          text.length
        }
        this.synchronized {
          timePrintWriter.println(file.getName + "\t" + size + "\t" + timer.elapsedTime.get)
        }
      }
      catch {
        case exception: Exception =>
          logger.error(s"Exception for file $file", exception)
      }
    }
    timePrintWriter.close()
  }
} 
Example 165
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.elasticsearch.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString
  
  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }
  
  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
} 
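
A minimal usage sketch for Sourcer; the file path below is a placeholder. The caller is responsible for closing the returned BufferedSource.

import org.clulab.wm.elasticsearch.utils.Sourcer

object SourcerUsageSketch extends App {
  val source = Sourcer.sourceFromFile("./example.txt")  // or Sourcer.sourceFromResource("/example.txt")
  try {
    source.getLines().take(5).foreach(println)          // print the first few lines
  }
  finally {
    source.close()
  }
}
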
Example 166
Source File: SharedSparkContext.scala    From sscheck   with Apache License 2.0 5 votes vote down vote up
package es.ucm.fdi.sscheck.spark

import org.apache.spark._

import org.slf4j.LoggerFactory


  def sparkAppName : String = "ScalaCheck Spark test"
  
  // lazy val so early definitions are not needed for subtyping
  @transient lazy val conf = new SparkConf().setMaster(sparkMaster).setAppName(sparkAppName)    
  
  @transient protected[this] var _sc : Option[SparkContext] = None
  def sc() : SparkContext = { 
    _sc.getOrElse {
      logger.warn("creating test Spark context")
      _sc = Some(new SparkContext(conf))
      _sc.get
    }
  }
  
  override def close() : Unit = {
    _sc.foreach { sc => 
      logger.warn("stopping test Spark context")
      sc.stop()
      // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
      System.clearProperty("spark.driver.port")
    }
    _sc = None
  }
} 
Example 167
Source File: SharedStreamingContext.scala    From sscheck   with Apache License 2.0 5 votes vote down vote up
package es.ucm.fdi.sscheck.spark.streaming

import org.apache.spark.streaming.{StreamingContext,Duration}

import org.slf4j.LoggerFactory

import scala.util.Try 

import es.ucm.fdi.sscheck.spark.SharedSparkContext

trait SharedStreamingContext 
  extends SharedSparkContext {

  // cannot use private[this] due to https://issues.scala-lang.org/browse/SI-8087
  // @transient private[this] val logger = Logger(LoggerFactory.getLogger("SharedStreamingContext"))
  @transient private val logger = LoggerFactory.getLogger("SharedStreamingContext")
  
   
        ssc.stop(stopSparkContext=false, stopGracefully=false) 
      } recover {
        case _ => {
          logger.warn("second attempt forcing stop of test Spark Streaming context")
          ssc.stop(stopSparkContext=false, stopGracefully=false)
        }
      }
      _ssc = None
    }
    if (stopSparkContext) {
      super[SharedSparkContext].close()
    }
  }
} 
Example 168
Source File: SharedStreamingContextBeforeAfterEachTest.scala    From sscheck   with Apache License 2.0 5 votes vote down vote up
package es.ucm.fdi.sscheck.spark.streaming

import org.junit.runner.RunWith
import org.specs2.runner.JUnitRunner 
import org.specs2.execute.Result

import org.apache.spark.streaming.Duration
import org.apache.spark.rdd.RDD

import scala.collection.mutable.Queue
import scala.concurrent.duration._

import org.slf4j.LoggerFactory

import es.ucm.fdi.sscheck.matcher.specs2.RDDMatchers._

// sbt "test-only es.ucm.fdi.sscheck.spark.streaming.SharedStreamingContextBeforeAfterEachTest"

@RunWith(classOf[JUnitRunner])
class SharedStreamingContextBeforeAfterEachTest 
  extends org.specs2.Specification 
  with org.specs2.matcher.MustThrownExpectations 
  with org.specs2.matcher.ResultMatchers
  with SharedStreamingContextBeforeAfterEach {
  
  // cannot use private[this] due to https://issues.scala-lang.org/browse/SI-8087
  @transient private val logger = LoggerFactory.getLogger("SharedStreamingContextBeforeAfterEachTest")
  
  // Spark configuration
  override def sparkMaster : String = "local[5]"
  override def batchDuration = Duration(250) 
  override def defaultParallelism = 3
  override def enableCheckpointing = false // as queueStream doesn't support checkpointing 
  
  def is = 
    sequential ^ s2"""
    Simple test for SharedStreamingContextBeforeAfterEach 
      where a simple queueStream test must be successful $successfulSimpleQueueStreamTest
      where a simple queueStream test can also fail $failingSimpleQueueStreamTest
    """      
            
  def successfulSimpleQueueStreamTest = simpleQueueStreamTest(expectedCount = 0)
  def failingSimpleQueueStreamTest = simpleQueueStreamTest(expectedCount = 1) must beFailing
        
  def simpleQueueStreamTest(expectedCount : Int) : Result = {
    val record = "hola"
    val batches = Seq.fill(5)(Seq.fill(10)(record))
    val queue = new Queue[RDD[String]]
    queue ++= batches.map(batch => sc.parallelize(batch, numSlices = defaultParallelism))
    val inputDStream = ssc.queueStream(queue, oneAtATime = true)
    val sizesDStream = inputDStream.map(_.length)
    
    var batchCount = 0
    // NOTE wrapping assertions with a Result object is needed
    // to avoid the Spark Streaming runtime capturing the exceptions
    // from failing assertions
    var result : Result = ok
    inputDStream.foreachRDD { rdd =>
      batchCount += 1
      println(s"completed batch number $batchCount: ${rdd.collect.mkString(",")}")
      result = result and {
        rdd.filter(_!= record).count() === expectedCount
        rdd should existsRecord(_ == "hola")
      }
    }
    sizesDStream.foreachRDD { rdd =>
      result = result and { 
        rdd should foreachRecord(record.length)(len => _ == len)      
      }
    }
    
    // should only start the dstream after all the transformations and actions have been defined
    ssc.start()
    
    // wait for completion of batches.length batches
    StreamingContextUtils.awaitForNBatchesCompleted(batches.length, atMost = 10 seconds)(ssc)
    
    result
  }
} 
Example 169
Source File: LogLevelRoutesSpec.scala    From akka-management   with Apache License 2.0 5 votes vote down vote up
package akka.management.loglevels.logback

import akka.actor.ExtendedActorSystem
import akka.http.javadsl.server.MalformedQueryParamRejection
import akka.http.scaladsl.model.StatusCodes
import akka.http.scaladsl.model.Uri
import akka.http.scaladsl.testkit.ScalatestRouteTest
import akka.management.scaladsl.ManagementRouteProviderSettings
import org.scalatest.Matchers
import org.scalatest.WordSpec
import org.slf4j.LoggerFactory
import akka.event.{ Logging => ClassicLogging }

class LogLevelRoutesSpec extends WordSpec with Matchers with ScalatestRouteTest {

  override def testConfigSource: String =
    """
      akka.loglevel = INFO
      """

  val routes = LogLevelRoutes
    .createExtension(system.asInstanceOf[ExtendedActorSystem])
    .routes(ManagementRouteProviderSettings(Uri("https://example.com"), readOnly = false))

  "The logback log level routes" must {

    "show log level of a Logger" in {
      Get("/loglevel/logback?logger=LogLevelRoutesSpec") ~> routes ~> check {
        responseAs[String]
      }
    }

    "change log level of a Logger" in {
      Put("/loglevel/logback?logger=LogLevelRoutesSpec&level=DEBUG") ~> routes ~> check {
        response.status should ===(StatusCodes.OK)
        LoggerFactory.getLogger("LogLevelRoutesSpec").isDebugEnabled should ===(true)
      }
    }

    "fail for unknown log level" in {
      Put("/loglevel/logback?logger=LogLevelRoutesSpec&level=MONKEY") ~> routes ~> check {
        rejection shouldBe an[MalformedQueryParamRejection]
      }
    }

    "not change loglevel if read only" in {
      val readOnlyRoutes = LogLevelRoutes
        .createExtension(system.asInstanceOf[ExtendedActorSystem])
        .routes(ManagementRouteProviderSettings(Uri("https://example.com"), readOnly = true))
      Put("/loglevel/logback?logger=LogLevelRoutesSpec&level=DEBUG") ~> readOnlyRoutes ~> check {
        response.status should ===(StatusCodes.Forbidden)
      }
    }

    "allow inspecting classic Akka loglevel" in {
      Get("/loglevel/akka") ~> routes ~> check {
        response.status should ===(StatusCodes.OK)
        responseAs[String] should ===("INFO")
      }
    }

    "allow changing classic Akka loglevel" in {
      Put("/loglevel/akka?level=DEBUG") ~> routes ~> check {
        response.status should ===(StatusCodes.OK)
        system.eventStream.logLevel should ===(ClassicLogging.DebugLevel)
      }
    }
  }

} 
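
Independent of the HTTP routes exercised above, a logback level can be flipped directly through SLF4J by narrowing to the logback Logger type (this assumes logback-classic is on the classpath, as it is for these tests); the PUT route's effect is observable the same way the test checks it, via isDebugEnabled.

import ch.qos.logback.classic.{Level, Logger => LogbackLogger}
import org.slf4j.LoggerFactory

object LogbackLevelSketch extends App {
  val logger = LoggerFactory.getLogger("LogLevelRoutesSpec").asInstanceOf[LogbackLogger]
  logger.setLevel(Level.DEBUG)
  println(logger.isDebugEnabled)   // true
}
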
Example 170
Source File: ParallelCpgPass.scala    From codepropertygraph   with Apache License 2.0 5 votes vote down vote up
package io.shiftleft.passes
import java.util.concurrent.LinkedBlockingQueue

import io.shiftleft.SerializedCpg
import io.shiftleft.codepropertygraph.Cpg
import org.apache.logging.log4j.{LogManager, Logger}
import org.slf4j.LoggerFactory

abstract class ParallelCpgPass[T](cpg: Cpg, outName: String = "") extends CpgPassBase {

  private val logger: Logger = LogManager.getLogger(classOf[ParallelCpgPass[T]])

  def init(): Unit = {}

  def partIterator: Iterator[T]

  def runOnPart(part: T): Option[DiffGraph]

  override def createAndApply(): Unit = {
    withWriter() { writer =>
      enqueueInParallel(writer)
    }
  }

  override def createApplySerializeAndStore(serializedCpg: SerializedCpg, inverse: Boolean, prefix: String): Unit = {
    withWriter(serializedCpg, prefix, inverse) { writer =>
      enqueueInParallel(writer)
    }
  }

  private def withWriter[X](serializedCpg: SerializedCpg = new SerializedCpg(),
                            prefix: String = "",
                            inverse: Boolean = false)(f: Writer => Unit): Unit = {
    val writer = new Writer(serializedCpg, prefix, inverse)
    val writerThread = new Thread(writer)
    writerThread.setName("Writer")
    writerThread.start()
    try {
      f(writer)
    } catch {
      case exception: Exception =>
        logger.warn(exception)
    } finally {
      writer.enqueue(None)
      writerThread.join()
    }
  }

  private def enqueueInParallel(writer: Writer): Unit = {
    init()
    val it = new ParallelIteratorExecutor(partIterator).map { part =>
      // Note: writer.enqueue(runOnPart(part)) would be wrong because
      // it would terminate the writer as soon as a pass returns None
      // as None is used as a termination symbol for the queue
      runOnPart(part).foreach(diffGraph => writer.enqueue(Some(diffGraph)))
    }
    consume(it)
  }

  private def consume(it: Iterator[_]): Unit = {
    while (it.hasNext) {
      it.next()
    }
  }

  private class Writer(serializedCpg: SerializedCpg, prefix: String, inverse: Boolean) extends Runnable {

    private val logger = LoggerFactory.getLogger(getClass)

    private val queue = new LinkedBlockingQueue[Option[DiffGraph]]

    def enqueue(diffGraph: Option[DiffGraph]): Unit = {
      queue.put(diffGraph)
    }

    override def run(): Unit = {
      try {
        var terminate = false
        var index: Int = 0
        while (!terminate) {
          queue.take() match {
            case Some(diffGraph) =>
              val appliedDiffGraph = DiffGraph.Applier.applyDiff(diffGraph, cpg, inverse)
              if (!serializedCpg.isEmpty) {
                val overlay = serialize(appliedDiffGraph, inverse)
                val name = generateOutFileName(prefix, outName, index)
                index += 1
                store(overlay, name, serializedCpg)
              }
            case None =>
              logger.info("Shutting down WriterThread")
              terminate = true
          }
        }
      } catch {
        case _: InterruptedException => logger.info("Interrupted WriterThread")
      }
    }
  }

} 
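
A skeletal subclass illustrating the ParallelCpgPass contract above: partIterator supplies the parts and runOnPart turns each part into an optional DiffGraph. DiffGraph construction is not shown in this listing, so this sketch simply returns None (which skips the part) and the part values are placeholders.

package io.shiftleft.passes

import io.shiftleft.SerializedCpg
import io.shiftleft.codepropertygraph.Cpg

class NoOpPass(cpg: Cpg) extends ParallelCpgPass[String](cpg, outName = "noop") {

  override def partIterator: Iterator[String] = Iterator("part-1", "part-2")

  override def runOnPart(part: String): Option[DiffGraph] = {
    // Return Some(diffGraph) to have the Writer apply (and optionally serialize) it;
    // returning None skips this part without terminating the writer thread.
    None
  }
}

// Typical driver code then calls one of the entry points defined above:
//   new NoOpPass(cpg).createAndApply()
//   new NoOpPass(cpg).createApplySerializeAndStore(new SerializedCpg(), inverse = false, prefix = "")
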
Example 171
Source File: MLLib.scala    From spark-sql-perf   with Apache License 2.0 5 votes vote down vote up
package com.databricks.spark.sql.perf.mllib


import scala.io.Source
import scala.language.implicitConversions

import org.slf4j.LoggerFactory

import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

import com.databricks.spark.sql.perf._


class MLLib(sqlContext: SQLContext)
  extends Benchmark(sqlContext) with Serializable {

  def this() = this(SQLContext.getOrCreate(SparkContext.getOrCreate()))
}

object MLLib {

  
  def run(yamlFile: String = null, yamlConfig: String = null): DataFrame = {
    logger.info("Starting run")
    val conf = getConf(yamlFile, yamlConfig)
    val sparkConf = new SparkConf().setAppName("MLlib QA").setMaster("local[2]")
    val sc = SparkContext.getOrCreate(sparkConf)
    sc.setLogLevel("INFO")
    val b = new com.databricks.spark.sql.perf.mllib.MLLib()
    val benchmarks = getBenchmarks(conf)
    println(s"${benchmarks.size} benchmarks identified:")
    val str = benchmarks.map(_.prettyPrint).mkString("\n")
    println(str)
    logger.info("Starting experiments")
    val e = b.runExperiment(
      executionsToRun = benchmarks,
      iterations = 1, // If you want to increase the number of iterations, add more seeds
      resultLocation = conf.output,
      forkThread = false)
    e.waitForFinish(conf.timeout.toSeconds.toInt)
    logger.info("Run finished")
    e.getCurrentResults()
  }
} 
Example 172
Source File: CarbonThriftServer.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.thriftserver

import java.io.File

import org.apache.spark.SparkConf
import org.apache.spark.sql.{CarbonEnv, SparkSession}
import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
import org.slf4j.{Logger, LoggerFactory}

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.spark.util.CarbonSparkUtil


object CarbonThriftServer {

  def main(args: Array[String]): Unit = {
    if (args.length != 0 && args.length != 3) {
      val logger: Logger = LoggerFactory.getLogger(this.getClass)
      logger.error("parameters: [access-key] [secret-key] [s3-endpoint]")
      System.exit(0)
    }
    val sparkConf = new SparkConf(loadDefaults = true)
    val builder = SparkSession
      .builder()
      .config(sparkConf)
      .appName("Carbon Thrift Server(uses CarbonExtensions)")
      .enableHiveSupport()
      .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions")
    configPropertiesFile(sparkConf, builder)
    if (args.length == 3) {
      builder.config(CarbonSparkUtil.getSparkConfForS3(args(0), args(1), args(2)))
    }
    val spark = builder.getOrCreate()
    CarbonEnv.getInstance(spark)
    waitingForSparkLaunch()
    HiveThriftServer2.startWithContext(spark.sqlContext)
  }

  private def waitingForSparkLaunch(): Unit = {
    val warmUpTime = CarbonProperties.getInstance().getProperty("carbon.spark.warmUpTime", "5000")
    try {
      Thread.sleep(Integer.parseInt(warmUpTime))
    } catch {
      case e: Exception =>
        val LOG = LogServiceFactory.getLogService(this.getClass.getCanonicalName)
        LOG.error(s"Wrong value for carbon.spark.warmUpTime $warmUpTime " +
                  "Using default Value and proceeding")
        Thread.sleep(5000)
    }
  }

  private def configPropertiesFile(sparkConf: SparkConf, builder: SparkSession.Builder): Unit = {
    if (sparkConf.contains("carbon.properties.filepath")) {
      System.setProperty(
        "carbon.properties.filepath", sparkConf.get("carbon.properties.filepath"))
    } else {
      val sparkHome = System.getenv.get("SPARK_HOME")
      if (null != sparkHome) {
        val file = new File(sparkHome + '/' + "conf" + '/' + "carbon.properties")
        if (file.exists()) {
          builder.config("carbon.properties.filepath", file.getCanonicalPath)
          System.setProperty("carbon.properties.filepath", file.getCanonicalPath)
        }
      }
    }
  }
} 
Example 173
Source File: S3CsvExample.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.examples

import java.io.File

import org.apache.hadoop.fs.s3a.Constants.{ACCESS_KEY, SECRET_KEY}
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

object S3CsvExample {

  
  def main(args: Array[String]) {
    val rootPath = new File(this.getClass.getResource("/").getPath
                            + "../../../..").getCanonicalPath
    val logger: Logger = LoggerFactory.getLogger(this.getClass)

    import org.apache.spark.sql.CarbonUtils._
    if (args.length != 4) {
      logger.error("Usage: java CarbonS3Example <access-key> <secret-key>" +
                   "<s3.csv.location> <spark-master>")
      System.exit(0)
    }

    val spark = SparkSession
      .builder()
      .master(args(3))
      .appName("S3CsvExample")
      .config("spark.driver.host", "localhost")
      .config("spark.hadoop." + ACCESS_KEY, args(0))
      .config("spark.hadoop." + SECRET_KEY, args(1))
      .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions")
      .getOrCreate()

    spark.sparkContext.setLogLevel("ERROR")

    spark.sql(
      s"""
         | CREATE TABLE if not exists carbon_table1(
         | shortField SHORT,
         | intField INT,
         | bigintField LONG,
         | doubleField DOUBLE,
         | stringField STRING,
         | timestampField TIMESTAMP,
         | decimalField DECIMAL(18,2),
         | dateField DATE,
         | charField CHAR(5),
         | floatField FLOAT
         | )
         | STORED AS carbondata
         | LOCATION '$rootPath/examples/spark/target/store'
       """.stripMargin)

    spark.sql(
      s"""
         | LOAD DATA LOCAL INPATH '${ args(2) }'
         | INTO TABLE carbon_table1
         | OPTIONS('HEADER'='true')
       """.stripMargin)

    spark.sql(
      s"""
         | LOAD DATA LOCAL INPATH '${ args(2) }'
         | INTO TABLE carbon_table1
         | OPTIONS('HEADER'='true')
       """.stripMargin)

    spark.sql(
      s"""
         | SELECT *
         | FROM carbon_table1
      """.stripMargin).show()

    spark.sql("Drop table if exists carbon_table1")

    spark.stop()
  }
} 
Example 174
Source File: S3UsingSDkExample.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.examples

import org.apache.hadoop.fs.s3a.Constants.{ACCESS_KEY, ENDPOINT, SECRET_KEY}
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

import org.apache.carbondata.core.metadata.datatype.{DataTypes, Field}
import org.apache.carbondata.sdk.file.{CarbonWriter, Schema}
import org.apache.carbondata.spark.util.CarbonSparkUtil



  def main(args: Array[String]) {
    val logger: Logger = LoggerFactory.getLogger(this.getClass)

    if (args.length < 2 || args.length > 6) {
      logger.error("Usage: java CarbonS3Example <access-key> <secret-key>" +
        "[table-path-on-s3] [s3-endpoint] [number-of-rows] [spark-master]")
      System.exit(0)
    }

    val (accessKey, secretKey, endpoint) = CarbonSparkUtil.getKeyOnPrefix(args(2))
    val spark = SparkSession
      .builder()
      .master(getSparkMaster(args))
      .appName("S3UsingSDKExample")
      .config("spark.driver.host", "localhost")
      .config(accessKey, args(0))
      .config(secretKey, args(1))
      .config(endpoint, CarbonSparkUtil.getS3EndPoint(args))
      .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions")
      .getOrCreate()

    spark.sparkContext.setLogLevel("WARN")
    val path = if (args.length < 3) {
      "s3a://sdk/WriterOutput2 "
    } else {
      args(2)
    }
    val num = if (args.length > 4) {
      Integer.parseInt(args(4))
    } else {
      3
    }
    buildTestData(args, path, num)

    spark.sql("DROP TABLE IF EXISTS s3_sdk_table")
    spark.sql(s"CREATE EXTERNAL TABLE s3_sdk_table STORED AS carbondata" +
      s" LOCATION '$path'")
    spark.sql("SELECT * FROM s3_sdk_table LIMIT 10").show()
    spark.stop()
  }

  def getSparkMaster(args: Array[String]): String = {
    if (args.length == 6) args(5)
    else "local"
  }

} 
Example 175
Source File: CouchbaseReadSideHandler.scala    From akka-persistence-couchbase   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.internal.javadsl.persistence.couchbase

import java.util.concurrent.CompletionStage

import akka.Done
import akka.japi.Pair
import akka.stream.ActorAttributes
import akka.stream.alpakka.couchbase.javadsl.CouchbaseSession
import akka.stream.javadsl.Flow
import com.lightbend.lagom.internal.javadsl.persistence.OffsetAdapter
import com.lightbend.lagom.internal.persistence.couchbase.{CouchbaseOffsetDao, CouchbaseOffsetStore}
import com.lightbend.lagom.javadsl.persistence.ReadSideProcessor.ReadSideHandler
import com.lightbend.lagom.javadsl.persistence.{AggregateEvent, AggregateEventTag, Offset}
import org.slf4j.LoggerFactory

import scala.compat.java8.FutureConverters._
import scala.concurrent.{ExecutionContext, Future}


private[couchbase] final class CouchbaseReadSideHandler[Event <: AggregateEvent[Event]](
    couchbaseSession: CouchbaseSession,
    offsetStore: CouchbaseOffsetStore,
    handlers: Map[Class[_ <: Event], Handler[Event]],
    globalPrepareCallback: CouchbaseSession => CompletionStage[Done],
    prepareCallback: (CouchbaseSession, AggregateEventTag[Event]) => CompletionStage[Done],
    readProcessorId: String,
    dispatcher: String
)(implicit ec: ExecutionContext)
    extends ReadSideHandler[Event] {
  private val log = LoggerFactory.getLogger(this.getClass)

  @volatile
  private var offsetDao: CouchbaseOffsetDao = _

  protected def invoke(handler: Handler[Event], event: Event, offset: Offset): CompletionStage[Done] =
    handler
      .asInstanceOf[(CouchbaseSession, Event, Offset) => CompletionStage[Done]]
      .apply(couchbaseSession, event, offset)
      .toScala
      .flatMap { _ =>
        val akkaOffset = OffsetAdapter.dslOffsetToOffset(offset)
        offsetDao.bindSaveOffset(akkaOffset).execute(couchbaseSession.asScala, ec)
      }
      .toJava

  override def globalPrepare(): CompletionStage[Done] = globalPrepareCallback.apply(couchbaseSession)

  override def prepare(tag: AggregateEventTag[Event]): CompletionStage[Offset] =
    (for {
      _ <- prepareCallback.apply(couchbaseSession, tag).toScala
      dao <- offsetStore.prepare(readProcessorId, tag.tag)
    } yield {
      offsetDao = dao
      OffsetAdapter.offsetToDslOffset(dao.loadedOffset)
    }).toJava

  override def handle(): Flow[Pair[Event, Offset], Done, _] =
    akka.stream.scaladsl
      .Flow[Pair[Event, Offset]]
      .mapAsync(parallelism = 1) { pair =>
        val Pair(event, offset) = pair
        val eventClass = event.getClass

        val handler =
          handlers.getOrElse(
            // lookup handler
            eventClass,
            // fallback to empty handler if none
            {
              if (log.isDebugEnabled()) log.debug("Unhandled event [{}]", eventClass.getName)
              CouchbaseReadSideHandler.emptyHandler
            }
          )

        invoke(handler, event, offset).toScala
      }
      .withAttributes(ActorAttributes.dispatcher(dispatcher))
      .asJava
} 
Example 176
Source File: CouchbaseReadSideHandler.scala    From akka-persistence-couchbase   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.internal.scaladsl.persistence.couchbase

import akka.persistence.query.Offset
import akka.stream.ActorAttributes
import akka.stream.alpakka.couchbase.scaladsl.CouchbaseSession
import akka.stream.scaladsl.Flow
import akka.{Done, NotUsed}
import com.lightbend.lagom.internal.persistence.couchbase.{CouchbaseOffsetDao, CouchbaseOffsetStore}
import com.lightbend.lagom.scaladsl.persistence.ReadSideProcessor.ReadSideHandler
import com.lightbend.lagom.scaladsl.persistence._
import org.slf4j.LoggerFactory

import scala.concurrent.{ExecutionContext, Future}


private[couchbase] final class CouchbaseReadSideHandler[Event <: AggregateEvent[Event]](
    couchbase: CouchbaseSession,
    offsetStore: CouchbaseOffsetStore,
    handlers: Map[Class[_ <: Event], CouchbaseReadSideHandler.Handler[Event]],
    globalPrepareCallback: CouchbaseSession => Future[Done],
    prepareCallback: (CouchbaseSession, AggregateEventTag[Event]) => Future[Done],
    readProcessorId: String,
    dispatcher: String
)(implicit ec: ExecutionContext)
    extends ReadSideHandler[Event] {
  import CouchbaseReadSideHandler.Handler

  private val log = LoggerFactory.getLogger(this.getClass)

  @volatile
  private var offsetDao: CouchbaseOffsetDao = _

  protected def invoke(handler: Handler[Event], element: EventStreamElement[Event]): Future[Done] =
    handler
      .apply(couchbase, element)
      .flatMap(_ => offsetDao.bindSaveOffset(element.offset).execute(couchbase, ec))

  override def globalPrepare(): Future[Done] = globalPrepareCallback(couchbase)

  override def prepare(tag: AggregateEventTag[Event]): Future[Offset] =
    for {
      _ <- prepareCallback.apply(couchbase, tag)
      dao <- offsetStore.prepare(readProcessorId, tag.tag)
    } yield {
      offsetDao = dao
      dao.loadedOffset
    }

  override def handle(): Flow[EventStreamElement[Event], Done, NotUsed] =
    Flow[EventStreamElement[Event]]
      .mapAsync(parallelism = 1) { elem =>
        val eventClass = elem.event.getClass

        val handler =
          handlers.getOrElse(
            // lookup handler
            eventClass,
            // fallback to empty handler if none
            {
              if (log.isDebugEnabled()) log.debug("Unhandled event [{}]", eventClass.getName)
              CouchbaseReadSideHandler.emptyHandler.asInstanceOf[Handler[Event]]
            }
          )

        invoke(handler, elem)
      }
      .withAttributes(ActorAttributes.dispatcher(dispatcher))
} 
Example 177
Source File: Factory.scala    From mystem-scala   with MIT License 5 votes vote down vote up
package ru.stachek66.nlp.mystem.holding

import java.io.{File, IOException}
import java.nio.file.Files
import java.nio.file.attribute.PosixFilePermissions

import org.slf4j.LoggerFactory
import ru.stachek66.tools.external.FailSafeExternalProcessServer
import ru.stachek66.tools.{Decompressor, Downloader, Tools}

import scala.concurrent.duration._
import scala.sys.process._
import scala.util.Try


  def newMyStem(version: String, customExecutable: Option[File] = None): Try[MyStem] = Try {

    val ex = customExecutable match {
      case Some(exe) => exe
      case None => getExecutable(version)
    }

    version match {
      case "3.0" | "3.1" =>
        new MyStem3(
          new FailSafeExternalProcessServer(
            ex.getAbsolutePath + (if (parsingOptions.nonEmpty) " " + parsingOptions else "")))
      case _ => throw new NotImplementedError()
    }
  }

  @throws(classOf[Exception])
  private[holding] def getExecutable(version: String): File = {

    val destFile = new File(BinDestination + BIN_FILE_NAME)
    val tempFile = new File(s"${BinDestination}tmp_${System.currentTimeMillis}.${Decompressor.select.traditionalExtension}")

    if (destFile.exists) {

      log.info("Old executable file found")

      try {
        val suggestedVersion = (destFile.getAbsolutePath + " -v") !!

        log.info("Version | " + suggestedVersion)
        // not the most idiomatic Scala, but good enough here
        if (suggestedVersion.contains(version))
          destFile
        else
          throw new Exception("Wrong version!")
      } catch {
        case e: Exception =>
          log.warn("Removing old binary files...", e)
          destFile.delete
          getExecutable(version)
      }
    } else Tools.withAttempt(10, 1.second) {
      try {
        Decompressor.select.unpack(
          Downloader.downloadBinaryFile(getUrl(version), tempFile), destFile)
      } finally {
        tempFile.delete()
        try {
          Files.setPosixFilePermissions(destFile.toPath, PosixFilePermissions.fromString("r-xr-xr-x")).toFile
        } catch {
          case ioe: IOException =>
            log.warn("Can't set POSIX permissions to file " + destFile.toPath)
            destFile
        }
      }
    }
  }
} 
Example 178
Source File: package.scala    From mystem-scala   with MIT License 5 votes vote down vote up
package ru.stachek66.nlp

import org.slf4j.LoggerFactory


package object mystem {

  private val log = LoggerFactory.getLogger(getClass)

  val os: Map[(String, String), String] = Map(
    ("Linux", "x86_64") -> "linux64",
    ("Linux", "amd64") -> "linux64",
    ("Linux", "x86") -> "linux32",
    ("Windows7", "x86") -> "win32",
    ("Windows7", "x86_64") -> "win64"
  ) withDefault {
    _ =>
      log.warn("Getting OSX binaries!")
      "osx"
  }
} 
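
A minimal lookup sketch for the os map above. Whether the library keys the lookup off these exact JVM properties is an assumption; the point is the withDefault fallback, which logs a warning and returns "osx" for any unknown platform.

import ru.stachek66.nlp.mystem.os

object OsLookupSketch extends App {
  println(os(("Linux", "x86_64")))      // "linux64"
  println(os(("Mac OS X", "x86_64")))   // unknown key: warns "Getting OSX binaries!" and returns "osx"

  val detected = (System.getProperty("os.name"), System.getProperty("os.arch"))
  println(s"binaries for $detected: ${os(detected)}")
}
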
Example 179
Source File: Tools.scala    From mystem-scala   with MIT License 5 votes vote down vote up
package ru.stachek66.tools

import org.slf4j.LoggerFactory

import scala.concurrent.duration._


object Tools {

  private val log = LoggerFactory.getLogger(getClass)

  @throws(classOf[Exception])
  def withAttempt[T](n: Int, timeout: Duration = 0.millis)(action: => T): T = try {
    action
  } catch {
    case e: Exception if n > 1 =>
      log.warn(s"${n - 1} attempts left", e)
      Thread.sleep(timeout.toMillis)
      withAttempt(n - 1, timeout)(action)
    case e: Exception =>
      throw new Exception("No attempts left", e)
  }
} 
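
A minimal retry sketch for Tools.withAttempt; the flaky action is simulated with a random failure. After the configured attempts are exhausted, the helper rethrows as "No attempts left".

import ru.stachek66.tools.Tools

import scala.concurrent.duration._
import scala.util.Random

object RetrySketch extends App {
  val answer = Tools.withAttempt(3, 500.millis) {
    if (Random.nextInt(3) != 0) throw new RuntimeException("transient failure")
    42
  }
  println(s"succeeded with $answer")
}
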
Example 180
Source File: Log.scala    From fs2-rabbit   with Apache License 2.0 5 votes vote down vote up
package dev.profunktor.fs2rabbit.effects

import cats.effect.Sync
import org.slf4j.LoggerFactory

trait Log[F[_]] {
  def info(value: => String): F[Unit]
  def error(value: => String): F[Unit]
}

object Log {
  private[fs2rabbit] val logger = LoggerFactory.getLogger(this.getClass)

  def apply[F[_]](implicit ev: Log[F]): Log[F] = ev

  implicit def syncLogInstance[F[_]](implicit F: Sync[F]): Log[F] =
    new Log[F] {
      override def error(value: => String): F[Unit] = F.delay(logger.error(value))
      override def info(value: => String): F[Unit]  = F.delay(logger.info(value))
    }
} 
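
A minimal sketch wiring Log[F] to cats-effect IO. It assumes the cats-effect 2 style IO that fs2-rabbit used at this point, where Sync[IO] is available without extra wiring and unsafeRunSync() can be called directly.

import cats.effect.IO
import dev.profunktor.fs2rabbit.effects.Log

object LogSketch extends App {
  val program: IO[Unit] =
    for {
      _ <- Log[IO].info("connecting to broker")
      _ <- Log[IO].error("connection failed, giving up")
    } yield ()

  program.unsafeRunSync()
}
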
Example 181
Source File: LoanWordsProcessor.scala    From attic-nlp4l   with Apache License 2.0 5 votes vote down vote up
package org.nlp4l.syn

import org.apache.lucene.search.spell.LuceneLevenshteinDistance
import org.nlp4l.core.RawReader
import org.nlp4l.framework.models._
import org.nlp4l.framework.processors.{Processor, ProcessorFactory, DictionaryAttributeFactory}
import org.nlp4l.lm.{HmmTokenizer, HmmModel}
import org.slf4j.LoggerFactory

import scala.collection.mutable.ListBuffer
import scala.util.matching.Regex

class LoanWordsDictionaryAttributeFactory(settings: Map[String, String]) extends DictionaryAttributeFactory(settings) {
  override def getInstance: DictionaryAttribute = {

    val list = Seq[CellAttribute](
      CellAttribute("word", CellType.StringType, true, true),
      CellAttribute("synonym", CellType.StringType, false, true)
    )
    new DictionaryAttribute("loanWords", list)
  }
}

class LoanWordsProcessorFactory(settings: Map[String, String]) extends ProcessorFactory(settings) {

  val DEF_THRESHOLD = 0.8F
  val DEF_MIN_DOCFREQ = 3

  override def getInstance: Processor = {
    val index = getStrParamRequired("index")
    val field = getStrParamRequired("field")
    val modelIndex = getStrParamRequired("modelIndex")
    val threshold = getFloatParam("threshold", DEF_THRESHOLD)
    val minDocFreq = getIntParam("minDocFreq", DEF_MIN_DOCFREQ)
    new LoanWordsProcessor(index, field, modelIndex, threshold, minDocFreq)
  }
}

class LoanWordsProcessor(val index: String, val field: String, val modelIndex: String,
                          val threshold: Float, val minDocFreq: Int) extends Processor {

  override def execute(data: Option[Dictionary]): Option[Dictionary] = {
    val logger = LoggerFactory.getLogger(this.getClass)
    val reader = RawReader(index)
    val trModel = new TransliterationModelIndex(modelIndex)

    val pattern: Regex = """([a-z]+) ([\u30A0-\u30FF]+)""".r
    val lld = new LuceneLevenshteinDistance()

    val records = ListBuffer.empty[Record]
    try{
      var progress = 0
      val fi = reader.field(field)
      fi match {
        case Some(f) => {
          val len = f.uniqTerms
          f.terms.foreach { t =>
            progress = progress + 1
            if((progress % 10000) == 0){
              val percent = ((progress.toFloat / len) * 100).toInt
              logger.info(s"$percent % done ($progress / $len) term is ${t.text}")
            }
            if (t.docFreq >= minDocFreq) {
              t.text match {
                case pattern(a, b) => {
                  val predWord = trModel.predict(b)
                  if (lld.getDistance(a, predWord) > threshold) {
                    records += Record(Seq(Cell("word", a), Cell("synonym", b)))
                  }
                }
                case _ => {}
              }
            }
          }
          Some(Dictionary(records))
        }
        case _ => throw new RuntimeException(s"""field "$field" you specified in conf file doesn't exist in the index "$index"""")
      }
    }
    finally{
      if(reader != null) reader.close
    }
  }
}

class TransliterationModelIndex(index: String){

  private val model = HmmModel(index)

  private val tokenizer = HmmTokenizer(model)

  def predict(katakana: String): String = {
    tokenizer.tokens(katakana).map(_.cls).mkString
  }
} 
Example 182
Source File: UnifySynonymRecordsProcessor.scala    From attic-nlp4l   with Apache License 2.0 5 votes vote down vote up
package org.nlp4l.syn

import org.nlp4l.framework.models._
import org.nlp4l.framework.processors.{Processor, ProcessorFactory, DictionaryAttributeFactory}
import org.slf4j.LoggerFactory

class UnifySynonymRecordsDictionaryAttributeFactory(settings: Map[String, String]) extends DictionaryAttributeFactory(settings) {
  override def getInstance: DictionaryAttribute = {

    val list = Seq[CellAttribute](
      CellAttribute("synonyms", CellType.StringType, false, true)
    )
    new DictionaryAttribute("unifySynonymRecords", list)
  }
}

class UnifySynonymRecordsProcessorFactory(settings: Map[String, String]) extends ProcessorFactory(settings) {

  override def getInstance: Processor = {
    val logger = LoggerFactory.getLogger(this.getClass)
    val separator = settings.getOrElse("separator", ",")
    val sortReverse = getBoolParam("sortReverse", false)
    logger.info("""separator "{}", sortReverse "{}"""", separator, sortReverse)
    new UnifySynonymRecordsProcessor(sortReverse, separator)
  }
}

class UnifySynonymRecordsProcessor(val sortReverse: Boolean, val separator: String) extends Processor {

  override def execute(data: Option[Dictionary]): Option[Dictionary] = {
    data match {
      case None => None
      case Some(dic) => {
        val inputRecords = dic.recordList.map{ r =>
          r.cellList.map{ c => c.value.toString }
        }
        val uniqueRecords = SynonymCommon.getUniqueRecords(inputRecords, Seq())
        Some(Dictionary(for(r <- uniqueRecords) yield {
          Record(Seq(Cell("synonyms", r.mkString(separator))))
        }))
      }
    }
  }
} 
Example 183
Source File: TermsExtractionProcessor.scala    From attic-nlp4l   with Apache License 2.0 5 votes vote down vote up
package org.nlp4l.extract

import org.nlp4l.framework.models._
import org.nlp4l.framework.processors.{Processor, ProcessorFactory, DictionaryAttributeFactory}
import org.nlp4l.lucene.LuceneDocTermVector
import org.nlp4l.lucene.TermsExtractor
import org.nlp4l.lucene.TermsExtractor.Config
import org.slf4j.LoggerFactory

import scala.collection.mutable.ListBuffer

class TermsExtractionDictionaryAttributeFactory(settings: Map[String, String]) extends DictionaryAttributeFactory(settings) {
  override def getInstance: DictionaryAttribute = {
    val outScore = getBoolParam("outScore", true)
    val list = if(outScore){
      Seq[CellAttribute](
        CellAttribute("term", CellType.StringType, true, true),
        // use constant hashCode so that we don't take into account score when calculating hashCode of Records
        CellAttribute("score", CellType.FloatType, false, true, constantHashCode => 0)
      )
    }
    else {
      Seq[CellAttribute](
        CellAttribute("term", CellType.StringType, true, true)
      )
    }
    new DictionaryAttribute("terms", list)
  }
}

class TermsExtractionProcessorFactory(settings: Map[String, String]) extends ProcessorFactory(settings) {

  override def getInstance: Processor = {
    val logger = LoggerFactory.getLogger(this.getClass)
    val config = new Config()
    config.index = getStrParamRequired("index")
    config.outScore = getBoolParam("outScore", true)
    config.fieldCn = getStrParamRequired("field")
    config.fieldLn2 = settings.getOrElse("fieldln2", null)
    config.fieldRn2 = settings.getOrElse("fieldrn2", null)
    config.delimiter = settings.getOrElse("delimiter", "/")
    config.outNum = getIntParam("num", org.nlp4l.lucene.TermsExtractor.DEF_OUT_NUM)
    config.scorer = settings.getOrElse("scorer", "FreqDFLR")
    logger.info(
      """TermsExtractionProcessor starts with parameters
        |    index "{}"
        |    field "{}"
        |    fieldln2 "{}"
        |    fieldrn2 "{}"
        |    delimiter "{}"
        |    num "{}"
        |    scorer "{}"
        |    outScore "{}"""".stripMargin,
      config.index, config.fieldCn, config.fieldLn2, config.fieldRn2, config.delimiter, config.outNum.toString, config.scorer, config.outScore.toString)
    new TermsExtractionProcessor(config)
  }
}

class TermsExtractionProcessor(val config: Config) extends Processor {

  override def execute(data: Option[Dictionary]): Option[Dictionary] = {
    val te = new ProcTermsExtractor(config)
    te.setConfig()
    te.execute()
    Some(Dictionary(te.records))
  }
}

class ProcTermsExtractor(config: Config) extends TermsExtractor(config: Config) {

  val records = ListBuffer.empty[Record]
  val logger = LoggerFactory.getLogger(this.getClass)

  override def printResultEntry(e: java.util.Map.Entry[String, LuceneDocTermVector.TermWeight]): Unit ={
    if(config.outScore){
      records += Record(Seq(Cell("term", getTerm(e)), Cell("score", getScore(e))))
    }
    else{
      records += Record(Seq(Cell("term", getTerm(e))))
    }
  }
} 
Example 184
Source File: BuddyWordsProcessor.scala    From attic-nlp4l   with Apache License 2.0 5 votes vote down vote up
package org.nlp4l.colloc

import org.nlp4l.core.RawReader
import org.nlp4l.framework.processors._
import org.nlp4l.framework.models._
import org.slf4j.LoggerFactory

import scala.collection.mutable.ListBuffer

class BuddyWordsDictionaryAttributeFactory(settings: Map[String, String]) extends DictionaryAttributeFactory(settings) {
  override def getInstance: DictionaryAttribute = {

    val list = Seq[CellAttribute](
      CellAttribute("word", CellType.StringType, true, true),
      CellAttribute("buddies", CellType.StringType, false, true)
    )
    new DictionaryAttribute("buddyWords", list)
  }
}

class BuddyWordsProcessorFactory(settings: Map[String, String]) extends ProcessorFactory(settings) {

  val DEF_MAX_DOCS_TO_ANALYZE: Int = 1000
  val DEF_SLOP: Int = 5
  val DEF_MAX_COI_TERMS_PER_TERM: Int = 20
  val DEF_MAX_BASE_TERMS_PER_DOC: Int = 10 * 1000

  override def getInstance: Processor = {
    val index = getStrParamRequired("index")
    val field = getStrParamRequired("field")
    val srcField = field            // use same field name for source field for now
    val maxDocsToAnalyze = getIntParam("maxDocsToAnalyze", DEF_MAX_DOCS_TO_ANALYZE)
    val slop = getIntParam("slop", DEF_SLOP)
    val maxCoiTermsPerTerm = getIntParam("maxCoiTermsPerTerm", DEF_MAX_COI_TERMS_PER_TERM)
    val maxBaseTermsPerDoc = getIntParam("maxBaseTermsPerDoc", DEF_MAX_BASE_TERMS_PER_DOC)
    new BuddyWordsProcessor(index, field, srcField, maxDocsToAnalyze, slop, maxCoiTermsPerTerm, maxBaseTermsPerDoc)
  }
}

class BuddyWordsProcessor(val index: String, val field: String, val srcField: String, val maxDocsToAnalyze: Int,
                          val slop: Int, val maxCoiTermsPerTerm: Int, val maxBaseTermPerDoc: Int) extends Processor {

  override def execute(data: Option[Dictionary]): Option[Dictionary] = {
    val logger = LoggerFactory.getLogger(this.getClass)
    val reader = RawReader(index)
    val records = ListBuffer.empty[Record]
    try{
      var progress = 0
      val fi = reader.field(field)
      fi match {
        case Some(f) => {
          val finder = BuddyWordsFinder(reader, maxDocsToAnalyze, slop, maxCoiTermsPerTerm, maxBaseTermPerDoc)
          val len = f.uniqTerms
          f.terms.foreach{ t =>
            val result = finder.find(field, t.text)
            progress = progress + 1
            if((progress % 1000) == 0){
              val percent = ((progress.toFloat / len) * 100).toInt
              logger.info(s"$percent % done ($progress / $len) term is ${t.text}")
            }
            if(result.size > 0){
              records += Record(Seq(Cell("word", t.text), Cell("buddies", result.map(_._1).mkString(","))))
            }
          }
          Some(Dictionary(records))
        }
        case _ => throw new RuntimeException(s"""field "$field" you specified in conf file doesn't exist in the index "$index"""")
      }
    }
    finally{
      if(reader != null) reader.close
    }
  }
} 
Example 185
Source File: LogstashLogbackEmitter.scala    From cedi-dtrace   with Apache License 2.0 5 votes vote down vote up
package com.ccadllc.cedi.dtrace
package logstash

import cats.effect.Sync
import cats.implicits._

import net.logstash.logback.marker.LogstashMarker
import net.logstash.logback.marker.Markers._

import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._


@deprecated("use EcsLogstashLogbackEmitter", "2.0.0")
final class LogstashLogbackEmitter[F[_]](implicit F: Sync[F]) extends TraceSystem.Emitter[F] {
  private val logger = LoggerFactory.getLogger("distributed-trace.logstash")

  final val description: String = "Logstash Logback Emitter"
  final def emit(tc: TraceContext[F]): F[Unit] = F.delay {
    if (logger.isDebugEnabled) {
      val s = tc.currentSpan
      val marker: LogstashMarker =
        append("where", tc.system.data.allValues.asJava).
          and[LogstashMarker](append("root", s.root)).
          and[LogstashMarker](append("trace-id", s.spanId.traceId.toString)).
          and[LogstashMarker](append("span-id", s.spanId.spanId)).
          and[LogstashMarker](append("parent-id", s.spanId.parentSpanId)).
          and[LogstashMarker](append("span-name", s.spanName.value)).
          and[LogstashMarker](append("start-time", s.startTime.show)).
          and[LogstashMarker](append("span-success", s.failure.isEmpty)).
          and[LogstashMarker](append("failure-detail", s.failure.map(_.render).orNull)).
          and[LogstashMarker](append("span-duration", s.duration.toMicros)).
          and[LogstashMarker](append("notes", s.notes.map(n => n.name.value -> n.value).collect { case (name, Some(value)) => name -> value.toString }.toMap.asJava))
      logger.debug(marker, "Span {} {} after {} microseconds",
        s.spanName.value,
        if (s.failure.isEmpty) "succeeded" else "failed",
        s.duration.toMicros.toString)
    }
  }
} 
Example 186
Source File: EcsLogstashLogbackEmitter.scala    From cedi-dtrace   with Apache License 2.0 5 votes vote down vote up
package com.ccadllc.cedi.dtrace
package logstash

import cats.effect.Sync
import cats.implicits._

import net.logstash.logback.marker.LogstashMarker
import net.logstash.logback.marker.Markers._

import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._


final class EcsLogstashLogbackEmitter[F[_]](implicit F: Sync[F]) extends TraceSystem.Emitter[F] {
  object ecs {
    object field {
      val kind: String = "event.kind"
      val module: String = "event.module"
      val root: String = "dtrace.root"
      val traceId: String = "dtrace.trace_id"
      val parentId: String = "dtrace.parent_id"
      val spanId: String = "event.id"
      val spanName: String = "event.action"
      val spanStart: String = "event.start"
      val spanOutcome: String = "event.outcome"
      val spanDuration: String = "event.duration"
      val spanFailureDetail: String = "error.message"
      val spanMetadata: String = "labels"
    }
  }
  private val logger = LoggerFactory.getLogger("distributed-trace.ecs.logstash")
  final val description: String = "ECS-Compliant Logstash Logback Emitter"
  final def emit(tc: TraceContext[F]): F[Unit] = F.delay {
    if (logger.isDebugEnabled) {
      val s = tc.currentSpan
      val marker: LogstashMarker = {
        val m = append(ecs.field.kind, "event").
          and[LogstashMarker](append(ecs.field.module, "dtrace")).
          and[LogstashMarker](append(ecs.field.root, s.root)).
          and[LogstashMarker](append(ecs.field.traceId, s.spanId.traceId.toString)).
          and[LogstashMarker](append(ecs.field.parentId, s.spanId.parentSpanId)).
          and[LogstashMarker](append(ecs.field.spanId, s.spanId.spanId)).
          and[LogstashMarker](append(ecs.field.spanName, s.spanName.value)).
          and[LogstashMarker](append(ecs.field.spanStart, s.startTime.show)).
          and[LogstashMarker](append(ecs.field.spanOutcome, if (s.failure.isEmpty) "success" else "failure")).
          and[LogstashMarker](append(ecs.field.spanDuration, s.duration.toUnit(tc.system.timer.unit))).
          and[LogstashMarker](append(ecs.field.spanFailureDetail, s.failure.map(_.render).orNull)).
          and[LogstashMarker](append(
            ecs.field.spanMetadata,
            (tc.system.data.meta.values ++ s.notes.map(
              n => n.name.value -> n.value).collect { case (name, Some(value)) => name -> value.toString }.toMap).asJava))
        tc.system.data.identity.values.foldLeft(m) { case (acc, (k, v)) => acc.and[LogstashMarker](append(k, v)) }
      }
      logger.debug(marker, s"Span {} {} after {} ${tc.system.timer.unit.toString.toLowerCase}s",
        s.spanName.value,
        if (s.failure.isEmpty) "succeeded" else "failed",
        s.duration.toUnit(tc.system.timer.unit).toString)
    }
  }
} 
Example 187
Source File: Logging.scala    From make-your-programs-free   with GNU General Public License v3.0 5 votes vote down vote up
package free

import scalaz._, Scalaz._
import scalaz.concurrent.Task
import org.slf4j.LoggerFactory

sealed trait Logging[A]
case class Info(line: String) extends Logging[Unit]
case class Warn(line: String) extends Logging[Unit]
case class Error(line: String) extends Logging[Unit]
case class Debug(line: String) extends Logging[Unit]

object Logging {

  class Ops[S[_]](implicit s0: Logging :<: S) {
    def info(line: String): Free[S, Unit] = Free.liftF(s0.inj(Info(line)))
    def warn(line: String): Free[S, Unit] = Free.liftF(s0.inj(Warn(line)))
    def error(line: String): Free[S, Unit] = Free.liftF(s0.inj(Error(line)))
    def debug(line: String): Free[S, Unit] = Free.liftF(s0.inj(Debug(line)))
  }

  object Ops {
    implicit def apply[S[_]](implicit S: Logging :<: S): Ops[S] =
      new Ops[S]
  }
}

object Log4JInterpreter extends (Logging ~> Task) {
  def apply[A](inout: Logging[A]): Task[A] = inout match {
    case Info(line) => Task.delay {
      LoggerFactory.getLogger(this.getClass).info(line)
    }
    case Error(line) => Task.delay {
      LoggerFactory.getLogger(this.getClass).error(line)
    }
    case Warn(line) => Task.delay {
      LoggerFactory.getLogger(this.getClass).warn(line)
    }
    case Debug(line) => Task.delay {
      LoggerFactory.getLogger(this.getClass).debug(line)
    }
  }
}

object RunLogging extends App {

  implicit val ops = new Logging.Ops[Logging]()

  val program = for {
    _ <- ops.info("starting application!")
    _ <- ops.debug("omg, app is running!")
  } yield ()


  val task: Task[Unit] = program.foldMap(Log4JInterpreter)

  task.unsafePerformSync
} 
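The interpreter above calls LoggerFactory.getLogger(this.getClass) on every message; resolving the logger once is a straightforward variant. A minimal sketch, assuming the same Logging algebra defined above and scalaz's Task; the object name is hypothetical.

import org.slf4j.{Logger, LoggerFactory}
import scalaz._
import scalaz.concurrent.Task

// Same algebra as above, but the interpreter resolves the SLF4J logger once.
object CachedLoggerInterpreter extends (Logging ~> Task) {
  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply[A](op: Logging[A]): Task[A] = op match {
    case Info(line)  => Task.delay(logger.info(line))
    case Warn(line)  => Task.delay(logger.warn(line))
    case Error(line) => Task.delay(logger.error(line))
    case Debug(line) => Task.delay(logger.debug(line))
  }
}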
Example 188
Source File: DeltaLoad.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.algo

import com.adidas.analytics.algo.DeltaLoad._
import com.adidas.analytics.algo.core.Algorithm
import com.adidas.analytics.algo.shared.DateComponentDerivation
import com.adidas.analytics.config.DeltaLoadConfiguration.PartitionedDeltaLoadConfiguration
import com.adidas.analytics.util.DataFrameUtils._
import com.adidas.analytics.util._
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
import org.apache.spark.storage.StorageLevel
import org.slf4j.{Logger, LoggerFactory}


  private def getUpsertRecords(deltaRecords: Dataset[Row], resultColumns: Seq[String]): Dataset[Row] = {
    // Create partition window - partitioning by the delta records' logical key (i.e. the technical key of active records)
    val partitionWindow = Window
      .partitionBy(businessKey.map(col): _*)
      .orderBy(technicalKey.map(component => col(component).desc): _*)

    // Ranking & projection
    val rankedDeltaRecords = deltaRecords
      .withColumn(rankingColumnName, row_number().over(partitionWindow))
      .filter(upsertRecordsModesFilterFunction)

    rankedDeltaRecords
      .filter(rankedDeltaRecords(rankingColumnName) === 1)
      .selectExpr(resultColumns: _*)
  }

  protected def withDatePartitions(spark: SparkSession, dfs: DFSWrapper, dataFrames: Vector[DataFrame]): Vector[DataFrame] = {
    logger.info("Adding partitioning information if needed")
    try {
      dataFrames.map { df =>
        if (df.columns.toSeq.intersect(targetPartitions) != targetPartitions) {
          df.transform(withDateComponents(partitionSourceColumn, partitionSourceColumnFormat, targetPartitions))
        } else {
          df
        }
      }
    } catch {
      case e: Throwable =>
        logger.error("Cannot add partitioning information for data frames.", e)
        //TODO: Handle failure case properly
        throw new RuntimeException("Unable to transform data frames.", e)
    }
  }
}


object DeltaLoad {

  private val logger: Logger = LoggerFactory.getLogger(getClass)

  def apply(spark: SparkSession, dfs: DFSWrapper, configLocation: String): DeltaLoad = {
    new DeltaLoad(spark, dfs, configLocation)
  }
} 
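The logger here lives in the companion object, so LoggerFactory.getLogger(getClass) is evaluated against the companion class and the resulting logger name ends with a trailing "$" (e.g. com.adidas.analytics.algo.DeltaLoad$); passing classOf[DeltaLoad] would give the plain class name instead. A minimal, self-contained sketch of the same pattern with hypothetical names:

import org.slf4j.{Logger, LoggerFactory}

class PartitionedJob(name: String) {
  import PartitionedJob.logger
  def run(): Unit = logger.info(s"Running job $name")
}

object PartitionedJob {
  // getClass inside the companion resolves to the PartitionedJob$ class, so the
  // logger name carries the '$' suffix; use classOf[PartitionedJob] to avoid it.
  private val logger: Logger = LoggerFactory.getLogger(getClass)
}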
Example 189
Source File: GzipDecompressor.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.algo

import java.util.concurrent.{Executors, TimeUnit}

import com.adidas.analytics.algo.GzipDecompressor.{changeFileExtension, compressedExtension, _}
import com.adidas.analytics.algo.core.JobRunner
import com.adidas.analytics.config.GzipDecompressorConfiguration
import com.adidas.analytics.util.DFSWrapper
import com.adidas.analytics.util.DFSWrapper._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.IOUtils
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent._
import scala.concurrent.duration._


final class GzipDecompressor protected(val spark: SparkSession, val dfs: DFSWrapper, val configLocation: String)
  extends JobRunner with GzipDecompressorConfiguration {

  private val hadoopConfiguration: Configuration = spark.sparkContext.hadoopConfiguration
  private val fileSystem: FileSystem = dfs.getFileSystem(inputDirectoryPath)


  override def run(): Unit = {
    //check if directory exists
    if (!fileSystem.exists(inputDirectoryPath)){
      logger.error(s"Input directory: $inputDirectoryPath does not exist.")
      throw new RuntimeException(s"Directory $inputDirectoryPath does not exist.")
    }

    val compressedFilePaths = fileSystem.ls(inputDirectoryPath, recursive)
      .filterNot(path => fileSystem.isDirectory(path))
      .filter(_.getName.toLowerCase.endsWith(compressedExtension))

    if (compressedFilePaths.isEmpty) {
      logger.warn(s"Input directory $inputDirectoryPath does not contain compressed files. Skipping...")
    } else {
      implicit val ec: ExecutionContext = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(threadPoolSize))
      Await.result(Future.sequence(
        compressedFilePaths.map { compressedFilePath =>
          Future {
            logger.info(s"Decompressing file: $compressedFilePath")

            val decompressedFileName = changeFileExtension(compressedFilePath.getName, compressedExtension, outputExtension)
            val decompressedFilePath = new Path(compressedFilePath.getParent, decompressedFileName)

            val compressionCodecFactory = new CompressionCodecFactory(hadoopConfiguration)
            val inputCodec = compressionCodecFactory.getCodec(compressedFilePath)

            val inputStream = inputCodec.createInputStream(fileSystem.open(compressedFilePath))
            val output = fileSystem.create(decompressedFilePath)

            IOUtils.copyBytes(inputStream, output, hadoopConfiguration)
            logger.info(s"Finished decompressing file: $compressedFilePath")

            //Delete the compressed file
            fileSystem.delete(compressedFilePath, false)
            logger.info(s"Removed file: $compressedFilePath")
          }
        }
      ), Duration(4, TimeUnit.HOURS))
    }
  }
}


object GzipDecompressor {

  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  private val compressedExtension: String = ".gz"

  def apply(spark: SparkSession, dfs: DFSWrapper, configLocation: String): GzipDecompressor = {
    new GzipDecompressor(spark, dfs, configLocation)
  }

  private def changeFileExtension(fileName: String, currentExt: String, newExt: String): String = {
    val newFileName = fileName.substring(0, fileName.lastIndexOf(currentExt))
    if (newFileName.endsWith(newExt)) newFileName else newFileName + newExt
  }
} 
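changeFileExtension cuts the file name at the last occurrence of the current extension and appends the new extension only if it is not already present, so both "events.gz" and "events.csv.gz" end up as "events.csv". A quick illustrative check that re-implements the private helper (the helper itself is not accessible from outside GzipDecompressor):

object ExtensionCheck extends App {
  // Re-implementation of the private helper above, for illustration only.
  def changeFileExtension(fileName: String, currentExt: String, newExt: String): String = {
    val newFileName = fileName.substring(0, fileName.lastIndexOf(currentExt))
    if (newFileName.endsWith(newExt)) newFileName else newFileName + newExt
  }

  assert(changeFileExtension("events.gz", ".gz", ".csv") == "events.csv")
  assert(changeFileExtension("events.csv.gz", ".gz", ".csv") == "events.csv")
}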
Example 190
Source File: DataFrameUtils.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.util

import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row, functions}
import org.slf4j.{Logger, LoggerFactory}


object DataFrameUtils {

  private val logger: Logger = LoggerFactory.getLogger(getClass)

  type FilterFunction = Row => Boolean

  type PartitionCriteria = Seq[(String, String)]

  def mapPartitionsToDirectories(partitionCriteria: PartitionCriteria): Seq[String] = {
    partitionCriteria.map {
      case (columnName, columnValue) => s"$columnName=$columnValue"
    }
  }

  def buildPartitionsCriteriaMatcherFunc(multiplePartitionsCriteria: Seq[PartitionCriteria], schema: StructType): FilterFunction = {
    val targetPartitions = multiplePartitionsCriteria.flatten.map(_._1).toSet
    val fieldNameToMatchFunctionMapping = schema.fields.filter {
      case StructField(name, _, _, _) => targetPartitions.contains(name)
    }.map {
      case StructField(name, _: ByteType, _, _)    => name -> ((r: Row, value: String) => r.getAs[Byte](name)    == value.toByte)
      case StructField(name, _: ShortType, _, _)   => name -> ((r: Row, value: String) => r.getAs[Short](name)   == value.toShort)
      case StructField(name, _: IntegerType, _, _) => name -> ((r: Row, value: String) => r.getAs[Int](name)     == value.toInt)
      case StructField(name, _: LongType, _, _)    => name -> ((r: Row, value: String) => r.getAs[Long](name)    == value.toLong)
      case StructField(name, _: FloatType, _, _)   => name -> ((r: Row, value: String) => r.getAs[Float](name)   == value.toFloat)
      case StructField(name, _: DoubleType, _, _)  => name -> ((r: Row, value: String) => r.getAs[Double](name)  == value.toDouble)
      case StructField(name, _: BooleanType, _, _) => name -> ((r: Row, value: String) => r.getAs[Boolean](name) == value.toBoolean)
      case StructField(name, _: StringType, _, _)  => name -> ((r: Row, value: String) => r.getAs[String](name)  == value)
    }.toMap

    def convertPartitionCriteriaToFilterFunctions(partitionCriteria: PartitionCriteria): Seq[FilterFunction] = partitionCriteria.map {
      case (name, value) => (row: Row) => fieldNameToMatchFunctionMapping(name)(row, value)
    }

    def joinSinglePartitionFilterFunctionsWithAnd(partitionFilterFunctions: Seq[FilterFunction]): FilterFunction =
      partitionFilterFunctions
        .reduceOption((predicate1, predicate2) => (row: Row) => predicate1(row) && predicate2(row))
        .getOrElse((_: Row) => false)

    multiplePartitionsCriteria
      .map(convertPartitionCriteriaToFilterFunctions)
      .map(joinSinglePartitionFilterFunctionsWithAnd)
      .reduceOption((predicate1, predicate2) => (row: Row) => predicate1(row) || predicate2(row))
      .getOrElse((_: Row) => false)
  }


  implicit class DataFrameHelper(df: DataFrame) {

    def collectPartitions(targetPartitions: Seq[String]): Seq[PartitionCriteria] = {
      logger.info(s"Collecting unique partitions for partitions columns (${targetPartitions.mkString(", ")})")
      val partitions = df.selectExpr(targetPartitions: _*).distinct().collect()

      partitions.map { row =>
        targetPartitions.map { columnName =>
          Option(row.getAs[Any](columnName)) match {
            case Some(columnValue) => columnName -> columnValue.toString
            case None => throw new RuntimeException(s"Partition column '$columnName' contains null value")
          }
        }
      }
    }

    def addMissingColumns(targetSchema: StructType): DataFrame = {
      val dataFieldsSet = df.schema.fieldNames.toSet
      val selectColumns = targetSchema.fields.map { field =>
        if (dataFieldsSet.contains(field.name)) {
          functions.col(field.name)
        } else {
          functions.lit(null).cast(field.dataType).as(field.name)
        }
      }
      df.select(selectColumns: _*)
    }

    def isEmpty: Boolean = df.head(1).isEmpty

    def nonEmpty: Boolean = df.head(1).nonEmpty
  }
} 
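buildPartitionsCriteriaMatcherFunc turns a set of partition criteria into a single Row predicate: pairs inside one criteria sequence are ANDed, separate sequences are ORed. A small illustrative run against rows that carry a schema (the predicate looks fields up by name, so schemaless rows would fail); the schema, values, and the use of Spark's GenericRowWithSchema are assumptions made for brevity.

import com.adidas.analytics.util.DataFrameUtils
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

object PartitionMatcherSketch extends App {
  // Hypothetical schema and criteria: (year=2020 AND month=1) OR (year=2019).
  val schema = StructType(Seq(
    StructField("year", IntegerType),
    StructField("month", IntegerType),
    StructField("payload", StringType)))

  val criteria: Seq[DataFrameUtils.PartitionCriteria] =
    Seq(Seq("year" -> "2020", "month" -> "1"), Seq("year" -> "2019"))

  val matches = DataFrameUtils.buildPartitionsCriteriaMatcherFunc(criteria, schema)

  println(matches(new GenericRowWithSchema(Array(2020, 1, "a"), schema))) // true
  println(matches(new GenericRowWithSchema(Array(2021, 3, "b"), schema))) // false
}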
Example 191
Source File: DataFormat.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.util

import org.apache.spark.sql._
import org.apache.spark.sql.types.StructType
import org.slf4j.{Logger, LoggerFactory}


sealed trait DataFormat {

  protected val logger: Logger = LoggerFactory.getLogger(getClass)

  def read(reader: DataFrameReader, locations: String*): DataFrame

  def write(writer: DataFrameWriter[Row], location: String): Unit
}


object DataFormat {

  case class ParquetFormat(optionalSchema: Option[StructType] = None) extends DataFormat {

    override def read(reader: DataFrameReader, locations: String*): DataFrame = {
      val filesString = locations.mkString(", ")
      logger.info(s"Reading Parquet data from $filesString")
      optionalSchema.fold(reader)(schema => reader.schema(schema)).parquet(locations: _*)
    }

    override def write(writer: DataFrameWriter[Row], location: String): Unit = {
      logger.info(s"Writing Parquet data to $location")
      writer.parquet(location)
    }
  }

  case class DSVFormat(optionalSchema: Option[StructType] = None) extends DataFormat {

    override def read(reader: DataFrameReader, locations: String*): DataFrame = {
      val filesString = locations.mkString(", ")
      logger.info(s"Reading DSV data from $filesString")
      optionalSchema.fold(reader.option("inferSchema", "true"))(schema => reader.schema(schema)).csv(locations: _*)
    }

    override def write(writer: DataFrameWriter[Row], location: String): Unit = {
      logger.info(s"Writing DSV data to $location")
      writer.csv(location)
    }
  }

  case class JSONFormat(optionalSchema: Option[StructType] = None) extends DataFormat {

    override def read(reader: DataFrameReader, locations: String*): DataFrame = {
      val filesString = locations.mkString(", ")
      logger.info(s"Reading JSON data from $filesString")
      optionalSchema.fold(reader.option("inferSchema", "true"))(schema => reader.schema(schema)).json(locations: _*)
    }

    override def write(writer: DataFrameWriter[Row], location: String): Unit = {
      logger.info(s"Writing JSON data to $location")
      writer.json(location)
    }
  }
} 
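Each DataFormat wraps a DataFrameReader or DataFrameWriter and logs the locations through its own class-level SLF4J logger before delegating. A hypothetical read/write round trip with ParquetFormat; the paths and the local SparkSession are illustrative only.

import com.adidas.analytics.util.DataFormat.ParquetFormat
import org.apache.spark.sql.SparkSession

object DataFormatSketch extends App {
  val spark = SparkSession.builder().appName("data-format-sketch").master("local[*]").getOrCreate()

  // No schema is supplied, so the Parquet footer schema is used as-is.
  val format = ParquetFormat()
  val df = format.read(spark.read, "/tmp/landing/events")
  format.write(df.write.mode("overwrite"), "/tmp/lake/events")

  spark.stop()
}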
Example 192
Source File: InputReader.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.util

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.slf4j.{Logger, LoggerFactory}


  def newTableLocationReader(table: String, format: DataFormat, options: Map[String, String] = Map.empty): TableLocationReader = {
    TableLocationReader(table, format, options)
  }

  case class TableReader(table: String, options: Map[String, String]) extends InputReader {
    override def read(sparkSession: SparkSession): DataFrame = {
      logger.info(s"Reading data from table $table")
      sparkSession.read.options(options).table(table)
    }
  }

  case class FileSystemReader(location: String, format: DataFormat, options: Map[String, String]) extends InputReader {
    override def read(sparkSession: SparkSession): DataFrame = {
      logger.info(s"Reading data from location $location")
      format.read(sparkSession.read.options(options), location)
    }
  }

  case class TableLocationReader(table: String, format: DataFormat, options: Map[String, String]) extends InputReader {
    override def read(sparkSession: SparkSession): DataFrame = {
      val location = HiveTableAttributeReader(table, sparkSession).getTableLocation
      logger.info(s"Reading data from location $location")
      format.read(sparkSession.read.options(options), location)
    }
  }
} 
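Each reader logs the resolved location and then delegates to its DataFormat. A hypothetical use of FileSystemReader, assuming it is a member of an enclosing InputReader object as the truncated header suggests; the location, options, and local SparkSession are illustrative.

import com.adidas.analytics.util.DataFormat.DSVFormat
import com.adidas.analytics.util.InputReader
import org.apache.spark.sql.SparkSession

object InputReaderSketch extends App {
  val spark = SparkSession.builder().appName("input-reader-sketch").master("local[*]").getOrCreate()

  // The reader logs the location, then hands spark.read (with options) to the format.
  val reader = InputReader.FileSystemReader("/tmp/landing/dsv", DSVFormat(), Map("delimiter" -> "|"))
  val df = reader.read(spark)
  df.show()

  spark.stop()
}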
Example 193
Source File: ConfigReader.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.util

import java.text.DecimalFormatSymbols

import org.slf4j.{Logger, LoggerFactory}

import scala.util.parsing.json.{JSON, JSONArray, JSONObject}


class ConfigReader(jsonContent: String) extends Serializable {

  private val logger: Logger = LoggerFactory.getLogger(getClass)

  private val decimalSeparator: Char = new DecimalFormatSymbols().getDecimalSeparator

  JSON.globalNumberParser = (in: String) => if (in.contains(decimalSeparator)) in.toDouble else in.toInt

  private lazy val config = JSON.parseRaw(jsonContent) match {
    case Some(JSONObject(obj)) => obj
    case _ => throw new IllegalArgumentException(s"Wrong format of the configuration file: $jsonContent")
  }

  def getAsSeq[T](propertyName: String): Seq[T] = {
    config.get(propertyName) match {
      case Some(JSONArray(list)) => list.map(_.asInstanceOf[T])
      case _ => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName")
    }
  }

  def getAsMap[K, V](propertyName: String): Map[K,V] = {
    config.get(propertyName) match {
      case Some(JSONObject(obj)) => obj.asInstanceOf[Map[K,V]]
      case _ => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName")
    }
  }

  def getAs[T](propertyName: String): T = {
    config.get(propertyName) match {
      case Some(property) => property.asInstanceOf[T]
      case None => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName")
    }
  }

  def getAsOption[T](propertyName: String): Option[T] = {
    config.get(propertyName).map(property => property.asInstanceOf[T])
  }

  def getAsOptionSeq[T](propertyName: String): Option[Seq[T]] = {
    config.get(propertyName).map(_ => getAsSeq(propertyName))
  }

  def contains(propertyName: String): Boolean = {
    config.contains(propertyName)
  }
}

object ConfigReader {
  def apply(jsonContent: String): ConfigReader = new ConfigReader(jsonContent)
} 
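ConfigReader parses the JSON lazily, once, and its typed accessors throw IllegalArgumentException for missing properties, while getAsOption returns None instead. An illustrative round trip with a made-up configuration document:

import com.adidas.analytics.util.ConfigReader

object ConfigReaderSketch extends App {
  // A made-up configuration document.
  val json =
    """{"target_table": "lake.events", "partition_columns": ["year", "month"], "delimiter": "|"}"""

  val config = ConfigReader(json)

  val table      = config.getAs[String]("target_table")
  val partitions = config.getAsSeq[String]("partition_columns")
  val delimiter  = config.getAsOption[String]("delimiter").getOrElse(",")

  println(s"$table partitioned by ${partitions.mkString(", ")} (delimiter '$delimiter')")
}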
Example 194
Source File: BaseAlgorithmTest.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.utils

import java.util.UUID

import com.adidas.analytics.util.{DFSWrapper, LoadMode}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.types.StructType
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.io.Source

trait BaseAlgorithmTest extends Suite with BeforeAndAfterAll with BeforeAndAfterEach with HDFSSupport with SparkSupport {

  override val logger: Logger = LoggerFactory.getLogger(getClass)
  override val testAppId: String = UUID.randomUUID().toString
  override val localTestDir: String = "target"
  override val sparkHadoopConf: Option[Configuration] = Some(fs.getConf)

  val hdfsRootTestPath: Path = new Path("hdfs:///tmp/tests")
  val dfs: DFSWrapper = DFSWrapper(spark.sparkContext.hadoopConfiguration)

  override def afterAll(): Unit = {
    spark.stop()
    cluster.shutdown(true)
  }

  override def beforeEach(): Unit = {
    fs.delete(hdfsRootTestPath, true)
    fs.mkdirs(hdfsRootTestPath)
  }

  override def afterEach(): Unit = {
    spark.sqlContext.clearCache()
    spark.sparkContext.getPersistentRDDs.foreach {
      case (_, rdd) => rdd.unpersist(true)
    }
  }

  def resolveResource(fileName: String, withProtocol: Boolean = false): String = {
    val resource = s"${getClass.getSimpleName}/$fileName"
    logger.info(s"Resolving resource $resource")
    val location = getClass.getClassLoader.getResource(resource).getPath
    if (withProtocol) {
      s"file://$location"
    } else {
      location
    }
  }

  def getResourceAsText(fileName: String): String = {
    val resource = s"${getClass.getSimpleName}/$fileName"
    logger.info(s"Reading resource $resource")
    val stream = getClass.getClassLoader.getResourceAsStream(resource)
    Source.fromInputStream(stream).mkString
  }

  def copyResourceFileToHdfs(resource: String, targetPath: Path): Unit = {
    val localResourceRoot = resolveResource("", withProtocol = true)
    val sourcePath = new Path(localResourceRoot, resource)
    logger.info(s"Copying local resource to HDFS $sourcePath -> $targetPath")
    fs.copyFromLocalFile(sourcePath, targetPath)
  }

  
  def createAndLoadParquetTable(database: String, tableName: String, partitionColumns: Option[Seq[String]] = None, schema: StructType, filePath: String, reader: FileReader): Table = {
    val table = createParquetTable(database, tableName, partitionColumns, schema)
    val inputTableDataURI = resolveResource(filePath, withProtocol = true)
    table.write(Seq(inputTableDataURI), reader, LoadMode.OverwritePartitions)
    table
  }
} 
Example 195
Source File: Retries.scala    From http-verbs   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.http

import akka.actor.ActorSystem
import akka.pattern.after
import com.typesafe.config.Config
import java.util.concurrent.TimeUnit
import javax.net.ssl.SSLException
import org.slf4j.LoggerFactory
import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.concurrent.{ExecutionContext, Future}
import uk.gov.hmrc.play.http.logging.Mdc

trait Retries {

  protected def actorSystem: ActorSystem

  protected def configuration: Option[Config]

  private val logger = LoggerFactory.getLogger("application")

  def retry[A](verb: String, url: String)(block: => Future[A])(implicit ec: ExecutionContext): Future[A] = {
    def loop(remainingIntervals: Seq[FiniteDuration])(mdcData: Map[String, String])(block: => Future[A]): Future[A] =
      // scheduling will lose MDC data. Here we explicitly make it available to the block.
      Mdc.withMdc(block, mdcData)
        .recoverWith {
          case ex @ `sslEngineClosedMatcher`() if remainingIntervals.nonEmpty =>
            val delay = remainingIntervals.head
            logger.warn(s"Retrying $verb $url in $delay due to '${ex.getMessage}' error")
            after(delay, actorSystem.scheduler)(loop(remainingIntervals.tail)(mdcData)(block))
        }
    loop(intervals)(Mdc.mdcData)(block)
  }

  private[http] lazy val intervals: Seq[FiniteDuration] = {
    val defaultIntervals = Seq(500.millis, 1.second, 2.seconds, 4.seconds, 8.seconds)
    configuration
      .map { c =>
        val path = "http-verbs.retries.intervals"
        if (c.hasPath(path)) {
          c.getDurationList(path).asScala.map { d =>
            FiniteDuration(d.toMillis, TimeUnit.MILLISECONDS)
          }
        } else {
          defaultIntervals
        }
      }
      .getOrElse(defaultIntervals)
  }

  private lazy val sslEngineClosedMatcher =
    new SSlEngineClosedMatcher(isEnabled("ssl-engine-closed-already"))

  private class SSlEngineClosedMatcher(enabled: Boolean) {
    def unapply(ex: Throwable): Boolean =
      ex match {
        case _: SSLException if ex.getMessage == "SSLEngine closed already" => enabled
        case _                                                              => false
      }
  }

  private def isEnabled(name: String): Boolean =
    configuration.exists { c =>
      val path = s"http-verbs.retries.$name.enabled"
      c.hasPath(path) && c.getBoolean(path)
    }

} 
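The retry schedule comes from http-verbs.retries.intervals when that path is present and falls back to the exponential defaults otherwise, and the SSLEngine matcher is gated by http-verbs.retries.ssl-engine-closed-already.enabled. A hypothetical connector wiring those keys up through a parsed Config; the object, actor system name, and URL are assumptions.

import akka.actor.ActorSystem
import com.typesafe.config.{Config, ConfigFactory}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import uk.gov.hmrc.http.Retries

// Hypothetical connector; the configuration keys are the ones read by the trait above.
object RetryingConnector extends Retries {
  override protected lazy val actorSystem: ActorSystem = ActorSystem("retry-sketch")
  override protected val configuration: Option[Config] = Some(ConfigFactory.parseString(
    """http-verbs.retries.intervals = [100ms, 200ms, 400ms]
      |http-verbs.retries.ssl-engine-closed-already.enabled = true
      |""".stripMargin))

  def call(): Future[String] =
    retry("GET", "https://example.internal/resource") {
      Future.successful("ok") // a real HTTP call would be made here
    }
}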
Example 196
Source File: ConnectionTracing.scala    From http-verbs   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.http.logging

import org.slf4j.LoggerFactory
import uk.gov.hmrc.http.{HttpException, Upstream4xxResponse}

import scala.concurrent._
import scala.util.{Failure, Success, Try}

trait ConnectionTracing {

  lazy val connectionLogger = LoggerFactory.getLogger("connector")

  def withTracing[T](method: String, uri: String)(
    body: => Future[T])(implicit ld: LoggingDetails, ec: ExecutionContext): Future[T] = {
    val startAge = ld.age
    val f        = body
    f.onComplete(logResult(ld, method, uri, startAge))
    f
  }

  def logResult[A](ld: LoggingDetails, method: String, uri: String, startAge: Long)(result: Try[A]) = result match {
    case Success(_) => connectionLogger.debug(formatMessage(ld, method, uri, startAge, "ok"))
    case Failure(ex: HttpException) if ex.responseCode == 404 =>
      connectionLogger.info(formatMessage(ld, method, uri, startAge, s"failed ${ex.getMessage}"))
    case Failure(ex: Upstream4xxResponse) if ex.upstreamResponseCode == 404 =>
      connectionLogger.info(formatMessage(ld, method, uri, startAge, s"failed ${ex.getMessage}"))
    case Failure(ex) => connectionLogger.warn(formatMessage(ld, method, uri, startAge, s"failed ${ex.getMessage}"))
  }

  import uk.gov.hmrc.http.logging.ConnectionTracing.formatNs

  def formatMessage(ld: LoggingDetails, method: String, uri: String, startAge: Long, message: String) = {
    val requestId    = ld.requestId.getOrElse("")
    val requestChain = ld.requestChain
    val durationNs   = ld.age - startAge
    s"$requestId:$method:$startAge:${formatNs(startAge)}:$durationNs:${formatNs(durationNs)}:${requestChain.value}:$uri:$message"
  }
}

object ConnectionTracing {
  def formatNs(ns: Long): String = {
    val nsPart = ns % 1000
    val usPart = ns / 1000 % 1000
    val msPart = ns / 1000000 % 1000
    val sPart  = ns / 1000000000

    if (sPart > 0) f"${(sPart * 1000 + msPart) / 1000.0}%03.3fs"
    else if (msPart > 0) f"${(msPart * 1000 + usPart) / 1000.0}%03.3fms"
    else if (usPart > 0) f"${(usPart * 1000 + nsPart) / 1000.0}%03.3fus"
    else s"${ns}ns"
  }
} 
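formatNs picks the coarsest non-zero unit and renders the next finer unit as three decimals. A few illustrative values, computed from the arithmetic above:

import uk.gov.hmrc.http.logging.ConnectionTracing.formatNs

object FormatNsSketch extends App {
  println(formatNs(850L))         // 850ns
  println(formatNs(45678L))       // 45.678us
  println(formatNs(1234567L))     // 1.234ms
  println(formatNs(2500000000L))  // 2.500s
}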
Example 197
Source File: Main.scala    From etl-light   with MIT License 5 votes vote down vote up
package yamrcraft.etlite

import org.slf4j.LoggerFactory
import yamrcraft.etlite.processors.EtlProcessor
import yamrcraft.etlite.utils.{DLock, FakeLock, FileUtils}

object Main {

  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {

    if (args.length != 1) {
      println(
        s"""
           |Usage: Main <config>
           |  <config> path to the application configuration file; use the 'file:///' prefix if the file is located on the local file system.
         """.stripMargin)
      System.exit(1)
    }

    val configPath = args(0)
    logger.info(s"Configuration file = '$configPath'")
    val settings = new Settings(FileUtils.readContent(configPath))

    val lock = {
      if (settings.etl.lock.enabled)
        new DLock(settings.etl.lock.zookeeperConnect, settings.etl.lock.path, settings.etl.lock.waitForLockSeconds)
      else
        new FakeLock
    }

    if (lock.tryLock()) {
      EtlProcessor.run(settings)
      lock.release()
    } else {
      logger.error("can't acquire zookeeper lock!")
    }
  }

} 
Example 198
Source File: PartitionProcessor.scala    From etl-light   with MIT License 5 votes vote down vote up
package yamrcraft.etlite.processors

import java.io.IOException

import org.slf4j.LoggerFactory
import yamrcraft.etlite.transformers.InboundMessage
import yamrcraft.etlite.writers.{ErrorInfo, ErrorEventWriter}
import yamrcraft.etlite.{ErrorType, EtlException, EtlSettings}

import scala.util.Try

class PartitionProcessor(jobId: Long, partitionId: Int, settings: EtlSettings) {

  val logger = LoggerFactory.getLogger(this.getClass)

  val pipeline = settings.pipeline.createFactory.createPipeline(settings.pipeline, jobId, partitionId)

  val errorsWriter: ErrorEventWriter = new ErrorEventWriter(settings.errorsFolder, jobId, partitionId)

  def processPartition(partition: Iterator[InboundMessage]): Unit = {
    logger.info(s"partition processing started [jobId=$jobId, partitionId=$partitionId]")

    partition foreach { inbound =>

      try {
        pipeline.processMessage(inbound)

      } catch {
        case e@(_: Exception) =>
          logger.error("event processing error", e)
          val errorType = e match {
            case ex: EtlException => ex.errorType.toString
            case _ : IOException => ErrorType.WriteError.toString
            case _ => ErrorType.SystemError.toString
          }
          val cause = Try(e.getCause.getMessage).getOrElse("")
          val errorInfo = ErrorInfo(errorType, Some(cause))
          errorsWriter.write((inbound.msg, errorInfo))
      }
    }

    pipeline.writer.commit()
    errorsWriter.commit()

    logger.info(s"partition processing ended [jobId=$jobId, partitionId=$partitionId]")
  }

} 
Example 199
Source File: EtlProcessor.scala    From etl-light   with MIT License 5 votes vote down vote up
package yamrcraft.etlite.processors

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.DefaultDecoder
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka._
import org.slf4j.LoggerFactory
import yamrcraft.etlite.Settings
import yamrcraft.etlite.state.{KafkaOffsetsState, KafkaStateManager}
import yamrcraft.etlite.transformers.InboundMessage

object EtlProcessor {

  val logger = LoggerFactory.getLogger(this.getClass)

  def run(settings: Settings) = {
    val context = createContext(settings)

    val stateManager = new KafkaStateManager(settings.etl.state)

    val lastState = stateManager.readState
    logger.info(s"last persisted state: $lastState")

    val currState = stateManager.fetchNextState(lastState, settings)
    logger.info(s"batch working state: $currState")

    val rdd = createRDD(context, currState, settings)
    processRDD(rdd, currState.jobId, settings)

    logger.info("committing state")
    stateManager.commitState(currState)
  }

  private def createContext(settings: Settings) = {
    val sparkConf = new SparkConf()
      .setAppName(settings.spark.appName)
      .setAll(settings.spark.conf)

    new SparkContext(sparkConf)
  }

  private def createRDD(context: SparkContext, state: KafkaOffsetsState, settings: Settings): RDD[InboundMessage] = {
    KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder, InboundMessage](
      context,
      settings.kafka.properties,
      state.ranges.toArray,
      Map[TopicAndPartition, Broker](),
      (msgAndMeta: MessageAndMetadata[Array[Byte], Array[Byte]]) => { InboundMessage(msgAndMeta.topic, msgAndMeta.key(), msgAndMeta.message()) }
    )
  }

  private def processRDD(kafkaRDD: RDD[InboundMessage], jobId: Long, settings: Settings) = {
    // passed to remote workers
    val etlSettings = settings.etl

    logger.info(s"RDD processing started [rdd=${kafkaRDD.id}, jobId=$jobId]")

    val rdd = settings.etl.maxNumOfOutputFiles.map(kafkaRDD.coalesce(_)).getOrElse(kafkaRDD)

    rdd.foreachPartition { partition =>
      // executed at the worker
      new PartitionProcessor(jobId, TaskContext.get.partitionId(), etlSettings)
        .processPartition(partition)
    }

    logger.info(s"RDD processing ended [rdd=${kafkaRDD.id}, jobId=$jobId]")
  }


} 
Example 200
Source File: DLock.scala    From etl-light   with MIT License 5 votes vote down vote up
package yamrcraft.etlite.utils

import java.util.concurrent.TimeUnit

import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreMutex
import org.apache.curator.framework.{CuratorFramework, CuratorFrameworkFactory}
import org.apache.curator.retry.ExponentialBackoffRetry
import org.slf4j.LoggerFactory


class DLock(zkConnect: String, lockFile: String, waitForLockSeconds: Int) {

  val logger = LoggerFactory.getLogger(this.getClass)

  private var zkClient: Option[CuratorFramework] = None
  private var lock: Option[InterProcessSemaphoreMutex] = None

  def tryLock(): Boolean = {
    require(lock.isEmpty, "lock can't be reused")
    logger.info("acquiring lock...")
    zkClient = Some(CuratorFrameworkFactory.newClient(zkConnect, new ExponentialBackoffRetry(1000, 3)))
    zkClient.get.start()
    lock = Some(new InterProcessSemaphoreMutex(zkClient.get, lockFile))
    lock.get.acquire(waitForLockSeconds, TimeUnit.SECONDS)
  }

  def release() = {
    require(lock.nonEmpty, "lock wasn't acquired")
    logger.info("releasing lock")
    lock.foreach(_.release())
    zkClient.foreach(_.close())
  }

}

class FakeLock extends DLock("", "", 0) {
  override def tryLock() = true

  override def release() = {}
}