org.slf4j.Logger Scala Examples

The following examples show how to use org.slf4j.Logger from Scala. Each example is an excerpt from an open-source project; the project, source file, and license are noted in the heading above it.
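Before the project-specific examples, here is a minimal, self-contained sketch of the pattern most of them follow: obtain a Logger from LoggerFactory, prefer parameterized messages over string concatenation, and pass a Throwable as the last argument to record a stack trace. GreetingService and its greet method are hypothetical names used only for illustration.

import org.slf4j.{Logger, LoggerFactory}

object GreetingService {
  // Name the logger after the enclosing type; stripping the trailing "$"
  // keeps Scala object names readable in log output.
  private val logger: Logger =
    LoggerFactory.getLogger(getClass.getName.stripSuffix("$"))

  def greet(name: String): Unit = {
    // The {} placeholder defers message construction until the level is enabled.
    logger.info("Saying hello to {}", name)
    try {
      require(name.nonEmpty, "name must not be empty")
    } catch {
      case e: IllegalArgumentException =>
        // A Throwable passed as the final argument is logged with its stack trace.
        logger.warn("Could not greet caller", e)
    }
  }
}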
Example 1
Source File: SnowflakeConnectorUtils.scala    From spark-snowflake   with Apache License 2.0
package net.snowflake.spark.snowflake

import java.nio.file.Paths
import java.security.InvalidKeyException

import net.snowflake.spark.snowflake.pushdowns.SnowflakeStrategy
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}


object SnowflakeConnectorUtils {

  // The logger and enablePushdownSession are used below but their declarations
  // were cut from this excerpt; the versions here are a minimal reconstruction
  // (assumption) so the snippet compiles as a whole.
  val log: Logger = LoggerFactory.getLogger(getClass)

  def enablePushdownSession(session: SparkSession): Unit = {
    if (!session.experimental.extraStrategies
          .exists(_.isInstanceOf[SnowflakeStrategy])) {
      session.experimental.extraStrategies ++= Seq(new SnowflakeStrategy)
    }
  }

  def disablePushdownSession(session: SparkSession): Unit = {
    session.experimental.extraStrategies = session.experimental.extraStrategies
      .filterNot(strategy => strategy.isInstanceOf[SnowflakeStrategy])
  }

  def setPushdownSession(session: SparkSession, enabled: Boolean): Unit = {
    if (enabled) {
      enablePushdownSession(session)
    } else {
      disablePushdownSession(session)
    }
  }

  // TODO: Improve error handling with retries, etc.

  @throws[SnowflakeConnectorException]
  def handleS3Exception(ex: Exception): Unit = {
    if (ex.getCause.isInstanceOf[InvalidKeyException]) {
      // Most likely cause: Unlimited strength policy files not installed
      var msg: String = "Strong encryption with Java JRE requires JCE " +
        "Unlimited Strength Jurisdiction Policy " +
        "files. " +
        "Follow JDBC client installation instructions " +
        "provided by Snowflake or contact Snowflake " +
        "Support. This needs to be installed in the Java runtime for all Spark executor nodes."

      log.error(
        "JCE Unlimited Strength policy files missing: {}. {}.",
        ex.getMessage: Any,
        ex.getCause.getMessage: Any
      )

      val bootLib: String =
        java.lang.System.getProperty("sun.boot.library.path")

      if (bootLib != null) {
        msg += " The target directory on your system is: " + Paths
          .get(bootLib, "security")
          .toString
        log.error(msg)
      }

      throw new SnowflakeConnectorException(msg)
    } else {
      throw ex
    }
  }
}

class SnowflakeConnectorException(message: String) extends Exception(message)
class SnowflakePushdownException(message: String)
  extends SnowflakeConnectorException(message)
class SnowflakeConnectorFeatureNotSupportException(message: String)
  extends Exception(message)

class SnowflakePushdownUnsupportedException(message: String,
                                            val unsupportedOperation: String,
                                            val details: String,
                                            val isKnownUnsupportedOperation: Boolean)
  extends Exception(message) 
Example 2
Source File: TestHook.scala    From spark-snowflake   with Apache License 2.0
package net.snowflake.spark.snowflake.test

import net.snowflake.client.jdbc.{ErrorCode, SnowflakeSQLException}
import net.snowflake.spark.snowflake.test.TestHookFlag.TestHookFlag
import org.slf4j.{Logger, LoggerFactory}

object TestHookFlag extends Enumeration {
  type TestHookFlag = Value

  // All predefined test hook names start with TH_ (TEST HOOK).
  val TH_WRITE_ERROR_AFTER_DROP_OLD_TABLE = Value("TH_WRITE_ERROR_AFTER_DROP_OLD_TABLE")
  val TH_WRITE_ERROR_AFTER_CREATE_NEW_TABLE = Value("TH_WRITE_ERROR_AFTER_CREATE_NEW_TABLE")
  val TH_WRITE_ERROR_AFTER_TRUNCATE_TABLE = Value("TH_WRITE_ERROR_AFTER_TRUNCATE_TABLE")
  val TH_WRITE_ERROR_AFTER_COPY_INTO = Value("TH_WRITE_ERROR_AFTER_COPY_INTO")
  val TH_GCS_UPLOAD_RAISE_EXCEPTION = Value("TH_GCS_UPLOAD_RAISE_EXCEPTION")
  val TH_COPY_INTO_TABLE_MISS_FILES_SUCCESS = Value("TH_COPY_INTO_TABLE_MISS_FILES_SUCCESS")
  val TH_COPY_INTO_TABLE_MISS_FILES_FAIL = Value("TH_COPY_INTO_TABLE_MISS_FILES_FAIL")
}

object TestHook {
  val log: Logger = LoggerFactory.getLogger(getClass)

  private val ENABLED_TEST_FLAGS =
    new scala.collection.mutable.HashSet[TestHookFlag]()

  private var IS_TEST_ENABLED = false

  private val TEST_MESSAGE_PREFIX =
    "Internal test error (should NOT be seen by user):"

  // Enable test
  private[snowflake] def enableTestHook() : Unit = {
    IS_TEST_ENABLED = true
  }

  // Disable test
  private[snowflake] def disableTestHook() : Unit = {
    IS_TEST_ENABLED = false
    ENABLED_TEST_FLAGS.clear()
  }

  // Enable a specific test flag
  private[snowflake] def enableTestFlag(testFlag : TestHookFlag): Unit = {
    enableTestHook()
    if (!ENABLED_TEST_FLAGS.contains(testFlag)) {
      ENABLED_TEST_FLAGS.add(testFlag)
    }
  }

  // Enable a specific test flag only (all other flags are disabled)
  private[snowflake] def enableTestFlagOnly(testFlag : TestHookFlag): Unit = {
    disableTestHook()
    enableTestFlag(testFlag)
  }

  // Disable a specific test flag
  private[snowflake] def disableTestFlag(testFlag : TestHookFlag): Unit = {
    if (ENABLED_TEST_FLAGS.contains(testFlag)) {
      ENABLED_TEST_FLAGS.remove(testFlag)
    }
    if (ENABLED_TEST_FLAGS.isEmpty) {
      disableTestHook()
    }
  }

  // Check whether a flag is enabled
  private[snowflake] def isTestFlagEnabled(testFlag : TestHookFlag): Boolean = {
    IS_TEST_ENABLED && ENABLED_TEST_FLAGS.contains(testFlag)
  }

  // Raise exception if the specific test flag is enabled.
  private[snowflake] def raiseExceptionIfTestFlagEnabled(testFlag: TestHookFlag,
                                                         errorMessage: String)
  : Unit = {
    if (isTestFlagEnabled(testFlag)) {
      throw new SnowflakeSQLException(ErrorCode.INTERNAL_ERROR,
        s"$TEST_MESSAGE_PREFIX  $errorMessage")
    }
  }
} 
Example 3
Source File: ClusterTest.scala    From spark-snowflake   with Apache License 2.0
package net.snowflake.spark.snowflake

import net.snowflake.spark.snowflake.testsuite.ClusterTestSuiteBase
import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.SparkSession

object ClusterTest {
  val log: Logger = LoggerFactory.getLogger(getClass)

  val RemoteMode = "remote"
  val LocalMode = "local"

  val TestSuiteSeparator = ";"

  // Driver function to run the test.
  def main(args: Array[String]): Unit = {
    log.info(s"Test Spark Connector: ${net.snowflake.spark.snowflake.Utils.VERSION}")

    val usage = s"""Two parameters are need: [local | remote] and
                    | testClassNames (using ';' to separate multiple classes)
                    |""".stripMargin
    log.info(usage)

    if (args.length < 2) {
      throw new Exception(s"At least two parameters are need. Usage: $usage")
    }

    // Set up the Spark session.
    // Local mode is introduced for debugging purposes.
    val runMode = args(0)
    var sparkSessionBuilder = SparkSession
      .builder()
      .appName("Spark SQL basic example")
      .config("spark.some.config.option", "some-value")
    if (runMode.equalsIgnoreCase(LocalMode)) {
      sparkSessionBuilder = sparkSessionBuilder
        .config("spark.master", "local")
    }
    val spark = sparkSessionBuilder.getOrCreate()

    // Run specified test suites
    val testSuiteNames = args(1).split(TestSuiteSeparator)
    for (testSuiteName <- testSuiteNames) {
      if (!testSuiteName.trim.isEmpty) {
        // Retrieve commit ID from env.
        val commitID = scala.util.Properties
          .envOrElse(TestUtils.GITHUB_SHA, "commit id not set")

        // val testSuiteName = "net.snowflake.spark.snowflake.testsuite.BasicReadWriteSuite"
        val resultBuilder = new ClusterTestResultBuilder()
          .withTestType("Scala")
          .withTestCaseName(testSuiteName)
          .withCommitID(commitID)
          .withTestStatus(TestUtils.TEST_RESULT_STATUS_INIT)
          .withStartTimeInMill(System.currentTimeMillis())
          .withGithubRunId(TestUtils.githubRunId)

        try {
          Class
            .forName(testSuiteName)
            .newInstance()
            .asInstanceOf[ClusterTestSuiteBase]
            .run(spark, resultBuilder)
        } catch {
          case e: Throwable =>
            log.error(e.getMessage)
            resultBuilder
              .withTestStatus(TestUtils.TEST_RESULT_STATUS_EXCEPTION)
              .withReason(e.getMessage)
        } finally {
          // Set test end time.
          resultBuilder
            .withEndTimeInMill(System.currentTimeMillis())
          // Write test result
          resultBuilder.build().writeToSnowflake()
        }
      }
    }

    spark.stop()
  }
} 
Example 4
Source File: AuthServiceJWT.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.auth

import java.util.concurrent.{CompletableFuture, CompletionStage}

import com.daml.lf.data.Ref
import com.daml.jwt.{JwtVerifier, JwtVerifierBase}
import com.daml.ledger.api.auth.AuthServiceJWT.Error
import io.grpc.Metadata
import org.slf4j.{Logger, LoggerFactory}
import spray.json._

import scala.collection.mutable.ListBuffer
import scala.util.Try


class AuthServiceJWT(verifier: JwtVerifierBase) extends AuthService {

  protected val logger: Logger = LoggerFactory.getLogger(AuthServiceJWT.getClass)

  override def decodeMetadata(headers: Metadata): CompletionStage[Claims] = {
    decodeAndParse(headers).fold(
      error => {
        logger.warn("Authorization error: " + error.message)
        CompletableFuture.completedFuture(Claims.empty)
      },
      token => CompletableFuture.completedFuture(payloadToClaims(token))
    )
  }

  private[this] def parsePayload(jwtPayload: String): Either[Error, AuthServiceJWTPayload] = {
    import AuthServiceJWTCodec.JsonImplicits._
    Try(JsonParser(jwtPayload).convertTo[AuthServiceJWTPayload]).toEither.left.map(t =>
      Error("Could not parse JWT token: " + t.getMessage))
  }

  private[this] def decodeAndParse(headers: Metadata): Either[Error, AuthServiceJWTPayload] = {
    val bearerTokenRegex = "Bearer (.*)".r

    for {
      headerValue <- Option
        .apply(headers.get(AUTHORIZATION_KEY))
        .toRight(Error("Authorization header not found"))
      token <- bearerTokenRegex
        .findFirstMatchIn(headerValue)
        .map(_.group(1))
        .toRight(Error("Authorization header does not use Bearer format"))
      decoded <- verifier
        .verify(com.daml.jwt.domain.Jwt(token))
        .toEither
        .left
        .map(e => Error("Could not verify JWT token: " + e.message))
      parsed <- parsePayload(decoded.payload)
    } yield parsed
  }

  private[this] def payloadToClaims(payload: AuthServiceJWTPayload): Claims = {
    val claims = ListBuffer[Claim]()

    // Any valid token authorizes the user to use public services
    claims.append(ClaimPublic)

    if (payload.admin)
      claims.append(ClaimAdmin)

    payload.actAs
      .foreach(party => claims.append(ClaimActAsParty(Ref.Party.assertFromString(party))))

    payload.readAs
      .foreach(party => claims.append(ClaimReadAsParty(Ref.Party.assertFromString(party))))

    Claims(
      claims = claims.toList,
      ledgerId = payload.ledgerId,
      participantId = payload.participantId,
      applicationId = payload.applicationId,
      expiration = payload.exp,
    )
  }
}

object AuthServiceJWT {
  final case class Error(message: String)

  def apply(verifier: com.auth0.jwt.interfaces.JWTVerifier) =
    new AuthServiceJWT(new JwtVerifier(verifier))

  def apply(verifier: JwtVerifierBase) =
    new AuthServiceJWT(verifier)
} 
Example 5
Source File: AuthorizationInterceptor.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.auth.interceptor

import com.daml.ledger.api.auth.{AuthService, Claims}
import com.daml.platform.server.api.validation.ErrorFactories.unauthenticated
import io.grpc.{
  Context,
  Contexts,
  Metadata,
  ServerCall,
  ServerCallHandler,
  ServerInterceptor,
  Status
}
import org.slf4j.{Logger, LoggerFactory}

import scala.compat.java8.FutureConverters
import scala.concurrent.ExecutionContext
import scala.util.{Failure, Success, Try}


final class AuthorizationInterceptor(protected val authService: AuthService, ec: ExecutionContext)
    extends ServerInterceptor {

  private val logger: Logger = LoggerFactory.getLogger(AuthorizationInterceptor.getClass)
  private val internalAuthenticationError =
    Status.INTERNAL.withDescription("Failed to get claims from request metadata")

  import AuthorizationInterceptor.contextKeyClaim

  override def interceptCall[ReqT, RespT](
      call: ServerCall[ReqT, RespT],
      headers: Metadata,
      nextListener: ServerCallHandler[ReqT, RespT]): ServerCall.Listener[ReqT] = {
    // Note: Context uses ThreadLocal storage, we need to capture it outside of the async block below.
    // Contexts are immutable and safe to pass around.
    val prevCtx = Context.current

    // The method interceptCall() must return a Listener.
    // The target listener is created by calling `Contexts.interceptCall()`.
    // However, this is only done after we have asynchronously received the claims.
    // Therefore, we need to return a listener that buffers all messages until the target listener is available.
    new AsyncForwardingListener[ReqT] {
      FutureConverters
        .toScala(authService.decodeMetadata(headers))
        .onComplete {
          case Failure(exception) =>
            logger.warn(s"Failed to get claims from request metadata: ${exception.getMessage}")
            call.close(internalAuthenticationError, new Metadata())
            new ServerCall.Listener[Nothing]() {}
          case Success(Claims.empty) =>
            logger.debug(s"Auth metadata decoded into empty claims, returning UNAUTHENTICATED")
            call.close(Status.UNAUTHENTICATED, new Metadata())
            new ServerCall.Listener[Nothing]() {}
          case Success(claims) =>
            val nextCtx = prevCtx.withValue(contextKeyClaim, claims)
            // Contexts.interceptCall() creates a listener that wraps all methods of `nextListener`
            // such that `Context.current` returns `nextCtx`.
            val nextListenerWithContext =
              Contexts.interceptCall(nextCtx, call, headers, nextListener)
            setNextListener(nextListenerWithContext)
            nextListenerWithContext
        }(ec)
    }
  }
}

object AuthorizationInterceptor {

  private val contextKeyClaim = Context.key[Claims]("AuthServiceDecodedClaim")

  def extractClaimsFromContext(): Try[Claims] =
    Option(contextKeyClaim.get()).fold[Try[Claims]](Failure(unauthenticated()))(Success(_))

  def apply(authService: AuthService, ec: ExecutionContext): AuthorizationInterceptor =
    new AuthorizationInterceptor(authService, ec)

} 
Example 6
Source File: GlobalLogLevel.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.sandbox

import ch.qos.logback.classic.Level
import org.slf4j.{Logger, LoggerFactory}

object GlobalLogLevel {
  def set(level: Level): Unit = {
    val rootLogger = LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME)
    LoggerFactory.getILoggerFactory match {
      case loggerContext: ch.qos.logback.classic.LoggerContext =>
        rootLogger.info(s"Sandbox verbosity changed to $level")
        loggerContext.getLoggerList.forEach(_.setLevel(level))
      case _ =>
        rootLogger.warn(s"Sandbox verbosity cannot be set to requested $level")
    }
  }
} 
Example 7
Source File: CommandCompletionServiceValidation.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_completion_service.CommandCompletionServiceGrpc.CommandCompletionService
import com.daml.ledger.api.v1.command_completion_service._
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.dec.DirectExecutionContext
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

//TODO: this class is only needed by DamlOnXCommandCompletionService.scala. Must be deleted once that's gone!
class CommandCompletionServiceValidation(
    val service: CommandCompletionService with AutoCloseable,
    val ledgerId: LedgerId)
    extends CommandCompletionService
    with FieldValidations
    with GrpcApiService
    with ProxyCloseable
    with ErrorFactories {

  protected val logger: Logger = LoggerFactory.getLogger(CommandCompletionService.getClass)

  override def completionStream(
      request: CompletionStreamRequest,
      responseObserver: StreamObserver[CompletionStreamResponse]): Unit = {
    val validation = for {
      _ <- matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      _ <- requireNonEmptyString(request.applicationId, "application_id")
      _ <- requireNonEmpty(request.parties, "parties")
    } yield request

    validation.fold(
      exception => responseObserver.onError(exception),
      value => service.completionStream(value, responseObserver)
    )
  }

  override def completionEnd(request: CompletionEndRequest): Future[CompletionEndResponse] = {
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .fold(Future.failed, _ => service.completionEnd(request))
  }

  override def bindService(): ServerServiceDefinition =
    CommandCompletionServiceGrpc.bindService(this, DirectExecutionContext)
} 
Example 8
Source File: ActiveContractsServiceValidation.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.active_contracts_service.ActiveContractsServiceGrpc.ActiveContractsService
import com.daml.ledger.api.v1.active_contracts_service.{
  ActiveContractsServiceGrpc,
  GetActiveContractsRequest,
  GetActiveContractsResponse
}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

class ActiveContractsServiceValidation(
    protected val service: ActiveContractsService with AutoCloseable,
    val ledgerId: LedgerId)
    extends ActiveContractsService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(ActiveContractsService.getClass)

  override def getActiveContracts(
      request: GetActiveContractsRequest,
      responseObserver: StreamObserver[GetActiveContractsResponse]): Unit = {
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .fold(responseObserver.onError, _ => service.getActiveContracts(request, responseObserver))
  }
  override def bindService(): ServerServiceDefinition =
    ActiveContractsServiceGrpc.bindService(this, DirectExecutionContext)
} 
Example 9
Source File: LedgerConfigurationServiceValidation.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.ledger_configuration_service.LedgerConfigurationServiceGrpc.LedgerConfigurationService
import com.daml.ledger.api.v1.ledger_configuration_service.{
  GetLedgerConfigurationRequest,
  GetLedgerConfigurationResponse,
  LedgerConfigurationServiceGrpc
}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

class LedgerConfigurationServiceValidation(
    protected val service: LedgerConfigurationService with GrpcApiService,
    protected val ledgerId: LedgerId)
    extends LedgerConfigurationService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(LedgerConfigurationService.getClass)

  override def getLedgerConfiguration(
      request: GetLedgerConfigurationRequest,
      responseObserver: StreamObserver[GetLedgerConfigurationResponse]): Unit =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId)).fold(
      t => responseObserver.onError(t),
      _ => service.getLedgerConfiguration(request, responseObserver)
    )

  override def bindService(): ServerServiceDefinition =
    LedgerConfigurationServiceGrpc.bindService(this, DirectExecutionContext)
} 
Example 10
Source File: PackageServiceValidation.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.package_service.PackageServiceGrpc.PackageService
import com.daml.ledger.api.v1.package_service._
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.Function.const
import scala.concurrent.Future

class PackageServiceValidation(
    protected val service: PackageService with AutoCloseable,
    val ledgerId: LedgerId)
    extends PackageService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(PackageService.getClass)

  override def listPackages(request: ListPackagesRequest): Future[ListPackagesResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.listPackages
      )

  override def getPackage(request: GetPackageRequest): Future[GetPackageResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.getPackage
      )

  override def getPackageStatus(
      request: GetPackageStatusRequest): Future[GetPackageStatusResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.getPackageStatus
      )
  override def bindService(): ServerServiceDefinition =
    PackageServiceGrpc.bindService(this, DirectExecutionContext)

  override def close(): Unit = service.close()
} 
Example 11
Source File: GrpcCommandService.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.services.grpc

import java.time.{Duration, Instant}

import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_service.CommandServiceGrpc.CommandService
import com.daml.ledger.api.v1.command_service._
import com.daml.ledger.api.validation.{CommandsValidator, SubmitAndWaitRequestValidator}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.dec.DirectExecutionContext
import com.daml.platform.server.api.ProxyCloseable
import com.google.protobuf.empty.Empty
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

class GrpcCommandService(
    protected val service: CommandService with AutoCloseable,
    val ledgerId: LedgerId,
    currentLedgerTime: () => Instant,
    currentUtcTime: () => Instant,
    maxDeduplicationTime: () => Option[Duration]
) extends CommandService
    with GrpcApiService
    with ProxyCloseable {

  protected val logger: Logger = LoggerFactory.getLogger(CommandService.getClass)

  private[this] val validator =
    new SubmitAndWaitRequestValidator(new CommandsValidator(ledgerId))

  override def submitAndWait(request: SubmitAndWaitRequest): Future[Empty] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWait(request))

  override def submitAndWaitForTransactionId(
      request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionIdResponse] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWaitForTransactionId(request))

  override def submitAndWaitForTransaction(
      request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionResponse] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWaitForTransaction(request))

  override def submitAndWaitForTransactionTree(
      request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionTreeResponse] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWaitForTransactionTree(request))

  override def bindService(): ServerServiceDefinition =
    CommandServiceGrpc.bindService(this, DirectExecutionContext)

} 
Example 12
Source File: GrpcCommandSubmissionService.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.services.grpc

import java.time.{Duration, Instant}

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_submission_service.CommandSubmissionServiceGrpc.{
  CommandSubmissionService => ApiCommandSubmissionService
}
import com.daml.ledger.api.v1.command_submission_service.{
  CommandSubmissionServiceGrpc,
  SubmitRequest => ApiSubmitRequest
}
import com.daml.ledger.api.validation.{CommandsValidator, SubmitRequestValidator}
import com.daml.metrics.{Metrics, Timed}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import com.daml.platform.server.api.services.domain.CommandSubmissionService
import com.google.protobuf.empty.Empty
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

class GrpcCommandSubmissionService(
    override protected val service: CommandSubmissionService with AutoCloseable,
    ledgerId: LedgerId,
    currentLedgerTime: () => Instant,
    currentUtcTime: () => Instant,
    maxDeduplicationTime: () => Option[Duration],
    metrics: Metrics,
) extends ApiCommandSubmissionService
    with ProxyCloseable
    with GrpcApiService {

  protected val logger: Logger = LoggerFactory.getLogger(ApiCommandSubmissionService.getClass)

  private val validator = new SubmitRequestValidator(new CommandsValidator(ledgerId))

  override def submit(request: ApiSubmitRequest): Future[Empty] =
    Timed.future(
      metrics.daml.commands.submissions,
      Timed
        .value(
          metrics.daml.commands.validation,
          validator
            .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime()))
        .fold(
          Future.failed,
          service.submit(_).map(_ => Empty.defaultInstance)(DirectExecutionContext))
    )

  override def bindService(): ServerServiceDefinition =
    CommandSubmissionServiceGrpc.bindService(this, DirectExecutionContext)

} 
Example 13
Source File: Committer.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.committer

import com.codahale.metrics.Timer
import com.daml.ledger.participant.state.kvutils.DamlKvutils.{
  DamlConfigurationEntry,
  DamlLogEntry,
  DamlLogEntryId,
  DamlStateKey,
  DamlStateValue
}
import com.daml.ledger.participant.state.kvutils.{Conversions, DamlStateMap, Err}
import com.daml.ledger.participant.state.kvutils.committer.Committer._
import com.daml.ledger.participant.state.v1.{Configuration, ParticipantId}
import com.daml.lf.data.Time
import com.daml.metrics.Metrics
import org.slf4j.{Logger, LoggerFactory}


// The enclosing object and the start of this helper were cut from the excerpt;
// the declarations below are a reconstruction (assumption) so the fragment
// reads as a whole: the helper resolves the current ledger configuration from
// the committer's input state, falling back to the default.
object Committer {

  private[committer] def getCurrentConfiguration(
      defaultConfig: Configuration,
      inputState: DamlStateMap,
      logger: Logger): (Option[DamlConfigurationEntry], Configuration) =
    inputState
      .getOrElse(
        Conversions.configurationStateKey,
        throw Err.MissingInputState(Conversions.configurationStateKey)
      )
      .flatMap { v =>
        val entry = v.getConfigurationEntry
        Configuration
          .decode(entry.getConfiguration)
          .fold({ err =>
            logger.error(s"Failed to parse configuration: $err, using default configuration.")
            None
          }, conf => Some(Some(entry) -> conf))
      }
      .getOrElse(None -> defaultConfig)
} 
Example 14
Source File: AkkaStreamPerformanceTest.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.perf.util

import akka.actor.ActorSystem
import akka.stream.Materializer
import com.daml.ledger.api.testing.utils.Resource
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.{ExecutionContext, ExecutionContextExecutor}

@SuppressWarnings(Array("org.wartremover.warts.LeakingSealed"))
abstract class AkkaStreamPerformanceTest extends PerformanceTest {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  type ResourceType

  @volatile protected var system: ActorSystem = _
  @volatile protected var materializer: Materializer = _
  @transient protected implicit val ec: ExecutionContextExecutor = ExecutionContext.global

  protected def resource: Resource[ResourceType]

  protected def setup(): Unit = {
    resource.setup()
    implicit val sys: ActorSystem = ActorSystem(this.getClass.getSimpleName.stripSuffix("$"))
    system = sys
    materializer = Materializer(system)
  }

  protected def teardown(): Unit = {
    await(system.terminate())
    resource.close()
  }

  implicit class FixtureSetup[T](using: Using[T]) extends Serializable {
    def withLifecycleManagement(additionalSetup: T => Unit = _ => ()): Using[T] =
      using
        .setUp { input =>
          try {
            setup()
            additionalSetup(input)
          } catch {
            case t: Throwable =>
              logger.error("Setup failed.", t)
              throw t
          }
        }
        .tearDown { _ =>
          try {
            teardown()
          } catch {
            case t: Throwable =>
              logger.error("Teardown failed.", t)
              throw t
          }
        }
  }
} 
Example 15
Source File: TraceLog.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.speedy

import com.daml.lf.data.Ref.Location
import org.slf4j.Logger

final case class TraceLog(logger: Logger, capacity: Int) {

  private val buffer = Array.ofDim[(String, Option[Location])](capacity)
  private var pos: Int = 0
  private var size: Int = 0

  def add(message: String, optLocation: Option[Location]): Unit = {
    if (logger.isDebugEnabled) {
      logger.debug(s"${Pretty.prettyLoc(optLocation).renderWideStream.mkString}: $message")
    }
    buffer(pos) = (message, optLocation)
    pos = (pos + 1) % capacity
    if (size < capacity)
      size += 1
  }

  def iterator: Iterator[(String, Option[Location])] =
    new RingIterator(if (size < capacity) 0 else pos, size, buffer)
}

private final class RingIterator[A](ringStart: Int, ringSize: Int, buffer: Array[A])
    extends Iterator[A] {
  private var pos: Int = ringStart
  private var first = true
  private def nextPos: Int = (pos + 1) % ringSize
  def hasNext: Boolean = ringSize != 0 && (first || pos != ringStart)
  def next: A = {
    val x = buffer(pos)
    first = false
    pos = nextPos
    x
  }
} 
Example 16
Source File: Slf4JLogger.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.client.binding.util

import akka.stream._
import akka.stream.stage.{GraphStage, GraphStageLogic, InHandler, OutHandler}
import org.slf4j.Logger

final case class Slf4JLogger[T, U](
    logger: Logger,
    prefix: String,
    project: T => U,
    logDemand: Boolean = false)
    extends GraphStage[FlowShape[T, T]] {

  override def toString = "Slf4JLog"

  val in: Inlet[T] = Inlet[T]("in")
  val out: Outlet[T] = Outlet[T]("out")

  override def shape: FlowShape[T, T] = FlowShape(in, out)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogic(shape) with OutHandler with InHandler {

      override def onPush(): Unit = {

        val elem = grab(in)
        if (logger.isDebugEnabled) logger.debug("[{}] Element: {}", prefix, project(elem))
        push(out, elem)
      }

      override def onPull(): Unit = {
        if (logDemand) logger.debug("[{}] Demand", prefix)
        pull(in)
      }

      override def onUpstreamFailure(cause: Throwable): Unit = {
        logger.warn(s"[$prefix] Upstream failed", cause)

        super.onUpstreamFailure(cause)
      }

      override def onUpstreamFinish(): Unit = {
        logger.debug("[{}] Upstream finished.", prefix)

        super.onUpstreamFinish()
      }

      override def onDownstreamFinish(cause: Throwable): Unit = {
        logger.debug("[{}] Downstream finished.", prefix)

        super.onDownstreamFinish(cause)
      }

      setHandlers(in, out, this)
    }
}

object Slf4JLogger {
  def apply[T](logger: Logger, prefix: String): Slf4JLogger[T, T] =
    new Slf4JLogger(logger, prefix, identity)
} 
Example 17
Source File: Main.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.codegen

import java.io.File
import java.nio.file.Path

import ch.qos.logback.classic.Level
import com.daml.lf.codegen.conf.Conf
import com.typesafe.scalalogging.StrictLogging
import org.slf4j.{Logger, LoggerFactory}
import scalaz.Cord

import scala.collection.breakOut

object Main extends StrictLogging {

  private val codegenId = "Scala Codegen"

  @deprecated("Use codegen font-end: com.daml.codegen.CodegenMain.main", "0.13.23")
  def main(args: Array[String]): Unit =
    Conf.parse(args) match {
      case Some(conf) =>
        generateCode(conf)
      case None =>
        throw new IllegalArgumentException(
          s"Invalid ${codegenId: String} command line arguments: ${args.mkString(" "): String}")
    }

  def generateCode(conf: Conf): Unit = conf match {
    case Conf(darMap, outputDir, decoderPkgAndClass, verbosity, roots) =>
      setGlobalLogLevel(verbosity)
      logUnsupportedEventDecoderOverride(decoderPkgAndClass)
      val (dars, packageName) = darsAndOnePackageName(darMap)
      CodeGen.generateCode(dars, packageName, outputDir.toFile, CodeGen.Novel, roots)
  }

  private def setGlobalLogLevel(verbosity: Level): Unit = {
    LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME) match {
      case a: ch.qos.logback.classic.Logger =>
        a.setLevel(verbosity)
        logger.info(s"${codegenId: String} verbosity: ${verbosity.toString}")
      case _ =>
        logger.warn(s"${codegenId: String} cannot set requested verbosity: ${verbosity.toString}")
    }
  }

  private def logUnsupportedEventDecoderOverride(mapping: Option[(String, String)]): Unit =
    mapping.foreach {
      case (a, b) =>
        logger.warn(
          s"${codegenId: String} does not allow overriding Event Decoder, skipping: ${a: String} -> ${b: String}")
    }

  private def darsAndOnePackageName(darMap: Map[Path, Option[String]]): (List[File], String) = {
    val dars: List[File] = darMap.keys.map(_.toFile)(breakOut)
    val uniquePackageNames: Set[String] = darMap.values.collect { case Some(x) => x }(breakOut)
    uniquePackageNames.toSeq match {
      case Seq(packageName) =>
        (dars, packageName)
      case _ =>
        throw new IllegalStateException(
          s"${codegenId: String} expects all dars mapped to the same package name, " +
            s"requested: ${format(darMap): String}")
    }
  }

  private def format(map: Map[Path, Option[String]]): String = {
    val cord = map.foldLeft(Cord("{")) { (str, kv) =>
      str ++ kv._1.toFile.getAbsolutePath ++ "->" ++ kv._2.toString ++ ","
    }
    (cord ++ "}").toString
  }
} 
Example 18
Source File: ContextualizedLogger.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.logging

import akka.NotUsed
import akka.stream.scaladsl.Flow
import com.daml.grpc.GrpcException
import io.grpc.Status
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.concurrent.TrieMap
import scala.util.{Failure, Try}
import scala.util.control.NonFatal

object ContextualizedLogger {

  // Caches loggers to prevent them from needlessly wasting memory
  // Replicates the behavior of the underlying Slf4j logger factory
  private[this] val cache = TrieMap.empty[String, ContextualizedLogger]

  // Allows to explicitly pass a logger, should be used for testing only
  private[logging] def createFor(withoutContext: Logger): ContextualizedLogger =
    new ContextualizedLogger(withoutContext)

  // Slf4j handles the caching of the underlying logger itself
  private[logging] def createFor(name: String): ContextualizedLogger =
    createFor(LoggerFactory.getLogger(name))

  def get(clazz: Class[_]): ContextualizedLogger = {
    val name = clazz.getName.stripSuffix("$")
    cache.getOrElseUpdate(name, createFor(name))
  }

}

final class ContextualizedLogger private (val withoutContext: Logger) {

  val trace = new LeveledLogger.Trace(withoutContext)
  val debug = new LeveledLogger.Debug(withoutContext)
  val info = new LeveledLogger.Info(withoutContext)
  val warn = new LeveledLogger.Warn(withoutContext)
  val error = new LeveledLogger.Error(withoutContext)

  private def internalOrUnknown(code: Status.Code): Boolean =
    code == Status.Code.INTERNAL || code == Status.Code.UNKNOWN

  private def logError(t: Throwable)(implicit logCtx: LoggingContext): Unit =
    error("Unhandled internal error", t)

  def logErrorsOnCall[Out](implicit logCtx: LoggingContext): PartialFunction[Try[Out], Unit] = {
    case Failure(e @ GrpcException(s, _)) =>
      if (internalOrUnknown(s.getCode)) {
        logError(e)
      }
    case Failure(NonFatal(e)) =>
      logError(e)
  }

  def logErrorsOnStream[Out](implicit logCtx: LoggingContext): Flow[Out, Out, NotUsed] =
    Flow[Out].mapError {
      case e @ GrpcException(s, _) =>
        if (internalOrUnknown(s.getCode)) {
          logError(e)
        }
        e
      case NonFatal(e) =>
        logError(e)
        e
    }

} 
Example 19
Source File: LeveledLogger.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.logging

import org.slf4j.{Logger, Marker}

private[logging] object LeveledLogger {

  final class Trace(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean =
      logger.isTraceEnabled()
    override protected def log(msg: String): Unit =
      logger.trace(msg)
    override protected def log(msg: String, t: Throwable): Unit =
      logger.trace(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit =
      logger.trace(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit =
      logger.trace(fmt, arg)
  }

  final class Debug(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean =
      logger.isDebugEnabled()
    override protected def log(msg: String): Unit =
      logger.debug(msg)
    override protected def log(msg: String, t: Throwable): Unit =
      logger.debug(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit =
      logger.debug(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit =
      logger.debug(fmt, arg)
  }

  final class Info(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean =
      logger.isInfoEnabled()
    override protected def log(msg: String): Unit =
      logger.info(msg)
    override protected def log(msg: String, t: Throwable): Unit =
      logger.info(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit =
      logger.info(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit =
      logger.info(fmt, arg)
  }

  final class Warn(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean =
      logger.isWarnEnabled()
    override protected def log(msg: String): Unit =
      logger.warn(msg)
    override protected def log(msg: String, t: Throwable): Unit =
      logger.warn(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit =
      logger.warn(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit =
      logger.warn(fmt, arg)
  }

  final class Error(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean =
      logger.isErrorEnabled()
    override protected def log(msg: String): Unit =
      logger.error(msg)
    override protected def log(msg: String, t: Throwable): Unit =
      logger.error(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit =
      logger.error(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit =
      logger.error(fmt, arg)
  }

}

private[logging] sealed abstract class LeveledLogger {

  protected def isEnabled: Boolean

  protected def log(msg: String): Unit
  protected def log(msg: String, t: Throwable): Unit
  protected def log(m: Marker, msg: String, t: Throwable): Unit
  protected def log(fmt: String, arg: AnyRef): Unit

  final def apply(msg: => String)(implicit logCtx: LoggingContext): Unit =
    if (isEnabled)
      logCtx.ifEmpty(log(msg))(log(s"$msg (context: {})", _))

  final def apply(msg: => String, t: Throwable)(implicit logCtx: LoggingContext): Unit =
    if (isEnabled)
      logCtx.ifEmpty(log(msg, t))(c => log(c, s"$msg (context: $c)", t))

} 
Example 20
Source File: TextClassifier.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.example.textclassification

import com.intel.analytics.bigdl.example.utils._
import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, _}
import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T}
import org.apache.log4j.{Level => Level4j, Logger => Logger4j}
import org.slf4j.{Logger, LoggerFactory}
import scopt.OptionParser

import scala.collection.mutable.{ArrayBuffer, Map => MMap}
import scala.language.existentials

object TextClassifier {
  val log: Logger = LoggerFactory.getLogger(this.getClass)
  LoggerFilter.redirectSparkInfoLogs()
  Logger4j.getLogger("com.intel.analytics.bigdl.optim").setLevel(Level4j.INFO)

  def main(args: Array[String]): Unit = {
    val localParser = new OptionParser[TextClassificationParams]("BigDL Example") {
      opt[String]('b', "baseDir")
        .required()
        .text("Base dir containing the training and word2Vec data")
        .action((x, c) => c.copy(baseDir = x))
      opt[String]('p', "partitionNum")
        .text("you may want to tune the partitionNum if run into spark mode")
        .action((x, c) => c.copy(partitionNum = x.toInt))
      opt[String]('s', "maxSequenceLength")
        .text("maxSequenceLength")
        .action((x, c) => c.copy(maxSequenceLength = x.toInt))
      opt[String]('w', "maxWordsNum")
        .text("maxWordsNum")
        .action((x, c) => c.copy(maxWordsNum = x.toInt))
      opt[String]('l', "trainingSplit")
        .text("trainingSplit")
        .action((x, c) => c.copy(trainingSplit = x.toDouble))
      opt[String]('z', "batchSize")
        .text("batchSize")
        .action((x, c) => c.copy(batchSize = x.toInt))
      opt[Int]('l', "learningRate")
        .text("learningRate")
        .action((x, c) => c.copy(learningRate = x))
    }

    localParser.parse(args, TextClassificationParams()).map { param =>
      log.info(s"Current parameters: $param")
      val textClassification = new TextClassifier(param)
      textClassification.train()
    }
  }
} 
Example 21
Source File: TimestampLogicalType.scala    From embulk-output-s3_parquet   with MIT License
package org.embulk.output.s3_parquet.parquet

import java.time.ZoneId

import org.apache.parquet.io.api.RecordConsumer
import org.apache.parquet.schema.{LogicalTypeAnnotation, PrimitiveType, Types}
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.{
  MICROS,
  MILLIS,
  NANOS
}
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.embulk.config.ConfigException
import org.embulk.output.s3_parquet.catalog.GlueDataType
import org.embulk.spi.`type`.{
  BooleanType,
  DoubleType,
  JsonType,
  LongType,
  StringType,
  TimestampType
}
import org.embulk.spi.time.{Timestamp, TimestampFormatter}
import org.embulk.spi.Column
import org.msgpack.value.Value
import org.slf4j.{Logger, LoggerFactory}

case class TimestampLogicalType(
    isAdjustedToUtc: Boolean,
    timeUnit: TimeUnit,
    timeZone: ZoneId
) extends ParquetColumnType {
  private val logger: Logger =
    LoggerFactory.getLogger(classOf[TimestampLogicalType])

  override def primitiveType(column: Column): PrimitiveType =
    column.getType match {
      case _: LongType | _: TimestampType =>
        Types
          .optional(PrimitiveTypeName.INT64)
          .as(LogicalTypeAnnotation.timestampType(isAdjustedToUtc, timeUnit))
          .named(column.getName)
      case _: BooleanType | _: DoubleType | _: StringType | _: JsonType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def glueDataType(column: Column): GlueDataType =
    column.getType match {
      case _: LongType | _: TimestampType =>
        timeUnit match {
          case MILLIS => GlueDataType.TIMESTAMP
          case MICROS | NANOS =>
            warningWhenConvertingTimestampToGlueType(GlueDataType.BIGINT)
            GlueDataType.BIGINT
        }
      case _: BooleanType | _: DoubleType | _: StringType | _: JsonType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def consumeBoolean(consumer: RecordConsumer, v: Boolean): Unit =
    throw newUnsupportedMethodException("consumeBoolean")
  override def consumeString(consumer: RecordConsumer, v: String): Unit =
    throw newUnsupportedMethodException("consumeString")

  override def consumeLong(consumer: RecordConsumer, v: Long): Unit =
    consumer.addLong(v)

  override def consumeDouble(consumer: RecordConsumer, v: Double): Unit =
    throw newUnsupportedMethodException("consumeDouble")

  override def consumeTimestamp(
      consumer: RecordConsumer,
      v: Timestamp,
      formatter: TimestampFormatter
  ): Unit = timeUnit match {
    case MILLIS => consumer.addLong(v.toEpochMilli)
    case MICROS =>
      consumer.addLong(v.getEpochSecond * 1_000_000L + (v.getNano / 1_000L))
    case NANOS =>
      consumer.addLong(v.getEpochSecond * 1_000_000_000L + v.getNano)
  }

  override def consumeJson(consumer: RecordConsumer, v: Value): Unit =
    throw newUnsupportedMethodException("consumeJson")

  private def warningWhenConvertingTimestampToGlueType(
      glueType: GlueDataType
  ): Unit =
    logger.warn(
      s"timestamp(isAdjustedToUtc = $isAdjustedToUtc, timeUnit = $timeUnit) is converted" +
        s" to Glue ${glueType.name} but this is not represented correctly, because Glue" +
        s" does not support time type. Please use `catalog.column_options` to define the type."
    )
} 
Example 22
Source File: JsonLogicalType.scala    From embulk-output-s3_parquet   with MIT License
package org.embulk.output.s3_parquet.parquet
import org.apache.parquet.io.api.{Binary, RecordConsumer}
import org.apache.parquet.schema.{LogicalTypeAnnotation, PrimitiveType, Types}
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.embulk.config.ConfigException
import org.embulk.output.s3_parquet.catalog.GlueDataType
import org.embulk.spi.Column
import org.embulk.spi.`type`.{
  BooleanType,
  DoubleType,
  JsonType,
  LongType,
  StringType,
  TimestampType
}
import org.embulk.spi.time.{Timestamp, TimestampFormatter}
import org.msgpack.value.{Value, ValueFactory}
import org.slf4j.{Logger, LoggerFactory}

object JsonLogicalType extends ParquetColumnType {
  private val logger: Logger = LoggerFactory.getLogger(JsonLogicalType.getClass)
  override def primitiveType(column: Column): PrimitiveType =
    column.getType match {
      case _: BooleanType | _: LongType | _: DoubleType | _: StringType |
          _: JsonType =>
        Types
          .optional(PrimitiveTypeName.BINARY)
          .as(LogicalTypeAnnotation.jsonType())
          .named(column.getName)
      case _: TimestampType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def glueDataType(column: Column): GlueDataType =
    column.getType match {
      case _: BooleanType | _: LongType | _: DoubleType | _: StringType |
          _: JsonType =>
        warningWhenConvertingJsonToGlueType(GlueDataType.STRING)
        GlueDataType.STRING
      case _: TimestampType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def consumeBoolean(consumer: RecordConsumer, v: Boolean): Unit =
    consumeJson(consumer, ValueFactory.newBoolean(v))

  override def consumeString(consumer: RecordConsumer, v: String): Unit =
    consumeJson(consumer, ValueFactory.newString(v))

  override def consumeLong(consumer: RecordConsumer, v: Long): Unit =
    consumeJson(consumer, ValueFactory.newInteger(v))

  override def consumeDouble(consumer: RecordConsumer, v: Double): Unit =
    consumeJson(consumer, ValueFactory.newFloat(v))

  override def consumeTimestamp(
      consumer: RecordConsumer,
      v: Timestamp,
      formatter: TimestampFormatter
  ): Unit = throw newUnsupportedMethodException("consumeTimestamp")

  override def consumeJson(consumer: RecordConsumer, v: Value): Unit =
    consumer.addBinary(Binary.fromString(v.toJson))

  private def warningWhenConvertingJsonToGlueType(
      glueType: GlueDataType
  ): Unit = {
    logger.warn(
      s"json is converted" +
        s" to Glue ${glueType.name} but this is not represented correctly, because Glue" +
        s" does not support json type. Please use `catalog.column_options` to define the type."
    )
  }

} 
Example 23
Source File: ScorexLogging.scala    From matcher   with MIT License
package com.wavesplatform.dex.domain.utils

import monix.eval.Task
import monix.execution.{CancelableFuture, Scheduler}
import org.slf4j.{Logger, LoggerFactory}

case class LoggerFacade(logger: Logger) {

  def trace(message: => String): Unit                       = if (logger.isTraceEnabled) logger.trace(message)
  def debug(message: => String, arg: Any): Unit             = if (logger.isDebugEnabled) logger.debug(message, arg)
  def debug(message: => String): Unit                       = if (logger.isDebugEnabled) logger.debug(message)
  def info(message: => String): Unit                        = if (logger.isInfoEnabled) logger.info(message)
  def info(message: => String, arg: Any): Unit              = if (logger.isInfoEnabled) logger.info(message, arg)
  def info(message: => String, throwable: Throwable): Unit  = if (logger.isInfoEnabled) logger.info(message, throwable)
  def warn(message: => String): Unit                        = if (logger.isWarnEnabled) logger.warn(message)
  def warn(message: => String, throwable: Throwable): Unit  = if (logger.isWarnEnabled) logger.warn(message, throwable)
  def error(message: => String): Unit                       = if (logger.isErrorEnabled) logger.error(message)
  def error(message: => String, throwable: Throwable): Unit = if (logger.isErrorEnabled) logger.error(message, throwable)
}

trait ScorexLogging {

  protected lazy val log: LoggerFacade = LoggerFacade(LoggerFactory.getLogger(this.getClass))

  implicit class TaskExt[A](t: Task[A]) {

    def runAsyncLogErr(implicit s: Scheduler): CancelableFuture[A] = logErr.runToFuture(s)

    def logErr: Task[A] = t.onErrorHandleWith { ex =>
      log.error(s"Error executing task", ex)
      Task.raiseError[A](ex)
    }
  }
} 
Example 24
Source File: EmbeddedCassandra.scala    From phantom-activator-template   with Apache License 2.0
package controllers

import java.io.File
import java.util.concurrent.atomic.AtomicBoolean

import org.cassandraunit.utils.EmbeddedCassandraServerHelper
import org.slf4j.Logger

import scala.concurrent.blocking
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}


object EmbeddedCassandra {

  // Declaration elided from this excerpt; reconstructed (assumption) so the
  // snippet compiles. It guards against starting the embedded server twice.
  private val started: AtomicBoolean = new AtomicBoolean(false)

  def start(logger: Logger, config: Option[File] = None, timeout: Option[Int] = None): Unit = {
    this.synchronized {
      if (started.compareAndSet(false, true)) {
        blocking {
          val configFile = config.map(_.toURI.toString) getOrElse EmbeddedCassandraServerHelper.DEFAULT_CASSANDRA_YML_FILE
          System.setProperty("cassandra.config", configFile)
          Try {
            EmbeddedCassandraServerHelper.mkdirs()
          } match {
            case Success(value) => logger.info("Successfully created directories for embedded Cassandra.")
            case Failure(NonFatal(e)) =>
              logger.error(s"Error creating Embedded cassandra directories: ${e.getMessage}")
          }

          (config, timeout) match {
            case (Some(file), None) =>
              logger.info(s"Starting Cassandra in embedded mode with configuration from $file.")
              EmbeddedCassandraServerHelper.startEmbeddedCassandra(
                file,
                EmbeddedCassandraServerHelper.DEFAULT_TMP_DIR,
                EmbeddedCassandraServerHelper.DEFAULT_STARTUP_TIMEOUT
              )
            case (Some(file), Some(time)) =>
              logger.info(s"Starting Cassandra in embedded mode with configuration from $file and timeout set to $timeout ms.")
              EmbeddedCassandraServerHelper.startEmbeddedCassandra(
                file,
                EmbeddedCassandraServerHelper.DEFAULT_TMP_DIR,
                time
              )

            case (None, Some(time)) =>
              logger.info(s"Starting Cassandra in embedded mode with default configuration and timeout set to $timeout ms.")
              EmbeddedCassandraServerHelper.startEmbeddedCassandra(time)
            case (None, None) =>
              logger.info("Starting Cassandra in embedded mode with default configuration.")
              EmbeddedCassandraServerHelper.startEmbeddedCassandra()
              logger.info("Successfully started embedded Cassandra")
          }
        }
      }
      else {
        logger.info("Embedded Cassandra has already been started")
      }
    }
  }


  def cleanup(logger: Logger): Unit = {
    this.synchronized {
      if (started.compareAndSet(true, false)) {
        logger.info("Cleaning up embedded Cassandra")
        EmbeddedCassandraServerHelper.cleanEmbeddedCassandra()
      } else {
        logger.info("Cassandra is not running, not cleaning up")
      }
    }
  }
} 
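A brief usage sketch for the helper above, assuming the enclosing object is named EmbeddedCassandra after its source file; the timeout value is illustrative.

import org.slf4j.LoggerFactory

object EmbeddedCassandraDemo extends App {
  private val logger = LoggerFactory.getLogger(getClass)

  EmbeddedCassandra.start(logger, config = None, timeout = Some(30000)) // 30 s startup timeout
  // ... connect a Cassandra session and run test queries here ...
  EmbeddedCassandra.cleanup(logger)
}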
Example 25
Source File: KafkaSink.scala    From spark-kafka-sink   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.metrics.sink

import java.util.{ Properties, Locale }
import java.util.concurrent.TimeUnit

import org.slf4j.Logger
import org.slf4j.LoggerFactory

import com.codahale.metrics.MetricRegistry
import org.apache.spark.SecurityManager

import com.manyangled.kafkasink.KafkaReporter

class KafkaSink(val properties: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends org.apache.spark.metrics.sink.Sink {

  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  private def popt(prop: String): Option[String] =
    Option(properties.getProperty(prop))

  // These are non-negotiable
  val broker = popt("broker").get
  val topic = popt("topic").get

  lazy val reporter = new KafkaReporter(registry, broker, topic, properties)

  def start(): Unit = {
    logger.info(s"Starting Kafka metric reporter at $broker, topic $topic")
    val period = popt("period").getOrElse("10").toLong
    val tstr = popt("unit").getOrElse("seconds").toUpperCase(Locale.ROOT)
    val tunit = TimeUnit.valueOf(tstr)
    reporter.start(period, tunit)
  }

  def stop(): Unit = {
    logger.info(s"Stopping Kafka metric reporter at $broker, topic $topic")
    reporter.stop()
  }

  def report(): Unit = {
    logger.info(s"Reporting metrics to Kafka reporter at $broker, topic $topic")
    reporter.report()
  }
} 
Example 26
Source File: Demo2iConfig.scala    From spark-riak-connector   with Apache License 2.0 5 votes vote down vote up
package com.basho.riak.spark.examples.demos.fbl
import com.basho.riak.client.core.RiakNode
import com.basho.riak.client.core.query.Namespace
import com.basho.riak.spark.rdd._
import com.basho.riak.client.core.query.indexes.LongIntIndex
import com.basho.riak.spark.rdd.connector.RiakConnectorConf
import com.basho.riak.spark.rdd.{RiakFunctions, BucketDef}
import com.basho.riak.spark.util.RiakObjectConversionUtil
import com.basho.riak.spark.writer.{WriteDataMapperFactory, WriteDataMapper}
import org.slf4j.{LoggerFactory, Logger}
import com.basho.riak.spark._
import com.basho.riak.client.core.query.{RiakObject, Namespace}
import com.basho.riak.client.api.annotations.{RiakKey, RiakIndex}
import org.apache.spark.{SparkConf, SparkContext}

case class Demo2iConfig(riakConf: RiakConnectorConf, index: String, bucket: String, from: Long, to: Long, name:String){

  def riakNodeBuilder(minConnections:Int = 2):RiakNode.Builder = {
    val firstTheWinner = riakConf.hosts.iterator.next()

    new RiakNode.Builder()
      .withMinConnections(minConnections)
      .withRemoteAddress(firstTheWinner.getHost)
      .withRemotePort(firstTheWinner.getPort)
  }
}

object Demo2iConfig{

  val DEFAULT_INDEX_NAME = "creationNo"
  val DEFAULT_BUCKET_NAME = "test-bucket"
  val DEFAULT_FROM = 1
  val DEFAULT_TO = 4

  def apply(sparkConf: SparkConf):Demo2iConfig = {

    Demo2iConfig(
      riakConf = RiakConnectorConf(sparkConf),
      index = sparkConf.get("spark.riak.demo.index", DEFAULT_INDEX_NAME),
      bucket = sparkConf.get("spark.riak.demo.bucket", DEFAULT_BUCKET_NAME),
      from = sparkConf.get("spark.riak.demo.from", DEFAULT_FROM.toString).toLong,
      to = sparkConf.get("spark.riak.demo.to", DEFAULT_TO.toString).toLong,
      name = sparkConf.get("spark.app.name", "")
    )
  }
} 
Example 27
Source File: StatCounter.scala    From spark-riak-connector   with Apache License 2.0 5 votes vote down vote up
package com.basho.riak.spark.rdd

import java.util.concurrent.atomic.LongAdder

import org.slf4j.Logger

import scala.concurrent.duration.Duration

class StatCounter(logger:Logger = null) {
  case class Stats(duration: Duration, counter:Long, logger:Logger = null){
    def dump(message: String, logger:Logger = this.logger ):Stats ={
      require(logger != null, "logger should be specified")
      logger.info("{}\n\t{} items were processed\n\tit took {}\n",
        List[AnyRef](message, counter: java.lang.Long, duration):_*)
      this
    }
  }

  private val counter = new LongAdder
  private var startedAt = System.currentTimeMillis()


  def increment():StatCounter = {
    counter.increment()
    this
  }

  def +=(value: Int): StatCounter = {
    counter.add(value)
    this
  }

  def +=(value: Long): StatCounter = {
    counter.add(value)
    this
  }

  def reset():StatCounter = {
    startedAt = System.currentTimeMillis()
    counter.reset()
    this
  }

  def stats():Stats ={
    val duration = System.currentTimeMillis() - startedAt
    new Stats(Duration(duration, "ms"), counter.longValue(), logger)
  }
}

object StatCounter{
  def apply(logger: Logger = null): StatCounter = {
    new StatCounter(logger)
  }
} 
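A small usage sketch for the counter above; the logger name and workload are illustrative.

import org.slf4j.LoggerFactory

object StatCounterDemo extends App {
  val counter = StatCounter(LoggerFactory.getLogger("stats-demo"))

  (1 to 1000).foreach(_ => counter.increment())
  counter += 500L

  counter.stats().dump("Processing finished") // logs the item count and the elapsed time
}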
Example 28
Source File: AbstractRiakTest.scala    From spark-riak-connector   with Apache License 2.0 5 votes vote down vote up
package com.basho.riak.spark.rdd

import com.basho.riak.JsonTestFunctions
import com.basho.riak.client.core.RiakNode
import com.basho.riak.client.core.query.Namespace
import org.junit._
import org.junit.rules.TestWatcher
import org.junit.runner.Description
import org.slf4j.{Logger, LoggerFactory}

abstract class AbstractRiakTest extends RiakFunctions with JsonTestFunctions {

  private final val logger: Logger = LoggerFactory.getLogger(this.getClass)

  protected val DEFAULT_NAMESPACE = new Namespace("default","test-bucket")
  protected val DEFAULT_NAMESPACE_4STORE = new Namespace("default", "test-bucket-4store")

  protected override val numberOfParallelRequests: Int = 4
  protected override val nodeBuilder: RiakNode.Builder = new RiakNode.Builder().withMinConnections(numberOfParallelRequests)

  protected val jsonData: Option[String] = None

  @Rule
  def watchman: TestWatcher = new TestWatcher() {
    override def starting(description: Description): Unit = {
      super.starting(description)
      logger.info(
        "\n----------------------------------------\n" +
          "  [TEST STARTED]  {}\n" +
          "----------------------------------------\n",
        description.getDisplayName)
    }

    override def finished(description: Description): Unit = {
      super.finished(description)
      logger.info(
        "\n----------------------------------------\n" +
          "  [TEST FINISHED]  {}\n" +
          "----------------------------------------\n",
        description.getDisplayName)
    }
  }

  @Before
  protected def initialize(): Unit = setupData()

  protected def setupData(): Unit = {
    // Purge data: data might be not only created, but it may be also changed during the previous test case execution
    //
    // For manual check: curl -v http://localhost:10018/buckets/test-bucket/keys?keys=true
    List(DEFAULT_NAMESPACE, DEFAULT_NAMESPACE_4STORE) foreach resetAndEmptyBucket

    withRiakDo(session => jsonData.foreach(createValues(session, DEFAULT_NAMESPACE, _)))
  }
} 
Example 29
Source File: UnorderedParallelParquetSink.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s

import java.util.UUID

import akka.Done
import akka.stream.scaladsl.{Flow, Keep, Sink}
import org.apache.hadoop.fs.Path
import org.apache.parquet.schema.MessageType
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

private[parquet4s] object UnorderedParallelParquetSink extends IOOps {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path,
                                                             parallelism: Int,
                                                             options: ParquetWriter.Options = ParquetWriter.Options()
                                                            ): Sink[T, Future[Done]] = {
    val schema = ParquetSchemaResolver.resolveSchema[T]
    val valueCodecConfiguration = options.toValueCodecConfiguration

    validateWritePath(path, options)

    def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration)

    Flow[T]
      .zipWithIndex
      .groupBy(parallelism, elemAndIndex => Math.floorMod(elemAndIndex._2, parallelism))
      .map(elemAndIndex => encode(elemAndIndex._1))
      .fold(UnorderedChunk(path, schema, options))(_.write(_))
      .map(_.close())
      .async
      .mergeSubstreamsWithParallelism(parallelism)
      .toMat(Sink.ignore)(Keep.right)
  }

  private trait UnorderedChunk {

    def write(record: RowParquetRecord): UnorderedChunk

    def close(): Unit

  }

  private object UnorderedChunk {

    def apply(basePath: Path,
              schema: MessageType,
              options: ParquetWriter.Options): UnorderedChunk = new PendingUnorderedChunk(basePath, schema, options)

    private[UnorderedChunk] class PendingUnorderedChunk(basePath: Path,
                                        schema: MessageType,
                                        options: ParquetWriter.Options) extends UnorderedChunk {
      override def write(record: RowParquetRecord): UnorderedChunk = {
        val chunkPath = Path.mergePaths(basePath, new Path(s"/part-${UUID.randomUUID()}.parquet"))
        val writer = ParquetWriter.internalWriter(chunkPath, schema, options)
        writer.write(record)
        new StartedUnorderedChunk(chunkPath, writer, acc = 1)
      }

      override def close(): Unit = ()
    }

    private[UnorderedChunk] class StartedUnorderedChunk(chunkPath: Path,
                                        writer: ParquetWriter.InternalWriter,
                                        acc: Long
                                       ) extends UnorderedChunk {
      override def write(record: RowParquetRecord): UnorderedChunk = {
        writer.write(record)
        new StartedUnorderedChunk(chunkPath, writer, acc = acc + 1)
      }

      override def close(): Unit = {
        if (logger.isDebugEnabled) logger.debug(s"$acc records were successfully written to $chunkPath")
        writer.close()
      }
    }
  }

} 
Example 30
Source File: IndefiniteStreamParquetSink.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s
import akka.stream.FlowShape
import akka.stream.scaladsl.{Broadcast, Flow, GraphDSL, Keep, Sink, ZipWith}
import com.github.mjakubowski84.parquet4s.ParquetWriter.ParquetWriterFactory
import org.apache.hadoop.fs.Path
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.duration.FiniteDuration


private[parquet4s] object IndefiniteStreamParquetSink extends IOOps {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply[In, ToWrite: ParquetWriterFactory, Mat](path: Path,
                                                    maxChunkSize: Int,
                                                    chunkWriteTimeWindow: FiniteDuration,
                                                    buildChunkPath: ChunkPathBuilder[In] = ChunkPathBuilder.default,
                                                    preWriteTransformation: In => ToWrite = identity[In] _,
                                                    postWriteSink: Sink[Seq[In], Mat] = Sink.ignore,
                                                    options: ParquetWriter.Options = ParquetWriter.Options()
                                            ): Sink[In, Mat] = {
    validateWritePath(path, options)

    val internalFlow = Flow.fromGraph(GraphDSL.create() { implicit b =>
      import GraphDSL.Implicits._
    
      val inChunkFlow = b.add(Flow[In].groupedWithin(maxChunkSize, chunkWriteTimeWindow))
      val broadcastChunks = b.add(Broadcast[Seq[In]](outputPorts = 2))
      val writeFlow = Flow[Seq[In]].map { chunk =>
        val toWrite = chunk.map(preWriteTransformation)
        val chunkPath = buildChunkPath(path, chunk)
        if (logger.isDebugEnabled()) logger.debug(s"Writing ${toWrite.size} records to $chunkPath")
        ParquetWriter.writeAndClose(chunkPath.toString, toWrite, options)
      }
      val zip = b.add(ZipWith[Seq[In], Unit, Seq[In]]((chunk, _) => chunk))
      
      inChunkFlow ~> broadcastChunks ~> writeFlow ~> zip.in1
                     broadcastChunks ~> zip.in0

      FlowShape(inChunkFlow.in, zip.out)               
    })

    internalFlow.toMat(postWriteSink)(Keep.right)
  }

} 
Example 31
Source File: SingleFileParquetSink.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s

import akka.Done
import akka.stream.scaladsl.{Flow, Keep, Sink}
import org.apache.hadoop.fs.Path
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

private[parquet4s] object SingleFileParquetSink {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path,
                                                             options: ParquetWriter.Options = ParquetWriter.Options()
                                                            ): Sink[T, Future[Done]] = {
    val schema = ParquetSchemaResolver.resolveSchema[T]
    val writer = ParquetWriter.internalWriter(path, schema, options)
    val valueCodecConfiguration = options.toValueCodecConfiguration
    val isDebugEnabled = logger.isDebugEnabled

    def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration)

    Flow[T]
      .map(encode)
      .fold(0) { case (acc, record) => writer.write(record); acc + 1}
      .map { count =>
        if (isDebugEnabled) logger.debug(s"$count records were successfully written to $path")
        writer.close()
      }
      .toMat(Sink.ignore)(Keep.right)
  }

} 
Example 32
Source File: IOOps.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.SecureIOUtils.AlreadyExistsException
import org.apache.parquet.hadoop.ParquetFileWriter
import org.slf4j.Logger

import scala.concurrent.{ExecutionContext, Future}
import scala.util.Try

trait IOOps {

  protected val logger: Logger

  protected def validateWritePath(path: Path, writeOptions: ParquetWriter.Options): Unit = {
    val fs = path.getFileSystem(writeOptions.hadoopConf)
    try {
      if (fs.exists(path)) {
        if (writeOptions.writeMode == ParquetFileWriter.Mode.CREATE)
          throw new AlreadyExistsException(s"File or directory already exists: $path")
        else {
          if (logger.isDebugEnabled) logger.debug(s"Deleting $path in order to override with new data.")
          fs.delete(path, true)
        }
      }
    } finally fs.close()
  }

  protected def filesAtPath(path: Path, writeOptions: ParquetWriter.Options)
                           (implicit ec: ExecutionContext): Future[List[String]] = Future {
    scala.concurrent.blocking {
      val fs = path.getFileSystem(writeOptions.hadoopConf)
      try {
        val iter = fs.listFiles(path, false)
        Stream
          .continually(Try(iter.next()))
          .takeWhile(_.isSuccess)
          .map(_.get)
          .map(_.getPath.getName)
          .toList
      } finally fs.close()
    }
  }

  protected def filesAtPath(path: String, writeOptions: ParquetWriter.Options)
                           (implicit ec: ExecutionContext): Future[List[String]] = filesAtPath(new Path(path), writeOptions)

} 
Example 33
Source File: SequentialFileSplittingParquetSink.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s

import akka.Done
import akka.stream.scaladsl.{Flow, Keep, Sink}
import org.apache.hadoop.fs.Path
import org.apache.parquet.schema.MessageType
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

private[parquet4s] object SequentialFileSplittingParquetSink extends IOOps {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path,
                                                             maxRecordsPerFile: Long,
                                                             options: ParquetWriter.Options = ParquetWriter.Options()
                                                            ): Sink[T, Future[Done]] = {
    val schema = ParquetSchemaResolver.resolveSchema[T]
    val valueCodecConfiguration = options.toValueCodecConfiguration

    validateWritePath(path, options)

    def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration)

    Flow[T]
      .zipWithIndex
      .map { case (elem, index) => OrderedChunkElem(encode(elem), index) }
      .fold(OrderedChunk(path, schema, maxRecordsPerFile, options))(_.write(_))
      .map(_.close())
      .toMat(Sink.ignore)(Keep.right)
  }

  private case class OrderedChunkElem(record: RowParquetRecord, index: Long) {
    def isSplit(maxRecordsPerFile: Long): Boolean = index % maxRecordsPerFile == 0
  }

  private trait OrderedChunk {
    def write(elem: OrderedChunkElem): OrderedChunk
    def close(): Unit
  }

  private object OrderedChunk {

    def apply(basePath: Path,
              schema: MessageType,
              maxRecordsPerFile: Long,
              options: ParquetWriter.Options): OrderedChunk = new PendingOrderedChunk(basePath, schema, maxRecordsPerFile, options)


    private[OrderedChunk] class PendingOrderedChunk(basePath: Path,
                                                    schema: MessageType,
                                                    maxRecordsPerFile: Long,
                                                    options: ParquetWriter.Options) extends OrderedChunk {
      override def write(elem: OrderedChunkElem): OrderedChunk = {
        val chunkNumber: Int = Math.floorDiv(elem.index, maxRecordsPerFile).toInt
        val chunkPath = Path.mergePaths(basePath, new Path(chunkFileName(chunkNumber)))
        val writer = ParquetWriter.internalWriter(chunkPath, schema, options)
        writer.write(elem.record)
        new StartedOrderedChunk(basePath, schema, maxRecordsPerFile, options, chunkPath, writer, acc = 1)
      }

      override def close(): Unit = ()

      private def chunkFileName(chunkNumber: Int): String = f"/part-$chunkNumber%05d.parquet"
    }

    private[OrderedChunk] class StartedOrderedChunk(basePath: Path,
                                                    schema: MessageType,
                                                    maxRecordsPerFile: Long,
                                                    options: ParquetWriter.Options,
                                                    chunkPath: Path,
                                                    writer: ParquetWriter.InternalWriter,
                                                    acc: Long) extends OrderedChunk {
      override def write(elem: OrderedChunkElem): OrderedChunk = {
        if (elem.isSplit(maxRecordsPerFile)) {
          this.close()
          new PendingOrderedChunk(basePath, schema, maxRecordsPerFile, options).write(elem)
        } else {
          writer.write(elem.record)
          new StartedOrderedChunk(basePath, schema, maxRecordsPerFile, options, chunkPath, writer, acc = acc + 1)
        }
      }

      override def close(): Unit = {
        if (logger.isDebugEnabled) logger.debug(s"$acc records were successfully written to $chunkPath")
        writer.close()
      }
    }
  }

} 
Example 34
Source File: SddfApp.scala    From sddf   with GNU General Public License v3.0 5 votes vote down vote up
package de.unihamburg.vsis.sddf

import org.joda.time.format.PeriodFormatterBuilder
import org.slf4j.Logger
import org.slf4j.LoggerFactory

import de.unihamburg.vsis.sddf.config.Config

import scopt.Read
import scopt.OptionParser

class SddfApp extends App {

  val periodFormatter = (new PeriodFormatterBuilder() minimumPrintedDigits (2) printZeroAlways ()
    appendDays () appendSeparator ("d ")
    appendHours () appendSeparator (":") appendMinutes () appendSuffix (":") appendSeconds ()
    appendSeparator (".")
    minimumPrintedDigits (3) appendMillis () toFormatter)

  @transient var _log: Logger = null
  // Method to get or create the logger for this object
  def log(): Logger = {
    if (_log == null) {
      _log = LoggerFactory.getLogger(getClass.getName)
    }
    _log
  }
  
  @transient var _logLineage: Logger = null
  // Method to get or create the logger for this object
  def logLineage(): Logger = {
    if (_logLineage == null) {
      _logLineage = LoggerFactory.getLogger("lineage")
    }
    _logLineage
  }
  

  // extend Parser to accept the type Option
  implicit val optionRead: Read[Option[String]] = Read.reads(Some(_))
  
  // parsing commandline parameters
  val parser = new OptionParser[Parameters]("sddf") {
    head("SddF", "0.1.0")
    opt[Map[String, String]]('p', "properties") optional() valueName("<property>") action { (x, c) =>
      c.copy(properties = x) } text("set arbitrary properties via command line")
    opt[Option[String]]('c', "config-file") optional() action { (x, c) =>
      c.copy(propertyPath = x) } text("optional path to a property file")
  }
  
  // parser.parse returns Option[C]
  val parameters = parser.parse(args, Parameters())
  var propertiesCommandline: Map[String, String] = Map()
  var propertiesPath: Option[String] = None
   parameters match {
    case Some(config) =>
      propertiesCommandline = config.properties
      propertiesPath = config.propertyPath
    case None =>
      // arguments are bad, error message will have been displayed
  }
  
  val Conf: Config = if(propertiesPath.isDefined) new Config(propertiesPath.get) else new Config()
  
  propertiesCommandline.foreach(props => {
	  Conf.setPropertyCommandline(props._1, props._2)
  })
  
}

case class Parameters(propertyPath: Option[String] = None, properties: Map[String,String] = Map()) 
Example 35
Source File: MetaCatalogProcessor.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package it.gov.daf.ingestion.metacatalog

import com.typesafe.config.ConfigFactory
import play.api.libs.json._
import it.gov.daf.catalogmanager._
import it.gov.daf.catalogmanager.json._
import org.slf4j.{Logger, LoggerFactory}
import org.apache.commons.lang.StringEscapeUtils

//Get Logical_uri, process MetadataCatalog and get the required info
class MetaCatalogProcessor(metaCatalog: MetaCatalog) {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val sftpDefPrefix = ConfigFactory.load().getString("ingmgr.sftpdef.prefixdir")

  
  def separator() = {
    metaCatalog.operational
      .input_src.sftp
      .flatMap(_.headOption)
      .flatMap(_.param)
      .flatMap(_.split(", ").reverse.headOption)
      .map(_.replace("sep=", ""))
      .getOrElse(",")
  }

  def fileFormatNifi(): String = {
    val inputSftp = metaCatalog.operational.input_src.sftp

    inputSftp match {
      case Some(s) =>
        val sftps: Seq[SourceSftp] = s.filter(x => x.name.equals("sftp_daf"))
        if (sftps.nonEmpty) sftps.head.param.getOrElse("")
        else ""

      case None => ""
    }
  }

  def ingPipelineNifi(): String = {
    ingPipeline.mkString(",")
  }

} 
Example 36
Source File: KuduController.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import org.apache.kudu.spark.kudu._
import org.apache.spark.sql.{ DataFrame, SparkSession }
import org.slf4j.{ Logger, LoggerFactory }

import scala.util.{ Failure, Try }

class KuduController(sparkSession: SparkSession, master: String) {

  val alogger: Logger = LoggerFactory.getLogger(this.getClass)

  def readData(table: String): Try[DataFrame] =  Try{
    sparkSession
      .sqlContext
      .read
      .options(Map("kudu.master" -> master, "kudu.table" -> table)).kudu
  }.recoverWith {
    case ex =>
      alogger.error(s"Exception ${ex.getMessage}\n ${ex.getStackTrace.mkString("\n")} ")
      Failure(ex)
  }
} 
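A hedged sketch of driving the controller above; the Kudu master address and table name are placeholders.

import org.apache.spark.sql.SparkSession
import scala.util.{Failure, Success}

object KuduControllerDemo extends App {
  val spark = SparkSession.builder().master("local[*]").appName("kudu-demo").getOrCreate()
  val controller = new KuduController(spark, master = "kudu-master:7051")

  controller.readData("impala::default.my_table") match {
    case Success(df) => df.show(10)
    case Failure(ex) => println(s"Read failed: ${ex.getMessage}")
  }
}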
Example 37
Source File: PhysicalDatasetController.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import cats.syntax.show.toShow
import com.typesafe.config.Config
import daf.dataset.{ DatasetParams, FileDatasetParams, KuduDatasetParams }
import daf.filesystem.fileFormatShow
import org.apache.spark.sql.{ DataFrame, SparkSession }
import org.apache.spark.SparkConf
import org.slf4j.{ Logger, LoggerFactory }

class PhysicalDatasetController(sparkSession: SparkSession,
                                kuduMaster: String,
                                defaultLimit: Option[Int] = None,
                                defaultChunkSize: Int = 0) {

  lazy val kuduController = new KuduController(sparkSession, kuduMaster)
  lazy val hdfsController = new HDFSController(sparkSession)

  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  private def addLimit(dataframe: DataFrame, limit: Option[Int]) = (limit, defaultLimit) match {
    case (None, None)                 => dataframe
    case (None, Some(value))          => dataframe.limit { value }
    case (Some(value), None)          => dataframe.limit { value }
    case (Some(value), Some(default)) => dataframe.limit { math.min(value, default) }
  }

  def kudu(params: KuduDatasetParams, limit: Option[Int] = None) = {
    logger.debug { s"Reading data from kudu table [${params.table}]" }
    kuduController.readData(params.table).map { addLimit(_, limit) }
  }

  def hdfs(params: FileDatasetParams, limit: Option[Int] = None) = {
    logger.debug { s"Reading data from hdfs at path [${params.path}]" }
    hdfsController.readData(params.path, params.format.show, params.param("separator")).map { addLimit(_, limit) }
  }

  def get(params: DatasetParams, limit: Option[Int]= None) = params match {
    case kuduParams: KuduDatasetParams => kudu(kuduParams, limit)
    case hdfsParams: FileDatasetParams => hdfs(hdfsParams, limit)
  }

}

object PhysicalDatasetController {

  private def getOptionalString(path: String, underlying: Config) = {
    if (underlying.hasPath(path)) {
      Some(underlying.getString(path))
    } else {
      None
    }
  }

  private def getOptionalInt(path: String, underlying: Config) = {
    if (underlying.hasPath(path)) {
      Some(underlying.getInt(path))
    } else {
      None
    }
  }

  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply(configuration: Config): PhysicalDatasetController = {

    val sparkConfig = new SparkConf()
    sparkConfig.set("spark.driver.memory", configuration.getString("spark.driver.memory"))

    val sparkSession = SparkSession.builder().master("local").config(sparkConfig).getOrCreate()

    val kuduMaster = configuration.getString("kudu.master")

    val defaultLimit = if (configuration hasPath "daf.row_limit") Some {
      configuration.getInt("daf.row_limit")
    } else None

    System.setProperty("sun.security.krb5.debug", "true")

    new PhysicalDatasetController(sparkSession, kuduMaster, defaultLimit)
  }
} 
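A hedged sketch of building the controller from a Typesafe Config; the keys mirror those read in apply() above, the values are placeholders.

import com.typesafe.config.ConfigFactory

object PhysicalDatasetControllerDemo extends App {
  val config = ConfigFactory.parseString(
    """
      |spark.driver.memory = "2g"
      |kudu.master = "kudu-master:7051"
      |daf.row_limit = 1000
    """.stripMargin)

  val controller = PhysicalDatasetController(config)
}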
Example 38
Source File: HDFSController.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import com.databricks.spark.avro._
import org.apache.spark.sql.{ DataFrame, SparkSession }
import org.slf4j.{Logger, LoggerFactory}

import scala.util.{Failure, Try}

class HDFSController(sparkSession: SparkSession) {

  val alogger: Logger = LoggerFactory.getLogger(this.getClass)

  def readData(path: String, format: String, separator: Option[String]): Try[DataFrame] =  format match {
    case "csv" => Try {
      val pathFixAle = path + "/" + path.split("/").last + ".csv"
      alogger.debug(s"questo e' il path $pathFixAle")
      separator match {
        case None => sparkSession.read.csv(pathFixAle)
        case Some(sep) => sparkSession.read.format("csv")
          .option("sep", sep)
          .option("inferSchema", "true")
          .option("header", "true")
          .load(pathFixAle)
      }
    }
    case "parquet" => Try { sparkSession.read.parquet(path) }
    case "avro"    => Try { sparkSession.read.avro(path) }
    case unknown   => Failure { new IllegalArgumentException(s"Unsupported format [$unknown]") }
  }
} 
Example 39
Source File: CleanupStatistics.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package daf.dataset.export.cleanup

import org.slf4j.Logger

sealed case class CleanupStatistics(successes: List[SuccessfulAttempt], failures: List[FailedAttempt], timeElapsed: Long) {

  lazy val fatalFailures    = failures.filter { _.reason.nonEmpty }

  lazy val nonFatalFailures = failures.filter { _.reason.isEmpty }

  private def logSuccesses(logger: Logger) = if (successes.nonEmpty) {
    logger.info { s"Successfully deleted [${successes.size}] path(s)" }
    successes.foreach {
      case SuccessfulAttempt(path) => logger.debug { s"${path.toString}" }
    }
  }

  private def logFailures(logger: Logger) = if (failures.nonEmpty) {
    logger.warn { s"Failed to deleted [${successes.size}] path(s)" }
    failures.foreach {
      case FailedAttempt(path, None)         => logger.warn { s"${path.toString} - reason unknown" }
      case FailedAttempt(path, Some(reason)) => logger.warn(s"${path.toString}", reason)
    }
  }

  def log(logger: Logger) = {
    logSuccesses(logger)
    logFailures(logger)
    logger.info { s"Cleanup finished in [$timeElapsed] millisecond(s)" }
  }

}

object CleanupStatistics {

  private def splitAttempts(attempts: List[CleanupAttempt],
                            successes: List[SuccessfulAttempt] = List.empty[SuccessfulAttempt],
                            failures: List[FailedAttempt] = List.empty[FailedAttempt]): (List[SuccessfulAttempt], List[FailedAttempt]) = attempts match {
    case (attempt: SuccessfulAttempt) :: tail => splitAttempts(tail, attempt  :: successes, failures)
    case (attempt: FailedAttempt)     :: tail => splitAttempts(tail, successes, attempt  :: failures)
    case Nil                                  => (successes, failures)
  }

  def collect(attempts: List[CleanupAttempt], timeElapsed: Long) = splitAttempts(attempts) match {
    case (successes, failures) => apply(successes, failures, timeElapsed)
  }

} 
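A minimal sketch of collecting and logging statistics, assuming SuccessfulAttempt and FailedAttempt have the constructor shapes implied by the pattern matches above and that they wrap Hadoop Path values (an assumption).

import org.apache.hadoop.fs.Path
import org.slf4j.LoggerFactory

object CleanupStatisticsDemo extends App {
  val attempts: List[CleanupAttempt] = List(
    SuccessfulAttempt(new Path("/tmp/export-1")),                                             // assumed shape
    FailedAttempt(new Path("/tmp/export-2"), Some(new RuntimeException("permission denied"))) // assumed shape
  )

  CleanupStatistics.collect(attempts, timeElapsed = 42L).log(LoggerFactory.getLogger("cleanup"))
}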
Example 40
Source File: HiveEngineManagerSpringConfiguration.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.enginemanager.hive.conf

import com.webank.wedatasphere.linkis.enginemanager.EngineHook
import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration
import com.webank.wedatasphere.linkis.enginemanager.hook.{ConsoleConfigurationEngineHook, JarLoaderEngineHook}
import com.webank.wedatasphere.linkis.resourcemanager.domain.ModuleInfo
import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, ResourceRequestPolicy}
import com.webank.wedatasphere.linkis.rpc.Sender
import org.slf4j.{Logger, LoggerFactory}
import org.springframework.context.annotation.{Bean, Configuration}


@Configuration
class HiveEngineManagerSpringConfiguration {

  private val logger:Logger = LoggerFactory.getLogger(getClass)

  @Bean(Array("resources"))
  def createResource(): ModuleInfo = {
    val totalresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_MAX_MEMORY_AVAILABLE.getValue.toLong ,
        EnvConfiguration.ENGINE_MANAGER_MAX_CORES_AVAILABLE.getValue, EnvConfiguration.ENGINE_MANAGER_MAX_CREATE_INSTANCES.getValue)


    val protectresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_PROTECTED_MEMORY.getValue.toLong,
        EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue, EnvConfiguration.ENGINE_MANAGER_PROTECTED_INSTANCES.getValue)
    logger.info("create resource for hive")
    ModuleInfo(Sender.getThisServiceInstance, totalresource, protectresource, ResourceRequestPolicy.LoadInstance)
  }

  @Bean(name = Array("hooks"))
  def createEngineHook(): Array[EngineHook] = {
    Array(new ConsoleConfigurationEngineHook, new JarLoaderEngineHook)// TODO
  }


} 
Example 41
Source File: PipeLineManagerSpringConfiguration.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.enginemanager.pipeline

import com.webank.wedatasphere.linkis.enginemanager.EngineCreator
import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration
import com.webank.wedatasphere.linkis.resourcemanager.domain.ModuleInfo
import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, ResourceRequestPolicy}
import com.webank.wedatasphere.linkis.rpc.Sender
import org.slf4j.{Logger, LoggerFactory}
import org.springframework.context.annotation.{Bean, Configuration}


@Configuration
class PipeLineManagerSpringConfiguration {
  private val logger:Logger = LoggerFactory.getLogger(getClass)

  @Bean(Array("engineCreator"))
  def createEngineCreator(): EngineCreator =new PipeLineDefaultEngineCreator

  @Bean(Array("resources"))
  def createResource(): ModuleInfo = {
    val totalresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_MAX_MEMORY_AVAILABLE.getValue.toLong ,
      EnvConfiguration.ENGINE_MANAGER_MAX_CORES_AVAILABLE.getValue, EnvConfiguration.ENGINE_MANAGER_MAX_CREATE_INSTANCES.getValue)


    val protectresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_PROTECTED_MEMORY.getValue.toLong,
      EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue, EnvConfiguration.ENGINE_MANAGER_PROTECTED_INSTANCES.getValue)
    logger.info("create resource for pipeline")
    ModuleInfo(Sender.getThisServiceInstance, totalresource, protectresource, ResourceRequestPolicy.LoadInstance)
  }
} 
Example 42
Source File: CommentInterceptor.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.entrance.interceptor.impl

import java.lang
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.entrance.interceptor.EntranceInterceptor
import com.webank.wedatasphere.linkis.protocol.query.RequestPersistTask
import com.webank.wedatasphere.linkis.protocol.task.Task
import org.slf4j.{Logger, LoggerFactory}

import scala.util.matching.Regex

  override def dealComment(code: String): String = {
    val p = Pattern.compile(scalaCommentPattern)
    p.matcher(code).replaceAll("$1")
  }
}


object CommentMain{
  def main(args: Array[String]): Unit = {
    val sqlCode = "select * from default.user;--你好;show tables"
    val sqlCode1 = "select * from default.user--你好;show tables"
    println(SQLCommentHelper.dealComment(sqlCode))
  }
} 
Example 43
Source File: EntranceGroupFactory.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.entrance.scheduler

import com.webank.wedatasphere.linkis.entrance.conf.EntranceConfiguration
import com.webank.wedatasphere.linkis.entrance.execute.EntranceJob
import com.webank.wedatasphere.linkis.entrance.persistence.HaPersistenceTask
import com.webank.wedatasphere.linkis.protocol.config.{RequestQueryAppConfig, ResponseQueryConfig}
import com.webank.wedatasphere.linkis.rpc.Sender
import com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue.ParallelGroup
import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, SchedulerEvent}
import com.webank.wedatasphere.linkis.server.JMap
import org.apache.commons.lang.StringUtils
import org.slf4j.{Logger, LoggerFactory}


class EntranceGroupFactory extends GroupFactory {

  private val groupNameToGroups = new JMap[String, Group]
  private val logger:Logger = LoggerFactory.getLogger(classOf[EntranceGroupFactory])
  override def getOrCreateGroup(groupName: String): Group = {
    if(!groupNameToGroups.containsKey(groupName)) synchronized{
      //TODO Query the database and get initCapacity, maxCapacity, maxRunningJobs, maxAskExecutorTimes(查询数据库,拿到initCapacity、maxCapacity、maxRunningJobs、maxAskExecutorTimes)
      val initCapacity = 100
      val maxCapacity = 100
      var maxRunningJobs =  EntranceConfiguration.WDS_LINKIS_INSTANCE.getValue
      val maxAskExecutorTimes = EntranceConfiguration.MAX_ASK_EXECUTOR_TIME.getValue.toLong
      if (groupName.split("_").length < 2){
        logger.warn(s"name style of group: $groupName is not correct, we will set default value for the group")
      }else{
        val sender:Sender = Sender.getSender(EntranceConfiguration.CLOUD_CONSOLE_CONFIGURATION_SPRING_APPLICATION_NAME.getValue)
        val creator = groupName.split("_")(0)
        val username = groupName.split("_")(1)
        val engineName = EntranceConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue
        val engineType = if (engineName.trim().toLowerCase().contains("engine")) engineName.substring(0, engineName.length - "engine".length) else "spark"
        logger.info(s"Getting parameters for $groupName(正在为 $groupName 获取参数) username: $username, creator:$creator, engineType: $engineType")
        val keyAndValue = sender.ask(RequestQueryAppConfig(username, creator, engineType)).asInstanceOf[ResponseQueryConfig].getKeyAndValue
        try{
          maxRunningJobs = Integer.parseInt(keyAndValue.get(EntranceConfiguration.WDS_LINKIS_INSTANCE.key))
        }catch{
          case t:Throwable => logger.warn("Get maxRunningJobs from configuration server failed! Next use the default value to continue.",t)
        }
      }
      logger.info("groupName: {} =>  maxRunningJobs is {}", groupName, maxRunningJobs)
      val group = new ParallelGroup(groupName, initCapacity, maxCapacity)
      group.setMaxRunningJobs(maxRunningJobs)
      group.setMaxAskExecutorTimes(maxAskExecutorTimes)
      if(!groupNameToGroups.containsKey(groupName)) groupNameToGroups.put(groupName, group)
    }
    groupNameToGroups.get(groupName)
  }


  override def getGroupNameByEvent(event: SchedulerEvent): String = event match {
    case job: EntranceJob =>
      job.getTask match {
        case HaPersistenceTask(task) =>
          "HA"
        case _ =>EntranceGroupFactory.getGroupName(job.getCreator, job.getUser)
      }
  }
}
object EntranceGroupFactory {
  def getGroupName(creator: String, user: String): String = {
    if (StringUtils.isNotEmpty(creator)) creator + "_" + user
    else EntranceConfiguration.DEFAULT_REQUEST_APPLICATION_NAME.getValue + "_" + user
  }
} 
Example 44
Source File: package.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis

import java.util

import javax.servlet.http.HttpServletRequest
import com.webank.wedatasphere.linkis.common.exception.{ErrorException, ExceptionManager, FatalException, WarnException}
import com.webank.wedatasphere.linkis.common.utils.Utils
import com.webank.wedatasphere.linkis.server.exception.{BDPServerErrorException, NonLoginException}
import com.webank.wedatasphere.linkis.server.security.SecurityFilter
import org.apache.commons.lang.StringUtils
import org.apache.commons.lang.exception.ExceptionUtils
import org.slf4j.Logger

import scala.collection.{JavaConversions, mutable}


package object server {

  val EXCEPTION_MSG = "errorMsg"
  type JMap[K, V] = java.util.HashMap[K, V]

  implicit def getUser(req: HttpServletRequest): String = SecurityFilter.getLoginUsername(req)

  def validateFailed(message: String): Message = Message(status = 2).setMessage(message)
  def validate[T](json: util.Map[String, T], keys: String*): Unit = {
    keys.foreach(k => if(!json.contains(k) || json.get(k) == null || StringUtils.isEmpty(json.get(k).toString))
      throw new BDPServerErrorException(11001, s"Verification failed, $k cannot be empty!(验证失败,$k 不能为空!)"))
  }
  def error(message: String): Message = Message.error(message)
  implicit def ok(msg: String): Message = Message.ok(msg)
  implicit def error(t: Throwable): Message = Message.error(t)
  implicit def error(e: (String, Throwable)): Message = Message.error(e)
  implicit def error(msg: String, t: Throwable): Message = Message.error(msg -> t)
  //  def tryCatch[T](tryOp: => T)(catchOp: Throwable => T): T = Utils.tryCatch(tryOp)(catchOp)
//  def tryCatch(tryOp: => Message)(catchOp: Throwable => Message): Message = Utils.tryCatch(tryOp){
//    case nonLogin: NonLoginException => Message.noLogin(msg = nonLogin.getMessage)
//    case t => catchOp(t)
//  }
  def catchMsg(tryOp: => Message)(msg: String)(implicit log: Logger): Message = Utils.tryCatch(tryOp){
    case fatal: FatalException =>
      log.error("Fatal Error, system exit...", fatal)
      System.exit(fatal.getErrCode)
      Message.error("Fatal Error, system exit...")
    case nonLogin: NonLoginException =>
      val message = Message.noLogin(nonLogin.getMessage)
      message.data(EXCEPTION_MSG, nonLogin.toMap)
      message
    case error: ErrorException =>
      val cause = error.getCause
      val errorMsg = cause match {
        case t: ErrorException => s"error code(错误码): ${t.getErrCode}, error message(错误信息): ${t.getDesc}."
        case _ => s"error code(错误码): ${error.getErrCode}, error message(错误信息): ${error.getDesc}."
      }
      log.error(errorMsg, error)
      val message = Message.error(errorMsg)
      message.data(EXCEPTION_MSG, error.toMap)
      message
    case warn: WarnException =>
      val warnMsg = s"Warning code(警告码): ${warn.getErrCode}, Warning message(警告信息): ${warn.getDesc}."
      log.warn(warnMsg, warn)
      val message = Message.warn(warnMsg)
      message.data(EXCEPTION_MSG, warn.toMap)
      message
    case t =>
      log.error(msg, t)
      val errorMsg = ExceptionUtils.getRootCauseMessage(t)
      val message = if(StringUtils.isNotEmpty(errorMsg) && "operation failed(操作失败)" != msg) error(msg + "!the reason(原因):" + errorMsg)
      else if(StringUtils.isNotEmpty(errorMsg)) error(errorMsg) else error(msg)
      message.data(EXCEPTION_MSG, ExceptionManager.unknownException(message.getMessage))
  }
  def catchIt(tryOp: => Message)(implicit log: Logger): Message = catchMsg(tryOp)("operation failed(操作失败)")
  implicit def toScalaBuffer[T](list: util.List[T]): mutable.Buffer[T] = JavaConversions.asScalaBuffer(list)
  implicit def toScalaMap[K, V](map: util.Map[K, V]): mutable.Map[K, V] = JavaConversions.mapAsScalaMap(map)
  implicit def toJavaList[T](list: mutable.Buffer[T]): util.List[T] = {
    val arrayList = new util.ArrayList[T]
    list.foreach(arrayList.add)
    arrayList
  }
  implicit def toJavaMap[K, V](map: mutable.Map[K, V]): JMap[K, V] = {
    val hashMap = new util.HashMap[K, V]()
    map.foreach(m => hashMap.put(m._1, m._2))
    hashMap
  }
  implicit def toJavaMap[K, V](map: Map[K, V]): JMap[K, V] = {
    val hashMap = new util.HashMap[K, V]()
    map.foreach(m => hashMap.put(m._1, m._2))
    hashMap
  }
  implicit def asString(mapWithKey: (util.Map[String, Object], String)): String = mapWithKey._1.get(mapWithKey._2).asInstanceOf[String]
  implicit def getString(mapWithKey: (util.Map[String, String], String)): String = mapWithKey._1.get(mapWithKey._2)
  implicit def asInt(map: util.Map[String, Object], key: String): Int = map.get(key).asInstanceOf[Int]
  implicit def asBoolean(mapWithKey: (util.Map[String, Object], String)): Boolean = mapWithKey._1.get(mapWithKey._2).asInstanceOf[Boolean]

} 
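A hedged sketch of wrapping a service method with catchIt from the package object above; the class name and Message payload are illustrative.

import com.webank.wedatasphere.linkis.server._
import org.slf4j.{Logger, LoggerFactory}

class DemoRestfulApi {
  private implicit val log: Logger = LoggerFactory.getLogger(getClass)

  def doSomething(): Message = catchIt {
    // business code that may throw ErrorException, WarnException or anything else
    Message.ok("operation succeeded")
  }
}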
Example 45
Source File: HostsStatuses.scala    From algoliasearch-client-scala   with MIT License 5 votes vote down vote up
package algolia

import java.util.concurrent.ConcurrentHashMap

import org.slf4j.{Logger, LoggerFactory}

case class HostsStatuses(
    configuration: AlgoliaClientConfiguration,
    utils: AlgoliaUtils,
    queryHosts: Seq[String],
    indexingHosts: Seq[String]
) {

  private[algolia] val hostStatuses: ConcurrentHashMap[String, HostStatus] =
    new ConcurrentHashMap[String, HostStatus](5)

  val logger: Logger = LoggerFactory.getLogger("algoliasearch")

  def markHostAsUp(host: String): Unit = {
    logger.debug("Marking {} as `up`", host)
    hostStatuses.put(host, HostStatus.up(utils.now()))
  }

  def markHostAsDown(host: String): Unit = {
    logger.debug("Marking {} as `down`", host)
    hostStatuses.put(host, HostStatus.down(utils.now()))
  }

  def indexingHostsThatAreUp(): Seq[String] = hostsThatAreUp(indexingHosts)

  def queryHostsThatAreUp(): Seq[String] = hostsThatAreUp(queryHosts)

  private def hostsThatAreUp(hosts: Seq[String]): Seq[String] = {
    val filteredHosts = hosts.filter(h => isUpOrCouldBeRetried(getHostStatus(h)))
    if (filteredHosts.isEmpty) {
      hosts
    } else {
      filteredHosts
    }
  }

  def isUpOrCouldBeRetried(hostStatus: HostStatus): Boolean =
    hostStatus.up || (utils
      .now() - hostStatus.updatedAt) >= configuration.hostDownTimeoutMs

  private def getHostStatus(host: String): HostStatus =
    hostStatuses.getOrDefault(host, HostStatus.up(utils.now()))
}

private case class HostStatus(up: Boolean, updatedAt: Long)

private object HostStatus {

  def up(now: Long) = HostStatus(up = true, now)

  def down(now: Long) = HostStatus(up = false, now)

} 
Example 46
Source File: SparkCassOutputHandler.scala    From Spark2Cassandra   with Apache License 2.0 5 votes vote down vote up
package com.github.jparkie.spark.cassandra

import org.apache.cassandra.utils.OutputHandler
import org.slf4j.Logger


class SparkCassOutputHandler(log: Logger) extends OutputHandler {
  override def warn(msg: String): Unit = {
    log.warn(msg)
  }

  override def warn(msg: String, th: Throwable): Unit = {
    log.warn(msg, th)
  }

  override def debug(msg: String): Unit = {
    log.debug(msg)
  }

  override def output(msg: String): Unit = {
    log.info(msg)
  }
} 
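A minimal sketch of adapting an SLF4J logger to Cassandra's OutputHandler via the class above; the logger name is illustrative.

import org.slf4j.LoggerFactory

object SparkCassOutputHandlerDemo extends App {
  val handler = new SparkCassOutputHandler(LoggerFactory.getLogger("cassandra-bulk"))

  handler.output("SSTable streaming started")          // forwarded to log.info
  handler.warn("Fell back to a single SSTable writer") // forwarded to log.warn
}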
Example 47
Source File: SampleRoutes.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package akkahttp

import java.io.File

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.Route
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.sys.process.Process
import scala.util.{Failure, Success}


object SampleRoutes extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("SampleRoutes")
  implicit val executionContext = system.dispatcher


  def getFromBrowsableDir: Route = {
    val dirToBrowse = File.separator + "tmp"

    // pathPrefix allows loading dirs and files recursively
    pathPrefix("entries") {
      getFromBrowseableDirectory(dirToBrowse)
    }
  }

  def parseFormData: Route = path("post") {
    formFields('color, 'age.as[Int]) { (color, age) =>
      complete(s"The color is '$color' and the age is $age")
    }
  }

  def routes: Route = {
    getFromBrowsableDir ~ parseFormData
  }

  val bindingFuture = Http().bindAndHandle(routes, "127.0.0.1", 8000)

  bindingFuture.onComplete {
    case Success(b) =>
      println("Server started, listening on: " + b.localAddress)
    case Failure(e) =>
      println(s"Server could not bind to... Exception message: ${e.getMessage}")
      system.terminate()
  }

  def browserClient() = {
    val os = System.getProperty("os.name").toLowerCase
    if (os == "mac os x") Process("open ./src/main/resources/SampleRoutes.html").!
  }

  browserClient()

  sys.addShutdownHook {
    println("About to shutdown...")
    val fut = bindingFuture.map(serverBinding => serverBinding.terminate(hardDeadline = 3.seconds))
    println("Waiting for connections to terminate...")
    val onceAllConnectionsTerminated = Await.result(fut, 10.seconds)
    println("Connections terminated")
    onceAllConnectionsTerminated.flatMap { _ => system.terminate()
    }
  }
} 
Example 48
Source File: PublishToSourceQueueFromMultipleThreads.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream

import akka.actor.ActorSystem
import akka.stream.Supervision.Decider
import akka.stream._
import akka.stream.scaladsl.{Flow, Sink, Source, SourceQueueWithComplete}
import akka.{Done, NotUsed}
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future
import scala.concurrent.duration._
import scala.util.{Failure, Success}


object PublishToSourceQueueFromMultipleThreads extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("PublishToSourceQueueFromMultipleThreads")
  implicit val ec = system.dispatcher

  val bufferSize = 100
  // As of akka 2.6.x there is a thread safe implementation for SourceQueue
  val maxConcurrentOffers = 1000
  val numberOfPublishingClients = 1000

  val slowSink: Sink[Seq[Int], NotUsed] =
    Flow[Seq[Int]]
      .delay(2.seconds, DelayOverflowStrategy.backpressure)
      .to(Sink.foreach(e => logger.info(s"Reached sink: $e")))

  val sourceQueue: SourceQueueWithComplete[Int] = Source
    .queue[Int](bufferSize, OverflowStrategy.backpressure, maxConcurrentOffers)
    .groupedWithin(10, 1.seconds)
    .to(slowSink)
    .run

  val doneConsuming: Future[Done] = sourceQueue.watchCompletion()
  signalWhen(doneConsuming, "consuming") //never completes

  simulatePublishingFromMultipleThreads()

  // Before 2.6.x a stream had to be used to throttle and control the backpressure
  //simulatePublishingClientsFromStream()

  // Decide on the stream level, because the OverflowStrategy.backpressure
  // on the sourceQueue causes an IllegalStateException
  // Handling this on the stream level allows to restart the stream
  private def simulatePublishingClientsFromStream() = {

    val decider: Decider = {
      case _: IllegalStateException => println("Got backpressure signal for offered element, restart..."); Supervision.Restart
      case _ => Supervision.Stop
    }

    val donePublishing: Future[Done] = Source(1 to numberOfPublishingClients)
      .mapAsync(10)(offerToSourceQueue) //throttle
      .withAttributes(ActorAttributes.supervisionStrategy(decider))
      .runWith(Sink.ignore)
    signalWhen(donePublishing, "publishing")
  }

  private def simulatePublishingFromMultipleThreads() = (1 to numberOfPublishingClients).par.foreach(offerToSourceQueue)

  private def offerToSourceQueue(each: Int) = {
    sourceQueue.offer(each).map {
      case QueueOfferResult.Enqueued => logger.info(s"enqueued $each")
      case QueueOfferResult.Dropped => logger.info(s"dropped $each")
      case QueueOfferResult.Failure(ex) => logger.info(s"Offer failed: $ex")
      case QueueOfferResult.QueueClosed => logger.info("Source Queue closed")
    }
  }

  private def signalWhen(done: Future[Done], operation: String) = {
    done.onComplete {
      case Success(b) =>
        logger.info(s"Finished: $operation")
      case Failure(e) =>
        logger.info(s"Failure: $e About to terminate...")
        system.terminate()
    }
  }
} 
Example 49
Source File: TweetExample.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream

import java.time.{Instant, ZoneId}

import akka.NotUsed
import akka.actor.{ActorSystem, Cancellable}
import akka.stream.DelayOverflowStrategy
import akka.stream.scaladsl.{Flow, MergePrioritized, Sink, Source}
import org.apache.commons.lang3.exception.ExceptionUtils
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.duration._
import scala.util.{Failure, Success}



object TweetExample extends App {
  implicit val system = ActorSystem("TweetExample")
  implicit val ec = system.dispatcher
  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  final case class Author(handle: String)

  final case class Hashtag(name: String)

  final case class Tweet(author: Author, timestamp: Long, body: String) {
    def hashtags: Set[Hashtag] =
      body.split(" ").collect { case t if t.startsWith("#") => Hashtag(t) }.toSet

    override def toString = {
      val localDateTime = Instant.ofEpochMilli(timestamp).atZone(ZoneId.systemDefault()).toLocalDateTime
      s"$localDateTime - ${author.handle} tweeted: ${body.take(5)}..."
    }
  }

  val akkaTag = Hashtag("#akka")

  val tweetsLowPrio: Source[Tweet, Cancellable] = Source.tick(1.second, 200.millis, NotUsed).map(_ => Tweet(Author("LowPrio"), System.currentTimeMillis, "#other #akka aBody"))
  val tweetsHighPrio: Source[Tweet, Cancellable] = Source.tick(2.second, 1.second, NotUsed).map(_ => Tweet(Author("HighPrio"), System.currentTimeMillis, "#akka #other aBody"))
  val tweetsVeryHighPrio: Source[Tweet, Cancellable] = Source.tick(2.second, 1.second, NotUsed).map(_ => Tweet(Author("VeryHighPrio"), System.currentTimeMillis, "#akka #other aBody"))

  val limitedTweets: Source[Tweet, NotUsed] = Source.combine(tweetsLowPrio, tweetsHighPrio, tweetsVeryHighPrio)(_ => MergePrioritized(List(1, 10, 100))).take(20)

  val processingFlow = Flow[Tweet]
    .filter(_.hashtags.contains(akkaTag))
    .wireTap(each => logger.info(s"$each"))

  val slowDownstream  =
    Flow[Tweet]
      .delay(5.seconds, DelayOverflowStrategy.backpressure)

  val processedTweets =
    limitedTweets
      .via(processingFlow)
      .via(slowDownstream)
      .runWith(Sink.seq)

  processedTweets.onComplete {
    case Success(results) =>
      logger.info(s"Successfully processed: ${results.size} tweets")
      system.terminate()
    case Failure(exception) =>
      logger.info(s"The stream failed with: ${ExceptionUtils.getRootCause(exception)}")
      system.terminate()
  }
} 
Example 50
Source File: AsyncExecution.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream

import akka.Done
import akka.actor.ActorSystem
import akka.stream.ActorAttributes
import akka.stream.scaladsl.{Flow, Sink, Source}
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future
import scala.util.{Failure, Success}


object AsyncExecution extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("AsyncExecution")
  implicit val ec = system.dispatcher

  def stage(name: String) =
    Flow[Int]
      .wireTap(index => logger.info(s"Stage $name processing element $index by ${Thread.currentThread().getName}"))

  def stageBlocking(name: String) =
    Flow[Int]
      .wireTap(index => logger.info(s"Stage $name processing element $index by ${Thread.currentThread().getName}"))
      .wireTap(_ => Thread.sleep(5000))
      .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking"))

  def sinkBlocking: Sink[Int, Future[Done]] =
    Sink.foreach { index: Int =>
      Thread.sleep(2000)
      logger.info(s"Slow sink processing element $index by ${Thread.currentThread().getName}")
     }
      //Adding a custom dispatcher creates an async boundary
      //see discussion in: https://discuss.lightbend.com/t/how-can-i-make-sure-that-fileio-frompath-is-picking-up-my-dispatcher/6528/4
      .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking"))


  val done = Source(1 to 10)
    .via(stage("A")).async
    //When activated instead of alsoTo(sinkBlocking): elements for stage C are held up by stage B
    //.via(stageBlocking("B")).async
    .alsoTo(sinkBlocking).async
    .via(stage("C")).async
    .runWith(Sink.ignore)

  //With alsoTo(sinkBlocking) the stages A and C signal "done" too early and thus would terminate the whole stream
  //The reason for this is the custom dispatcher in sinkBlocking
  //terminateWhen(done)

  def terminateWhen(done: Future[_]) = {
    done.onComplete {
      case Success(_) =>
        println("Flow Success. About to terminate...")
        system.terminate()
      case Failure(e) =>
        println(s"Flow Failure: $e. About to terminate...")
        system.terminate()
    }
  }
} 
Example 51
Source File: WaitForThreeFlowsToComplete.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream

import java.nio.file.Paths

import akka.actor.ActorSystem
import akka.stream._
import akka.stream.scaladsl._
import akka.util.ByteString
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent._
import scala.concurrent.duration._


object WaitForThreeFlowsToComplete extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("WaitForThreeFlowsToComplete")
  implicit val ec = system.dispatcher

  def lineSink(filename: String): Sink[String, Future[IOResult]] =
    Flow[String]
      .map(s => ByteString(s + "\n"))
      .wireTap(_ => logger.info(s"Add line to file: $filename"))
      .toMat(FileIO.toPath(Paths.get(filename)))(Keep.right) //retain to the Future[IOResult]
      .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking"))

  val origSource = Source(1 to 10)
  //scan (= transform) the source
  val factorialsSource = origSource.scan(BigInt(1))((acc, next) => acc * next)

  val fastFlow = origSource.runForeach(i => logger.info(s"Reached sink: $i"))

  val slowFlow1 = factorialsSource
    .map(_.toString)
    .runWith(lineSink("factorial1.txt"))

  val slowFlow2 = factorialsSource
    .zipWith(Source(0 to 10))((num, idx) => s"$idx! = $num")
    .throttle(1, 1.second, 1, ThrottleMode.shaping)
    .runWith(lineSink("factorial2.txt"))

  val allDone = for {
    fastFlowDone <- fastFlow
    slowFlow1Done <- slowFlow1
    slowFlow2Done <- slowFlow2
  } yield (fastFlowDone, slowFlow1Done, slowFlow2Done)

  allDone.onComplete { results =>
    logger.info(s"Resulting futures from flows: $results - about to terminate")
    system.terminate()
  }
} 
Example 52
Source File: SplitAfter.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream_shared_state

import java.time.{Instant, LocalDateTime, ZoneOffset}

import akka.Done
import akka.actor.ActorSystem
import akka.stream.scaladsl.{Sink, Source}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.immutable._
import scala.concurrent.Future
import scala.concurrent.duration._
import scala.util.{Failure, Success}


object SplitAfter extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("SplitAfter")
  implicit val executionContext = system.dispatcher

  private def hasSecondChanged: () => Seq[(Int, Instant)] => Iterable[(Instant, Boolean)] = {
    () => {
      slidingElements => {
        if (slidingElements.size == 2) {
          val current = slidingElements.head
          val next = slidingElements.tail.head
          val currentBucket = LocalDateTime.ofInstant(current._2, ZoneOffset.UTC).withNano(0)
          val nextBucket = LocalDateTime.ofInstant(next._2, ZoneOffset.UTC).withNano(0)
          List((current._2, currentBucket != nextBucket))
        } else {
          val current = slidingElements.head
          List((current._2, false))
        }
      }
    }
  }

  val done: Future[Done] = Source(1 to 100)
    .throttle(1, 100.millis)
    .map(elem => (elem, Instant.now()))
    .sliding(2)                           // allows comparing this element with the next element
    .statefulMapConcat(hasSecondChanged)  // stateful decision
    .splitAfter(_._2)                     // split when the second has changed
    .map(_._1)                            // proceed with the payload
    .fold(0)((acc, _) => acc + 1)         // count the elements per substream
    .mergeSubstreams
    .runWith(Sink.foreach(each => println(s"Elements in group: $each")))

  terminateWhen(done)


  def terminateWhen(done: Future[_]) = {
    done.onComplete {
      case Success(_) =>
        println("Flow Success. About to terminate...")
        system.terminate()
      case Failure(e) =>
        println(s"Flow Failure: $e. About to terminate...")
        system.terminate()
    }
  }
} 
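For readers new to splitAfter: it ends the current substream right after the element for which the predicate returns true, so the boundary element still belongs to the group that is being closed. A minimal sketch, assuming an implicit ActorSystem is in scope as in the example above:

// Splits into the groups Seq(1, 2, 0), Seq(3, 4, 0) and Seq(5)
Source(List(1, 2, 0, 3, 4, 0, 5))
  .splitAfter(_ == 0)
  .fold(Seq.empty[Int])(_ :+ _)
  .mergeSubstreams
  .runForeach(group => println(s"Group: $group"))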
Example 53
Source File: ConflateWithSeed.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream_shared_state

import akka.actor.ActorSystem
import akka.stream.scaladsl.{Flow, Source}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection._
import scala.concurrent.duration._
import scala.util.Random


object ConflateWithSeed extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("ConflateWithSeed")
  implicit val executionContext = system.dispatcher

  def seed(i: Int): mutable.LinkedHashMap[Int, Int] = mutable.LinkedHashMap[Int, Int](i -> 1)

  def aggregate(state: mutable.LinkedHashMap[Int, Int], i: Int): mutable.LinkedHashMap[Int, Int] = {
    logger.info(s"Got: $i")
    state.put(i, state.getOrElseUpdate(i, 0) + 1)
    state
  }

  // lazyFlow is not really needed here, but nice to know that it exists
  // conflateWithSeed invokes the seed method every time, so it
  // is safe to materialize this flow multiple times
  val lazyFlow = Flow.lazyFlow(() =>
    Flow[Int]
      .map(_ => Random.nextInt(100))
      .conflateWithSeed(seed)(aggregate)
  )
  Source(1 to 10)
    .via(lazyFlow)
    .throttle(1, 1.second) //simulate slow sink
    .runForeach(each => logger.info(s"1st reached sink: $each"))

//  Source(1 to 10)
//    .via(lazyFlow)
//    .throttle(1, 1.second) //simulate slow sink
//    .runForeach(each => logger.info(s"2nd reached sink: $each"))
} 
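conflateWithSeed is the general form; when the aggregate has the same type as the elements, plain conflate is often enough. A small sketch under the same implicit-ActorSystem assumption:

// While the throttled consumer is busy, pending numbers are collapsed into their sum
Source(1 to 100)
  .conflate(_ + _)
  .throttle(1, 1.second)
  .runForeach(sum => println(s"Batched sum: $sum"))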
Example 54
Source File: SplitWhen.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.stream_shared_state

import java.nio.file.Paths

import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream.IOResult
import akka.stream.scaladsl.{FileIO, Flow, Framing, Keep, Sink, Source}
import akka.util.ByteString
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future
import scala.util.{Failure, Success}


object SplitWhen extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("SplitWhen")
  implicit val executionContext = system.dispatcher

  val nonLinearCapacityFactor = 100 //raise to see how it scales
  val filename = "splitWhen.csv"

  def genResourceFile() = {
    logger.info(s"Writing resource file: $filename...")

    def fileSink(filename: String): Sink[String, Future[IOResult]] =
      Flow[String]
        .map(s => ByteString(s + "\n"))
        .toMat(FileIO.toPath(Paths.get(filename)))(Keep.right)

    Source.fromIterator(() => (1 to nonLinearCapacityFactor).toList.combinations(2))
      .map(each => s"${each.head},${each.last}")
      .runWith(fileSink(filename))
  }

  val sourceOfLines = FileIO.fromPath(Paths.get(filename))
    .via(Framing.delimiter(ByteString("\n"), maximumFrameLength = 1024, allowTruncation = true)
      .map(_.utf8String))

  val csvToRecord: Flow[String, Record, NotUsed] = Flow[String]
    .map(_.split(",").map(_.trim))
    .map(stringArrayToRecord)

  val terminationHook: Flow[Record, Record, Unit] = Flow[Record]
    .watchTermination() { (_, done) =>
      done.onComplete {
        case Failure(err) => logger.info(s"Flow failed: $err")
        case _ => system.terminate(); logger.info(s"Flow terminated")
      }
    }

  val printSink = Sink.foreach[Vector[Record]](each => println(s"Reached sink: $each"))

  private def stringArrayToRecord(cols: Array[String]) = Record(cols(0), cols(1))

  private def hasKeyChanged = {
    () => {
      var lastRecordKey: Option[String] = None
      currentRecord: Record =>
        lastRecordKey match {
          case Some(currentRecord.key) | None =>
            lastRecordKey = Some(currentRecord.key)
            List((currentRecord, false))
          case _ =>
            lastRecordKey = Some(currentRecord.key)
            List((currentRecord, true))
        }
    }
  }

  genResourceFile().onComplete {
    case Success(_) =>
      logger.info(s"Start processing...")
      sourceOfLines
        .via(csvToRecord)
        .via(terminationHook)
        .statefulMapConcat(hasKeyChanged)   // stateful decision
        .splitWhen(_._2)                    // split when key has changed
        .map(_._1)                          // proceed with payload
        .fold(Vector.empty[Record])(_ :+ _) // sum payload
        .mergeSubstreams                    // better performance, but why?
        .runWith(printSink)
    case Failure(exception) => logger.info(s"Exception: $exception")
  }

  case class Record(key: String, value: String)
} 
Example 55
Source File: Hl7TcpClient.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package alpakka.tcp_to_websockets.hl7mllp

import akka.actor.ActorSystem
import akka.stream.scaladsl.{Sink, Source, Tcp}
import akka.util.ByteString
import ca.uhn.hl7v2.AcknowledgmentCode
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future
import scala.concurrent.duration._

object Hl7TcpClient extends App with MllpProtocol {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val system = ActorSystem("Hl7TcpClient")

  val (address, port) = ("127.0.0.1", 6160)

  //(1 to 1).par.foreach(each => localStreamingMessageClient(each, 1000, system, address, port))
  (1 to 1).par.foreach(each => localSingleMessageClient(each, 100, system, address, port))


  def localSingleMessageClient(clientname: Int, numberOfMessages: Int, system: ActorSystem, address: String, port: Int): Unit = {
    implicit val sys = system
    implicit val ec = system.dispatcher

    val connection = Tcp().outgoingConnection(address, port)

    def sendAndReceive(i: Int): Future[Int] = {
      val traceID = s"$clientname-${i.toString}"
      val source = Source.single(ByteString(encodeMllp(generateTestMessage(traceID)))).via(connection)
      val closed = source.runForeach(each =>
        if (isNACK(each)) {
          logger.info(s"Client: $clientname-$i received NACK: ${printable(each.utf8String)}")
          throw new RuntimeException("NACK")
        } else {
          logger.info(s"Client: $clientname-$i received ACK: ${printable(each.utf8String)}")
        }
      ).recoverWith {
        case _: RuntimeException => {
          logger.info(s"About to retry for: $clientname-$i...")
          sendAndReceive(i)
        }
        case e: Throwable => Future.failed(e)
      }
      closed.onComplete(each => logger.debug(s"Client: $clientname-$i closed: $each"))
      Future(i)
    }

    Source(1 to numberOfMessages)
      .throttle(1, 1.second)
      .mapAsync(1)(i => sendAndReceive(i))
      .runWith(Sink.ignore)
  }

  def localStreamingMessageClient(id: Int, numberOfMessages: Int, system: ActorSystem, address: String, port: Int): Unit = {
    implicit val sys = system
    implicit val ec = system.dispatcher

    val connection = Tcp().outgoingConnection(address, port)

    val hl7MllpMessages = (1 to numberOfMessages).map(each => ByteString(encodeMllp(generateTestMessage(each.toString))))
    val source = Source(hl7MllpMessages).throttle(10, 1.second).via(connection)
    val closed = source.runForeach(each => logger.info(s"Client: $id received echo: ${printable(each.utf8String)}"))
    closed.onComplete(each => logger.info(s"Client: $id closed: $each"))
  }

  private def generateTestMessage(senderTraceID: String) = {
    //For now, put the senderTraceID into the "sender lab" field to follow the messages across the workflow
    val message = new StringBuilder
    message ++= s"MSH|^~\\&|$senderTraceID|MCM|LABADT|MCM|198808181126|SECURITY|ADT^A01|1234|P|2.5.1|"
    message ++= CARRIAGE_RETURN
    message ++= "EVN|A01|198808181123||"
    message ++= CARRIAGE_RETURN
    message ++= "PID|||PATID1234^5^M11^ADT1^MR^MCM~123456789^^^USSSA^SS||EVERYMAN^ADAM^A^III||19610615|M||C|1200 N ELM STREET^^GREENSBORO^NC^27401-1020"
    message ++= CARRIAGE_RETURN
    message ++= "NK1|1|JONES^BARBARA^K|SPO^Spouse^HL70063|171 ZOBERLEIN^^ISHPEMING^MI^49849^|"
    message ++= CARRIAGE_RETURN
    message ++= "PV1|1|I|2000^2012^01||||004777^LEBAUER^SIDNEY^J.|||SUR||||9|A0|"
    message ++= CARRIAGE_RETURN
    message.toString()
  }

  private def isNACK(message: ByteString): Boolean = {
    message.utf8String.contains(AcknowledgmentCode.AE.name()) ||
      message.utf8String.contains(AcknowledgmentCode.AR.name()) ||
      message.utf8String.contains(AcknowledgmentCode.CE.name()) ||
      message.utf8String.contains(AcknowledgmentCode.CR.name())
  }
} 
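encodeMllp, printable and CARRIAGE_RETURN come from the MllpProtocol trait, which is not part of this excerpt. As an orientation only, MLLP framing wraps each HL7 message in a vertical-tab start byte and a file-separator/carriage-return trailer; a hypothetical stand-in could look like this:

// Sketch of MLLP framing; byte values follow the MLLP specification, not the trait's actual code
object MllpFramingSketch {
  private val StartOfBlock = 0x0B.toChar.toString    // <VT>
  private val EndOfBlock = 0x1C.toChar.toString      // <FS>
  private val CarriageReturn = 0x0D.toChar.toString  // <CR>

  def encodeMllp(message: String): String =
    s"$StartOfBlock$message$EndOfBlock$CarriageReturn"
}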
Example 56
Source File: JMSTextMessageProducerClient.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package alpakka.jms

import akka.Done
import akka.actor.ActorSystem
import akka.stream.ThrottleMode
import akka.stream.alpakka.jms.scaladsl.JmsProducer
import akka.stream.alpakka.jms.{JmsProducerSettings, JmsTextMessage}
import akka.stream.scaladsl.{Sink, Source}
import com.typesafe.config.Config
import javax.jms.ConnectionFactory
import org.apache.activemq.ActiveMQConnectionFactory
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future
import scala.concurrent.duration._

object JMSTextMessageProducerClient {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("JMSTextMessageProducerClient")
  implicit val ec = system.dispatcher

  //The "failover:" part in the brokerURL instructs ActiveMQ to reconnect on network failure
  //This does not interfere with the new 1.0-M2 implementation
  val connectionFactory: ConnectionFactory = new ActiveMQConnectionFactory("artemis", "simetraehcapa", "failover:tcp://127.0.0.1:21616")


  def main(args: Array[String]): Unit = {
    jmsTextMessageProducerClient(connectionFactory)
  }

  private def jmsTextMessageProducerClient(connectionFactory: ConnectionFactory) = {
    val producerConfig: Config = system.settings.config.getConfig(JmsProducerSettings.configPath)
    val jmsProducerSink: Sink[JmsTextMessage, Future[Done]] = JmsProducer.sink(
      JmsProducerSettings(producerConfig, connectionFactory).withQueue("test-queue")
    )

    Source(1 to 2000000)
      .throttle(1, 1.second, 1, ThrottleMode.shaping)
      .wireTap(number => logger.info(s"SEND Msg with TRACE_ID: $number"))
      .map { number =>
        JmsTextMessage(s"Payload: ${number.toString}")
          .withProperty("TRACE_ID", number)
      }
      .runWith(jmsProducerSink)
  }
} 
Example 57
Source File: TiRowRDD.scala    From tispark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.tispark

import com.pingcap.tikv._
import com.pingcap.tikv.columnar.TiColumnarBatchHelper
import com.pingcap.tikv.meta.TiDAGRequest
import com.pingcap.tispark.listener.CacheInvalidateListener
import com.pingcap.tispark.{TiPartition, TiTableReference}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.vectorized.ColumnarBatch
import org.apache.spark.{Partition, TaskContext, TaskKilledException}
import org.slf4j.Logger

import scala.collection.JavaConversions._

class TiRowRDD(
    override val dagRequest: TiDAGRequest,
    override val physicalId: Long,
    val chunkBatchSize: Int,
    override val tiConf: TiConfiguration,
    val output: Seq[Attribute],
    override val tableRef: TiTableReference,
    @transient private val session: TiSession,
    @transient private val sparkSession: SparkSession)
    extends TiRDD(dagRequest, physicalId, tiConf, tableRef, session, sparkSession) {

  protected val logger: Logger = log

  // Cache invalidation callback function,
  // used by the driver to update the PD cache
  private val callBackFunc = CacheInvalidateListener.getInstance()

  override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] =
    new Iterator[ColumnarBatch] {
      checkTimezone()

      private val tiPartition = split.asInstanceOf[TiPartition]
      private val session = TiSession.getInstance(tiConf)
      session.injectCallBackFunc(callBackFunc)
      private val snapshot = session.createSnapshot(dagRequest.getStartTs)
      private[this] val tasks = tiPartition.tasks

      private val iterator =
        snapshot.tableReadChunk(dagRequest, tasks, chunkBatchSize)

      override def hasNext: Boolean = {
        // Kill the task in case it has been marked as killed. This logic is from
        // InterruptibleIterator, but we inline it here instead of wrapping the iterator in order
        // to avoid the performance overhead.
        if (context.isInterrupted()) {
          throw new TaskKilledException
        }
        iterator.hasNext
      }

      override def next(): ColumnarBatch = {
        TiColumnarBatchHelper.createColumnarBatch(iterator.next)
      }
    }.asInstanceOf[Iterator[InternalRow]]

} 
Example 58
Source File: SparkFunSuite.scala    From tispark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark

import java.io.File

import org.apache.spark.internal.Logging
import org.scalatest._
import org.slf4j.Logger

abstract class SparkFunSuite extends FunSuite with Logging {
  protected val logger: Logger = log

  
  final protected override def withFixture(test: NoArgTest): Outcome = {
    val testName = test.text
    val suiteName = this.getClass.getName
    val shortSuiteName = suiteName.replaceAll("org.apache.spark", "o.a.s")
    try {
      logInfo(s"\n\n===== TEST OUTPUT FOR $shortSuiteName: '$testName' =====\n")
      test()
    } finally {
      logInfo(s"\n\n===== FINISHED $shortSuiteName: '$testName' =====\n")
    }
  }

  protected final def getTestResourcePath(file: String): String =
    getTestResourceFile(file).getCanonicalPath

  // helper function
  protected final def getTestResourceFile(file: String): File =
    new File(getClass.getClassLoader.getResource(file).getFile)

} 
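A minimal sketch of a concrete suite built on this base class; the test name and assertion are made up for illustration:

class ExampleSuite extends SparkFunSuite {
  test("logger and fixture banner are available") {
    logger.info("running inside the withFixture wrapper")
    assert(1 + 1 == 2)
  }
}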
Example 59
Source File: Utils.scala    From tispark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.test

import java.io.{File, PrintWriter}
import java.nio.file.{Files, Paths}
import java.util.Properties

import org.slf4j.Logger

import scala.collection.JavaConversions._

object Utils {

  def writeFile(content: String, path: String): Unit =
    TryResource(new PrintWriter(path))(_.close()) {
      _.print(content)
    }

  def TryResource[T](res: T)(closeOp: T => Unit)(taskOp: T => Unit): Unit =
    try {
      taskOp(res)
    } finally {
      closeOp(res)
    }

  def readFile(path: String): List[String] =
    Files.readAllLines(Paths.get(path)).toList

  def getOrThrow(prop: Properties, key: String): String = {
    val jvmProp = System.getProperty(key)
    if (jvmProp != null) {
      jvmProp
    } else {
      val v = prop.getProperty(key)
      if (v == null) {
        throw new IllegalArgumentException(key + " is null")
      } else {
        v
      }
    }
  }

  def getFlagOrFalse(prop: Properties, key: String): Boolean =
    getFlag(prop, key, "false")

  private def getFlag(prop: Properties, key: String, defValue: String): Boolean =
    getOrElse(prop, key, defValue).equalsIgnoreCase("true")

  def getOrElse(prop: Properties, key: String, defValue: String): String = {
    val jvmProp = System.getProperty(key)
    if (jvmProp != null) {
      jvmProp
    } else {
      Option(prop.getProperty(key)).getOrElse(defValue)
    }
  }

  def getFlagOrTrue(prop: Properties, key: String): Boolean =
    getFlag(prop, key, "true")

  def time[R](block: => R)(logger: Logger): R = {
    val t0 = System.nanoTime()
    val result = block
    val t1 = System.nanoTime()
    logger.info("Elapsed time: " + (t1 - t0) / 1000.0 / 1000.0 / 1000.0 + "s")
    result
  }

  def ensurePath(basePath: String, paths: String*): Boolean =
    new File(joinPath(basePath, paths: _*)).mkdirs()

  def joinPath(basePath: String, paths: String*): String =
    Paths.get(basePath, paths: _*).toAbsolutePath.toString
} 
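A quick usage sketch of the time helper above; the logger name is chosen arbitrarily:

import org.slf4j.LoggerFactory

val utilsLogger = LoggerFactory.getLogger("Utils")
val total = Utils.time {
  (1 to 1000000).map(_.toLong).sum // some work worth timing
}(utilsLogger)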
Example 60
Source File: Logging.scala    From spark-redis   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package com.redislabs.provider.redis.util

import org.slf4j.{Logger, LoggerFactory}


trait Logging {

  @transient private var _logger: Logger = _

  protected def loggerName: String =
    this.getClass.getName.stripSuffix("$")

  protected def logger: Logger = {
    if (_logger == null) {
      _logger = LoggerFactory.getLogger(loggerName)
    }
    _logger
  }

  def logInfo(msg: => String): Unit = {
    if (logger.isInfoEnabled) {
      _logger.info(msg)
    }
  }

  def logDebug(msg: => String): Unit = {
    if (logger.isDebugEnabled) {
      _logger.debug(msg)
    }
  }

  def logTrace(msg: => String): Unit = {
    if (logger.isTraceEnabled) {
      _logger.trace(msg)
    }
  }
} 
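A minimal usage sketch (the class name is illustrative): mix the trait in and the logger is created lazily on first use, while the @transient field keeps it out of serialized state.

class RedisJob extends Logging {
  def run(): Unit = {
    logInfo("starting job")
    logDebug("this message is only built if DEBUG is enabled")
  }
}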
Example 61
Source File: CacheManagerTokenStore.scala    From meteorite-core   with Apache License 2.0 5 votes vote down vote up
package bi.meteorite.core.security.tokenprovider

import com.hazelcast.core.HazelcastInstance
import org.ops4j.pax.cdi.api.OsgiService
import org.osgi.framework.BundleContext
import org.osgi.framework.wiring.BundleWiring
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import javax.annotation.PostConstruct
import javax.inject.{Named, Inject, Singleton}


@Singleton
@Named("CManager")
object CacheManagerTokenStore {
  private var logger: Logger = LoggerFactory.getLogger(classOf[CacheManagerTokenStore])
}

@Singleton class CacheManagerTokenStore extends TokenStorageProvider {
  @OsgiService
  @Inject private var cacheManager: HazelcastInstance = null
  @Inject private var bcontext: BundleContext = null

  @PostConstruct def init {
    CacheManagerTokenStore.logger.debug("*** Activating CacheManager")
    val c: CompositeClassLoader = new CompositeClassLoader
    val tccl: ClassLoader = Thread.currentThread.getContextClassLoader
    try {
      cacheManager.getConfig.setClassLoader(c)
    } finally {
      cacheManager.getConfig.setClassLoader(c)
    }
  }

  def addToken(token: Token) {
    addInvokerClassLoader(this.getClass.getClassLoader)
    cacheManager.getMap("tokens").put(token.getToken, token)
  }

  def updateToken(token: Token) {
  }

  def getToken(token: String): Token = {
    addInvokerClassLoader(getInvokerClassLoader)
    cacheManager.getMap("tokens").get(token).asInstanceOf[Token]
  }

  def hasToken(token: String): Boolean = {
    addInvokerClassLoader(getInvokerClassLoader)
    cacheManager.getMap("tokens").get(token) != null
  }

  def removeToken(token: Token) {
  }

  def setCacheManagerService(hazel: HazelcastInstance) {
    this.cacheManager = hazel
  }

  protected def addInvokerClassLoader(cl: ClassLoader) {
    getInstance.getConfig.getClassLoader.asInstanceOf[CompositeClassLoader].add(cl)
  }

  protected def getInvokerClassLoader: ClassLoader = {
    bcontext.getBundle.adapt(classOf[BundleWiring]).getClassLoader
  }

  def setBcontext(bcontext: BundleContext) {
    this.bcontext = bcontext
  }

  def getInstance: HazelcastInstance = {
    cacheManager
  }
} 
Example 62
Source File: StreamingApp.scala    From odsc-east-realish-predictions   with Apache License 2.0 5 votes vote down vote up
package com.twilio.open.odsc.realish.utils

import com.twilio.open.odsc.realish.listeners.InsightsQueryListener
import org.apache.spark.sql.SparkSession
import org.slf4j.Logger

trait StreamingApp {
  val logger: Logger
  def run(): Unit
}

trait Restartable {
  def restart(): Unit
}

trait RestartableStreamingApp extends StreamingApp with Restartable {
  val spark: SparkSession

  val streamingQueryListener: InsightsQueryListener = {
    new InsightsQueryListener(spark, restart)
  }

  def monitoredRun(): Unit = {
    run()
    monitorStreams()
  }

  // monitorStreams() is referenced above; its implementation (watching the active streams) is elided in this excerpt
  def monitorStreams(): Unit

  
  def restart(): Unit = {
    logger.info(s"restarting the application. cleaning up old stream listener and streams")

    val streams = spark.streams
    streams.removeListener(streamingQueryListener)
    streams.active.foreach { stream =>
      logger.info(s"stream_name=${stream.name} state=active status=${stream.status} action=stop_stream")
      stream.stop()
    }
    logger.info(s"attempting to restart the application")
    monitoredRun()
  }
} 
Example 63
Source File: InsightsQueryListener.scala    From odsc-east-realish-predictions   with Apache License 2.0 5 votes vote down vote up
package com.twilio.open.odsc.realish.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.JavaConverters._

object InsightsQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[InsightsQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): InsightsQueryListener = {
    new InsightsQueryListener(spark, restart)
  }

}

class InsightsQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import InsightsQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)

  def doubleToLong(value: Double): Long = {
    value match {
      case a if a.isInfinite => 0L
      case b if b == Math.floor(b) => b.toLong
      case c => Math.rint(c).toLong
    }
  }

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  //https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    // note: leaving this here to remind that we can do fancy things with this for metrics sake
    

    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows

      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }
    val tags = defaultTag + ( "stream_name" -> progress.name )
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate", tags).record(doubleToLong(processedRowsPerSecond))
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", tags).record(doubleToLong(inputRowsPerSecond))

    // todo - could take num.rows.total, given total percentage of records that will be watermarked going forwards... (simple metric that say loss_percentage due to watermark)

    // should give min, avg, max, watermark
    val eventTime = progress.eventTime
    if (eventTime != null) {

      log.info(s"event.time=${eventTime.asScala.mkString(",")}")
    }

    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }

    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
} 
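A short sketch of wiring the listener into a SparkSession; the app name and the no-op restart function are placeholders:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("insights-demo").master("local[*]").getOrCreate()
spark.streams.addListener(InsightsQueryListener(spark, () => ()))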
Example 64
Source File: ImapTestUtils.scala    From gatling-imap   with GNU Affero General Public License v3.0 5 votes vote down vote up
package com.linagora.gatling.imap

import java.net.URI
import java.util.Properties

import org.slf4j.Logger

import com.yahoo.imapnio.async.client.ImapAsyncSession.DebugMode
import com.yahoo.imapnio.async.client.{ImapAsyncClient, ImapAsyncSession, ImapAsyncSessionConfig}

import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global

trait ImapTestUtils {

  protected def logger: Logger

  val threadNumber = 4
  val config = new Properties()
  val imapClient = new ImapAsyncClient(threadNumber)

  def connect(port: Int): Future[ImapAsyncSession] = {
    val serverUri = new URI(s"imap://localhost:$port")
    val config = new ImapAsyncSessionConfig
    config.setConnectionTimeoutMillis(5000)
    config.setReadTimeoutMillis(6000)
    val sniNames = null

    val localAddress = null
    Future {
      imapClient
        .createSession(serverUri, config, localAddress, sniNames, DebugMode.DEBUG_ON, "ImapTestUtilsCreated")
        .get()
        .getSession
    }
  }

} 
Example 65
Source File: ImapSessionsSpec.scala    From gatling-imap   with GNU Affero General Public License v3.0 5 votes vote down vote up
package com.linagora.gatling.imap.protocol.command

import java.util.Properties

import akka.actor.ActorSystem
import akka.testkit.TestProbe
import com.linagora.gatling.imap.Fixture.bart
import com.linagora.gatling.imap.protocol.{Command, ImapProtocol, ImapResponses, ImapSessions, Response, UserId}
import com.linagora.gatling.imap.{CyrusServer, ImapTestUtils, RunningServer}
import org.scalatest.{BeforeAndAfterEach, Matchers, WordSpec}
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.duration._

class ImapSessionsSpec extends WordSpec with Matchers with ImapTestUtils with BeforeAndAfterEach {
  val logger: Logger = LoggerFactory.getLogger(this.getClass.getCanonicalName)

  private val server: RunningServer = CyrusServer.start()

  override def beforeEach(): Unit = {
    server.addUser(bart)
  }

  override protected def afterEach(): Unit = {
    system.terminate()
    server.stop()
  }

  implicit lazy val system: ActorSystem = ActorSystem("LoginHandlerSpec")
  "the imap sessions actor" should {
    "log a user in" in {
      val config = new Properties()
      val protocol = ImapProtocol("localhost", server.mappedImapPort(), config)

      val sessions = system.actorOf(ImapSessions.props(protocol))
      val probe = TestProbe()
      val userId = UserId(1)
      probe.send(sessions, Command.Connect(userId))
      probe.expectMsg(10.second, Response.Connected(ImapResponses.empty))
      probe.send(sessions, Command.Login(userId, bart))
      probe.expectMsgPF(10.second) {
        case Response.LoggedIn(responses: ImapResponses) => responses.isOk shouldBe true
      }
    }
  }

} 
Example 66
Source File: CyrusServer.scala    From gatling-imap   with GNU Affero General Public License v3.0 5 votes vote down vote up
package com.linagora.gatling.imap

import org.slf4j.{Logger, LoggerFactory}
import org.testcontainers.containers.GenericContainer

import com.yahoo.imapnio.async.request.CreateFolderCommand

import scala.concurrent.duration._
import scala.concurrent.{Await, ExecutionContext, ExecutionContextExecutor}

object CyrusServer extends Server {

  private val imapPort = 143
  private val logger: Logger = LoggerFactory.getLogger(CyrusServer.getClass)

  class RunningCyrusServer(val container: GenericContainer[_]) extends RunningServer with ImapTestUtils {
    protected val logger: Logger = CyrusServer.logger
    lazy val mappedImapPort: Integer = container.getMappedPort(imapPort)

    def addUser(login: String, password: String): Unit = {
      container.execInContainer("bash", "-c", s"echo $password | saslpasswd2 -u test -c $login -p")
      implicit val executionContext: ExecutionContextExecutor = ExecutionContext.global
      Await.result(
        connect(mappedImapPort)
          .flatMap(implicit session =>
            for {
              _ <- Imap.login("cyrus", "cyrus")
              _ <- Imap.rawCommand(new CreateFolderCommand(s"user.$login"))
              _ <- Imap.disconnect()
            } yield ()), 1.minute)
    }
    def stop(): Unit = container.stop()
  }

  def start(): RunningServer = {
    val cyrus = new GenericContainer("linagora/cyrus-imap")
    cyrus.addExposedPort(imapPort)
    cyrus.start()
    new RunningCyrusServer(cyrus)
  }
} 
Example 67
Source File: GenericMainClass.scala    From darwin   with Apache License 2.0 5 votes vote down vote up
package it.agilelab.darwin.app.spark

import java.text.SimpleDateFormat
import java.util.Date

import com.typesafe.config.{Config, ConfigFactory}
import org.apache.hadoop.fs.FileSystem
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._

trait GenericMainClass {
  self: SparkManager =>

  val genericMainClassLogger: Logger = LoggerFactory.getLogger("SparkManager")

  private def makeFileSystem(session: SparkSession): FileSystem = {
    if (session.sparkContext.isLocal) {
      FileSystem.getLocal(session.sparkContext.hadoopConfiguration)
    }
    else {
      FileSystem.get(session.sparkContext.hadoopConfiguration)
    }
  }


  
  // scalastyle:off
  private def getGlobalConfig: Config = {
    genericMainClassLogger.debug("system environment vars")
    for ((k, v) <- System.getenv().asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v")

    genericMainClassLogger.debug("system properties")
    for ((k, v) <- System.getProperties.asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v")

    ConfigFactory.load()
  }

  // scalastyle:on

} 
Example 68
Source File: SparkManager.scala    From darwin   with Apache License 2.0 5 votes vote down vote up
package it.agilelab.darwin.app.spark

import com.typesafe.config.Config
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.JavaConverters._

trait SparkManager {

  val sparkManagerLogger: Logger = LoggerFactory.getLogger("SparkManager")

  
  protected def defaultParallelism(implicit sparkSession: SparkSession, config: Config): Int = {
    sparkSession.conf.getOption(SparkConfigurationKeys.SPARK_EXECUTOR_INSTANCES) match {
      case Some(instances) =>
        sparkSession.conf.getOption(SparkConfigurationKeys.SPARK_CORES).getOrElse("1").toInt * instances.toInt
      case None =>
        sparkManagerLogger.info("Spark is configured with dynamic allocation, default parallelism will be gathered from app " +
          "conf: " +
          "next.process.parallelism")
        if (config.hasPath(SparkConfigurationKeys.PARALLELISM)) {
          config.getInt(SparkConfigurationKeys.PARALLELISM)
        } else {
          sparkManagerLogger.info("next.process.parallelism was not set fallback to sparkSession.defaultParallelism")
          sparkSession.sparkContext.defaultParallelism
        }
    }
  }
} 
Example 69
Source File: SchemaManagerSparkApp.scala    From darwin   with Apache License 2.0 5 votes vote down vote up
package it.agilelab.darwin.app.spark

import java.nio.ByteOrder

import com.typesafe.config.{Config, ConfigFactory}
import it.agilelab.darwin.app.spark.classes._
import it.agilelab.darwin.manager.AvroSchemaManagerFactory
import org.apache.avro.reflect.ReflectData
import org.apache.hadoop.fs.FileSystem
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

object SchemaManagerSparkApp extends GenericMainClass with SparkManager {

  val mainLogger: Logger = LoggerFactory.getLogger("SchemaManagerSparkApp")

  val endianness: ByteOrder = ByteOrder.BIG_ENDIAN

  override protected def runJob(settings: Config)(implicit fs: FileSystem, sparkSession: SparkSession): Int = {
    import sparkSession.implicits._

    val ds = sparkSession.createDataset(sparkSession.sparkContext.parallelize(1 to 1000, 20))
    mainLogger.info("Registering schemas")
    //    val reflections = new Reflections("it.agilelab.darwin.app.spark.classes")
    //    val annotationClass: Class[AvroSerde] = classOf[AvroSerde]
    //    val classes = reflections.getTypesAnnotatedWith(annotationClass).asScala.toSeq
    //      .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers))
    //    val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)))
    val schemas = Seq(ReflectData.get().getSchema(classOf[Menu]), ReflectData.get().getSchema(classOf[MenuItem]),
      ReflectData.get().getSchema(classOf[Food]), ReflectData.get().getSchema(classOf[Order]),
      ReflectData.get().getSchema(classOf[Price]))
    val conf = ConfigFactory.load()
    val manager = AvroSchemaManagerFactory.initialize(conf)
    val registeredIDs: Seq[Long] = manager.registerAll(schemas).map(_._1)
    mainLogger.info("Schemas registered")

    mainLogger.info("Getting ID for a schema")
    manager.getId(ReflectData.get().getSchema(classOf[Menu]))
    mainLogger.info("ID retrieved for the schema")

    mainLogger.info("Get Schema from ID")
    val d2 = ds.map { x =>
      AvroSchemaManagerFactory.initialize(conf).getSchema(registeredIDs(x % registeredIDs.size))
      x
    }
    d2.count()
    mainLogger.info("All schemas obtained")
    10
  }

  override protected def handleException(exception: Throwable, applicationSettings: Config): Unit = {
    mainLogger.error(exception.getMessage)
  }
} 
Example 70
Source File: Logging.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.pipelines

import org.slf4j.{Logger, LoggerFactory}


trait Logging {
  // Make the log field transient so that objects with Logging can
  // be serialized and used on another machine
  @transient private var log_ : Logger = null

  // Method to get or create the logger for this object
  protected def log: Logger = {
    if (log_ == null) {
      var className = this.getClass.getName
      // Ignore trailing $'s in the class names for Scala objects
      if (className.endsWith("$")) {
        className = className.substring(0, className.length - 1)
      }
      log_ = LoggerFactory.getLogger(className)
    }
    log_
  }

  // Log methods that take only a String
  protected def logInfo(msg: => String) {
    if (log.isInfoEnabled) log.info(msg)
  }

  protected def logDebug(msg: => String) {
    if (log.isDebugEnabled) log.debug(msg)
  }

  protected def logTrace(msg: => String) {
    if (log.isTraceEnabled) log.trace(msg)
  }

  protected def logWarning(msg: => String) {
    if (log.isWarnEnabled) log.warn(msg)
  }

  protected def logError(msg: => String) {
    if (log.isErrorEnabled) log.error(msg)
  }

  // Log methods that take Throwables (Exceptions/Errors) too
  protected def logInfo(msg: => String, throwable: Throwable) {
    if (log.isInfoEnabled) log.info(msg, throwable)
  }

  protected def logDebug(msg: => String, throwable: Throwable) {
    if (log.isDebugEnabled) log.debug(msg, throwable)
  }

  protected def logTrace(msg: => String, throwable: Throwable) {
    if (log.isTraceEnabled) log.trace(msg, throwable)
  }

  protected def logWarning(msg: => String, throwable: Throwable) {
    if (log.isWarnEnabled) log.warn(msg, throwable)
  }

  protected def logError(msg: => String, throwable: Throwable) {
    if (log.isErrorEnabled) log.error(msg, throwable)
  }
} 
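Because the msg parameters above are by-name, the log string is only built when the corresponding level is enabled. A small sketch (the object name is illustrative):

object PipelineJob extends Logging {
  def run(items: Seq[Int]): Unit = {
    // The interpolated string below is never constructed when DEBUG logging is off
    logDebug(s"processing ${items.mkString(",")}")
    logInfo(s"processed ${items.size} items")
  }
}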
Example 71
Source File: BackgroundAsyncHandler.scala    From play-ws   with Apache License 2.0 5 votes vote down vote up
package play.api.libs.ws.ahc.cache

import play.shaded.ahc.org.asynchttpclient._
import com.typesafe.play.cachecontrol.ResponseCachingActions.DoCacheResponse
import com.typesafe.play.cachecontrol.ResponseCachingActions.DoNotCacheResponse
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import play.shaded.ahc.io.netty.handler.codec.http.HttpHeaders

import scala.concurrent.Await


class BackgroundAsyncHandler[T](request: Request, cache: AhcHttpCache, ahcConfig: AsyncHttpClientConfig)
    extends AsyncHandler[T]
    with Debug {

  import BackgroundAsyncHandler.logger

  private val timeout = scala.concurrent.duration.Duration(1, "second")

  private val builder = new CacheableResponseBuilder(ahcConfig)

  private val key = EffectiveURIKey(request)

  @throws(classOf[Exception])
  def onBodyPartReceived(content: HttpResponseBodyPart): AsyncHandler.State = {
    builder.accumulate(content)
    AsyncHandler.State.CONTINUE
  }

  @throws(classOf[Exception])
  def onStatusReceived(status: HttpResponseStatus): AsyncHandler.State = {
    builder.reset()
    builder.accumulate(status)
    AsyncHandler.State.CONTINUE
  }

  @throws(classOf[Exception])
  def onHeadersReceived(headers: HttpHeaders): AsyncHandler.State = {
    builder.accumulate(headers)
    AsyncHandler.State.CONTINUE
  }

  def onThrowable(t: Throwable): Unit = {
    logger.error(s"onThrowable: received on request $request", t)
  }

  override def onCompleted(): T = {
    val response: CacheableResponse = builder.build

    if (cache.isNotModified(response)) {
      processNotModifiedResponse(response)
    } else {
      processFullResponse(response)
    }

    response.asInstanceOf[T]
  }

  protected def processFullResponse(fullResponse: CacheableResponse): Unit = {
    logger.debug(s"processFullResponse: fullResponse = ${debug(fullResponse)}")

    cache.cachingAction(request, fullResponse) match {
      case DoNotCacheResponse(reason) =>
        logger.debug(s"onCompleted: DO NOT CACHE, because $reason")
      case DoCacheResponse(reason) =>
        logger.debug(s"isCacheable: DO CACHE, because $reason")
        cache.cacheResponse(request, fullResponse)
    }
  }

  protected def processNotModifiedResponse(notModifiedResponse: CacheableResponse): Unit = {
    logger.trace(s"processNotModifiedResponse: notModifiedResponse = $notModifiedResponse")

    val result = Await.result(cache.get(key), timeout)
    logger.debug(s"processNotModifiedResponse: result = $result")

    // FIXME XXX Find the response which matches the secondary keys...
    result match {
      case Some(entry) =>
        val newHeaders    = notModifiedResponse.getHeaders
        val freshResponse = cache.freshenResponse(newHeaders, entry.response)
        cache.cacheResponse(request, freshResponse)
      case None =>
      // XXX FIXME what do we do if we have a 304 and there's nothing in the cache for it?
      // If we make another call and it sends us another 304 back, we can get stuck in an
      // endless loop?

    }

  }

}

object BackgroundAsyncHandler {
  private val logger: Logger = LoggerFactory.getLogger("play.api.libs.ws.ahc.cache.BackgroundAsyncHandler")
} 
Example 72
Source File: KryoStringEventBatch.scala    From maha   with Apache License 2.0 5 votes vote down vote up
// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.log

import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.esotericsoftware.kryo.io.{Input, Output}
import com.yahoo.maha.data.StringEventBatch
import org.slf4j.{Logger, LoggerFactory}


object KryoStringEventBatch {
  private val logger: Logger = LoggerFactory.getLogger(classOf[KryoStringEventBatch])
}

class KryoStringEventBatch extends Serializer[StringEventBatch] {
  KryoStringEventBatch.logger.info("Created instance of " + this.getClass.getSimpleName)

  override def write(kryo: Kryo, output: Output, stringEventBatch: StringEventBatch): Unit = {
    val size: Int = stringEventBatch.getEvents.size
    output.writeInt(size)
    stringEventBatch.getEvents.stream().forEach(output.writeString(_))
  }

  override def read(kryo: Kryo, input: Input, `type`: Class[StringEventBatch]): StringEventBatch = {
    val size: Int = input.readInt
    val builder: StringEventBatch.Builder = new StringEventBatch.Builder(size)
    var i: Int = 0
    while (i < size) {
      builder.add(input.readString)
      i += 1
    }
    builder.build.asInstanceOf[StringEventBatch]
  }
} 
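A hedged round-trip sketch of registering and using the serializer, assuming StringEventBatch.Builder behaves as it is used in read above:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

val kryo = new Kryo()
kryo.register(classOf[StringEventBatch], new KryoStringEventBatch)

val builder = new StringEventBatch.Builder(2)
builder.add("event-1")
builder.add("event-2")
val batch = builder.build.asInstanceOf[StringEventBatch]

val bytes = new ByteArrayOutputStream()
val output = new Output(bytes)
kryo.writeObject(output, batch)
output.close()

val input = new Input(new ByteArrayInputStream(bytes.toByteArray))
val roundTripped = kryo.readObject(input, classOf[StringEventBatch])
input.close()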
Example 73
Source File: PlanResultLogger.scala    From rug   with GNU General Public License v3.0 5 votes vote down vote up
package com.atomist.rug.runtime.plans

import com.atomist.rug.spi.Handlers.Status.Failure
import com.atomist.rug.spi.Handlers._
import org.slf4j.Logger

import scala.annotation.tailrec
import scala.concurrent.Await
import scala.concurrent.duration.DurationInt

class PlanResultLogger(val logger: Logger) {

  def log(planResult: PlanResult): Unit = {
    logEvents(planResult.log)
  }

  @tailrec
  private def logEvents(log: Seq[PlanLogEvent]): Unit = {
    log.headOption match {
      case Some(head) =>
        val remainingEvents = head match {
          case logError: PlanLogError =>
            logger.error("Error running plan.", logError.error)
            log.tail
          case result: InstructionResult if result.response.status == Failure =>
            logger.error("Failure running plan.", result)
            log.tail
          case result: NestedPlanRun =>
            val planResult = result.planResult
            log.tail ++ planResult.log
          case _ => log.tail
        }
        logEvents(remainingEvents)
      case None =>
    }
  }
} 
Example 74
Source File: StatusCheckerModule.scala    From CloudGenesis   with Apache License 2.0 5 votes vote down vote up
package com.lifeway.cloudops.cloudformation

import akka.actor.{ActorSystem, Scheduler}
import com.amazonaws.services.cloudformation.AmazonCloudFormation

import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration._
import akka.pattern.after
import com.amazonaws.AmazonServiceException
import com.lifeway.cloudops.cloudformation.Types.StackName
import org.scalactic._
import org.slf4j.Logger

trait StatusCheckerModule {
  val logger: Logger

  def waitForStatus(
      actorSystem: ActorSystem,
      maxRetries: Int = 100,
      maxWaitTime: Duration = 5.minutes,
      retrySpeed: FiniteDuration = 3.seconds)(statusFetcher: (AmazonCloudFormation, String) => (String, String))(
      cfClient: AmazonCloudFormation,
      id: String,
      stackName: StackName,
      waitForStatus: Types.Status,
      failIfInStatus: Seq[Types.Status]): Unit Or AutomationError = {

    implicit val ec: ExecutionContext = actorSystem.dispatcher
    implicit val sch: Scheduler       = actorSystem.scheduler

    sealed trait StatusException            extends Exception
    case object PendingException            extends StatusException
    case class FailedException(msg: String) extends StatusException

    def checkStatus: Unit = {
      val (status, reason) = statusFetcher(cfClient, id)

      if (status == waitForStatus) ()
      else if (failIfInStatus.contains(status))
        throw new FailedException(s"Unexpected stack status: $status. Reason: $reason")
      else throw PendingException
    }

    def retry(op: => Unit, delay: FiniteDuration, retries: Int): Future[Unit Or AutomationError] =
      Future(op).map(x => Good(x)) recoverWith {
        case PendingException if retries > 0 => after(delay, sch)(retry(op, delay, retries - 1))
        case FailedException(err) =>
          Future.successful(
            Bad(StackError(s"Failed to reach expected status of $waitForStatus for $stackName due to: $err")))
        case t: AmazonServiceException if t.getStatusCode >= 500 =>
          logger.error(s"AWS 500 Service Exception: Failed to reach expected status of $waitForStatus for $stackName",
                       t)
          Future.successful(
            Bad(ServiceError(
              s"AWS 500 Service Exception: Failed to reach expected status of $waitForStatus for $stackName")))
        case _ =>
          Future.successful(Bad(StackError(s"Failed to reach expected status of $waitForStatus for $stackName")))
      }

    //Retry to find final status for up to max time...
    try {
      Await.result(retry(checkStatus, retrySpeed, maxRetries), maxWaitTime)
    } catch {
      case _: Throwable =>
        Bad(
          StackError(
            s"Failed to wait to reach expected status of $waitForStatus for $stackName due to process timeout"))
    }
  }
} 
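A hedged invocation sketch, placed alongside the trait above. It assumes Types.Status and Types.StackName are String aliases and uses the AWS SDK v1 describeStacks call as the status fetcher; names and status strings are illustrative:

import com.amazonaws.services.cloudformation.AmazonCloudFormationClientBuilder
import com.amazonaws.services.cloudformation.model.DescribeStacksRequest

object StatusCheckerSketch extends StatusCheckerModule {
  val logger: Logger = org.slf4j.LoggerFactory.getLogger("StatusCheckerSketch")

  def waitForCreateComplete(system: ActorSystem, stackId: String, stackName: String) =
    waitForStatus(system)((client, id) => {
      val stack = client.describeStacks(new DescribeStacksRequest().withStackName(id)).getStacks.get(0)
      (stack.getStackStatus, String.valueOf(stack.getStackStatusReason))
    })(AmazonCloudFormationClientBuilder.defaultClient(), stackId, stackName,
      "CREATE_COMPLETE", Seq("CREATE_FAILED", "ROLLBACK_COMPLETE", "ROLLBACK_FAILED"))
}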
Example 75
Source File: Application.scala    From spring-scala-examples   with Apache License 2.0 5 votes vote down vote up
package hello

import org.slf4j.Logger
import org.slf4j.LoggerFactory
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.boot.CommandLineRunner
import org.springframework.boot.SpringApplication
import org.springframework.boot.autoconfigure.SpringBootApplication
import org.springframework.jdbc.core.JdbcTemplate

import collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.{ArrayBuffer, ListBuffer}

@SpringBootApplication
class Application extends CommandLineRunner {

    @Autowired
    var jdbcTemplate: JdbcTemplate = _


    val log: Logger = LoggerFactory.getLogger(classOf[Application])

    override def run(args: String*): Unit = {
        log.info("Creating tables")
        jdbcTemplate.execute("DROP TABLE customers IF EXISTS")
        jdbcTemplate.execute("CREATE TABLE customers(" +
                "id SERIAL, first_name VARCHAR(255), last_name VARCHAR(255))")

        // Split up the array of whole names into an array of first/last names
        val splitUpNames: mutable.Buffer[Array[AnyRef]] = ListBuffer("John Woo", "Jeff Dean", "Josh Bloch", "Josh Long").map(_.split(" ")).asInstanceOf[mutable.Buffer[Array[AnyRef]]]

        // Print out each first/last name pair
        splitUpNames.foreach(name => log.info("Inserting customer record for %s %s".format(name(0), name(1))))

        // Uses JdbcTemplate's batchUpdate operation to bulk load data
        jdbcTemplate.batchUpdate("INSERT INTO customers(first_name, last_name) VALUES (?,?)", splitUpNames.asJava)

        log.info("Querying for customer records where first_name = 'Josh':")
//        jdbcTemplate.query(
//                "SELECT id, first_name, last_name FROM customers WHERE first_name = ?", new Object[] { "Josh" },
//                (rs, rowNum) -> new Customer(rs.getLong("id"), rs.getString("first_name"), rs.getString("last_name"))
//        ).forEach(customer -> log.info(customer.toString()))

    }
}

object Application extends App {
  SpringApplication.run(classOf[Application], args:_*)
} 
Example 76
Source File: Application.scala    From spring-scala-examples   with Apache License 2.0 5 votes vote down vote up
package hello

import java.sql.ResultSet

import org.slf4j.Logger
import org.slf4j.LoggerFactory
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.boot.CommandLineRunner
import org.springframework.boot.SpringApplication
import org.springframework.boot.autoconfigure.SpringBootApplication
import org.springframework.jdbc.core.{JdbcTemplate, RowMapper}

import collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.ListBuffer

@SpringBootApplication
class Application extends CommandLineRunner {

  @Autowired
  var jdbcTemplate: JdbcTemplate = _


  val log: Logger = LoggerFactory.getLogger(classOf[Application])

  override def run(args: String*): Unit = {
    log.info("Creating tables")
    jdbcTemplate.execute("DROP TABLE customers IF EXISTS")
    jdbcTemplate.execute("CREATE TABLE customers(" +
      "id SERIAL, first_name VARCHAR(255), last_name VARCHAR(255))")

    val splitUpNames = ListBuffer("John Woo", "Jeff Dean", "Josh Bloch", "Josh Long").map(_.split(" "))
    splitUpNames.foreach(name => log.info("Inserting customer record for %s %s".format(name(0), name(1))))

    jdbcTemplate.batchUpdate("INSERT INTO customers(first_name, last_name) VALUES (?,?)", splitUpNames.asInstanceOf[mutable.Buffer[Array[AnyRef]]].asJava)

    log.info("Querying for customer records where first_name = 'Josh':")
    jdbcTemplate.query(
      "SELECT id, first_name, last_name FROM customers WHERE first_name = ?",
      Array("Josh").asInstanceOf[Array[AnyRef]],
      // no Java 8 Lambda support in Scala pre 2.12
      new RowMapper[Customer]{
        override def mapRow(rs: ResultSet, rowNum: Int): Customer = new Customer(rs.getLong("id"), rs.getString("first_name"), rs.getString("last_name"))
      })
      // Works in Scala 2.12
      // (rs: ResultSet, rowNum: Int) => new Customer(rs.getLong("id"), rs.getString("first_name"), rs.getString("last_name"))    )
      .asScala.foreach((customer:Customer) => log.info(customer.toString))
  }
}

object Application extends App {
  SpringApplication.run(classOf[Application], args:_*)
} 
Example 77
Source File: Application.scala    From spring-scala-examples   with Apache License 2.0 5 votes vote down vote up
package hello

import org.slf4j.{Logger, LoggerFactory}
import org.springframework.boot.autoconfigure.SpringBootApplication
import org.springframework.boot.{CommandLineRunner, SpringApplication}
import org.springframework.http.ResponseEntity
import org.springframework.util.concurrent.{ListenableFuture, ListenableFutureCallback, SuccessCallback}
import org.springframework.web.client.{AsyncRestTemplate, RestTemplate}

@SpringBootApplication
class Application extends CommandLineRunner{
  val log: Logger = LoggerFactory.getLogger(classOf[Application])

  override def run(args: String*): Unit = {
    val restTemplate = new RestTemplate()

    // synchronous version
    val quote : Quote =  restTemplate.getForObject("http://gturnquist-quoters.cfapps.io/api/random", classOf[Quote])
    log.info(quote.toString)

    // async version
    val asyncRestTemplate = new AsyncRestTemplate()
    val quoteFuture : ListenableFuture[ResponseEntity[Quote]] =  asyncRestTemplate.getForEntity("http://gturnquist-quoters.cfapps.io/api/random", classOf[Quote])

    quoteFuture.addCallback(new ListenableFutureCallback[ResponseEntity[Quote]]() {
      override def onSuccess(entity : ResponseEntity[Quote]) : Unit = log.info("async: " + entity.getBody.toString)
      override def onFailure(t : Throwable) : Unit = log.error("Async error", t)
    })
  }
}

object Application extends App {
  SpringApplication.run(classOf[Application], args:_*)
} 
Example 78
Source File: MyLogger.scala    From Squerall   with Apache License 2.0 5 votes vote down vote up
package org.squerall

import org.slf4j.{Logger, LoggerFactory}

import scala.language.implicitConversions


trait MyLogger {

  @transient private var log_ : Logger = _

  // Method to get or create the logger for this object
  protected def log: Logger = {
    if (log_ == null) {
      log_ = LoggerFactory.getLogger(logName)
    }
    log_
  }

  // Method to get the logger name for this object
  protected def logName: String = {
    // Ignore trailing $'s in the class names for Scala objects
    this.getClass.getName.stripSuffix("$")
  }


  def trace(msg: => String): Unit = { if (log.isTraceEnabled) log.trace(msg) }
  def trace(msg: => String, e: Throwable): Unit = { if (log.isTraceEnabled) log.trace(msg, e) }
  def trace(msg: => String, o: Any, os: Any*): Unit = { if (log.isTraceEnabled) log.trace(msg, o, os) }

  def debug(msg: => String): Unit = { if (log.isDebugEnabled) log.debug(msg) }
  def debug(msg: => String, e: Throwable): Unit = { if (log.isDebugEnabled) log.debug(msg, e) }
  def debug(msg: => String, o: Any, os: Any*): Unit = { if (log.isDebugEnabled) log.debug(msg, o, os) }

  def info(msg: => String): Unit = { if (log.isInfoEnabled)  log.info(msg) }
  def info(msg: => String, e: Throwable): Unit = { if (log.isInfoEnabled)  log.info(msg, e) }
  def info(msg: => String, o: Any, os: Any*): Unit = { if (log.isInfoEnabled)  log.info(msg, o, os) }

  def warn(msg: => String): Unit = { if (log.isWarnEnabled)  log.warn(msg) }
  def warn(msg: => String, e: Throwable): Unit = { if (log.isWarnEnabled)  log.warn(msg, e) }
  def warn(msg: => String, o: Any, os: Any*): Unit = { if (log.isWarnEnabled)  log.warn(msg, o, os) }

  def error(msg: => String): Unit = { if (log.isErrorEnabled) log.error(msg) }
  def error(msg: => String, e: Throwable): Unit = { if (log.isErrorEnabled) log.error(msg, e) }
  def error(msg: => String, o: Any, os: Any*): Unit = { if (log.isErrorEnabled) log.error(msg, o, os) }

  def mark(msg: => String): Unit = { if (log.isErrorEnabled) log.error(msg) }
  def mark(msg: => String, e: Throwable): Unit = { if (log.isErrorEnabled) log.error(msg, e) }
  def mark(msg: => String, o: Any, os: Any*): Unit = { if (log.isErrorEnabled) log.error(msg, o, os) }
}

private object MyLogger {
  implicit def logging2Logger(anything: MyLogger): Logger = anything.log
} 
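A usage sketch; within this package the implicit conversion in the companion can also hand the mixed-in instance to APIs that expect a plain org.slf4j.Logger (the class name is illustrative):

class QueryExecutor extends MyLogger {
  def execute(query: String): Unit = {
    info(s"executing $query")
    if (query.isEmpty) warn("empty query received")
  }
}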
Example 79
Source File: StructuredLogSpanHandler.scala    From money   with Apache License 2.0 5 votes vote down vote up
package com.comcast.money.core.handlers

import com.comcast.money.api.{ Note, SpanInfo }
import com.typesafe.config.Config
import org.slf4j.{ Logger, LoggerFactory, MDC }


class StructuredLogSpanHandler(
  val logger: Logger = LoggerFactory.getLogger(classOf[StructuredLogSpanHandler]),
  val mdcFunc: (String, String) => Unit = (x: String, y: String) => MDC.put(x, y))
  extends ConfigurableHandler {

  // Extra constructor because Java Spring programs have a problem with the default function argument in the constructor above.
  def this() = this(LoggerFactory.getLogger(classOf[StructuredLogSpanHandler]), (k: String, v: String) => MDC.put(k, v))

  import com.comcast.money.core.handlers.LoggingSpanHandler._

  protected var logFunction: LogFunction = logger.info

  def configure(config: Config): Unit = {

    if (config.hasPath("log-level")) {
      val level = config.getString("log-level").toUpperCase

      // set the log level based on the configured value
      level match {
        case "ERROR" => logFunction = logger.error
        case "WARN" => logFunction = logger.warn
        case "INFO" => logFunction = logger.info
        case "DEBUG" => logFunction = logger.debug
        case "TRACE" => logFunction = logger.trace
      }
    }
  }

  def handle(spanInfo: SpanInfo): Unit = {
    import scala.collection.JavaConverters._
    val baseFields = Seq(
      // The field names below are the same as in cedi-dtrace. This makes it easier to query a transaction in Elasticsearch.
      ("trace-id", spanInfo.id.traceId()),
      ("parent-id", spanInfo.id.parentId()),
      ("span-id", spanInfo.id.selfId()),
      ("span-name", spanInfo.name()),
      ("app", spanInfo.appName()),
      ("host", spanInfo.host()),
      ("start-time", java.time.Instant.ofEpochMilli(spanInfo.startTimeMillis())),
      ("end-time", java.time.Instant.ofEpochMilli(spanInfo.endTimeMillis())),
      ("span-duration", spanInfo.durationMicros()),
      ("span-success", spanInfo.success()))
    val noteFields: Seq[(String, Any)] = spanInfo.notes.values.asScala.map(n => (n.name(), n.value())).toSeq
    val allFields = baseFields ++ noteFields

    allFields.foreach(p => mdcFunc(p._1, p._2.toString))

    logFunction(allFields.map { case (k, v) => s"$k:$v" }.mkString("[", "][", "]"))
  }
} 
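A small wiring sketch for the handler above (the config literal is an assumption for illustration; configure only reads the "log-level" path):

import com.typesafe.config.ConfigFactory

val handler = new StructuredLogSpanHandler()
handler.configure(ConfigFactory.parseString("log-level = DEBUG"))
// Subsequent handle(spanInfo) calls log the [key:value] pairs at DEBUG and
// mirror each field into the SLF4J MDC via mdcFunc.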
Example 80
Source File: LoggingSpanHandler.scala    From money   with Apache License 2.0 5 votes vote down vote up
package com.comcast.money.core.handlers

import com.comcast.money.api.SpanInfo
import com.typesafe.config.Config
import org.slf4j.{ Logger, LoggerFactory }

object LoggingSpanHandler {

  type LogFunction = String => Unit

  val HEADER_FORMAT: String = "Span: [ span-id=%s ][ trace-id=%s ][ parent-id=%s ][ span-name=%s ][ " +
    "app-name=%s ][ start-time=%s ][ span-duration=%s ][ span-success=%s ] [ host=%s ]"
  val NOTE_BEGIN = "[ "
  val NOTE_END = " ]"
  val EQ = "="
  val NULL: String = "NULL"
}

class LoggingSpanHandler(val logger: Logger, makeFormatter: Config => SpanLogFormatter) extends ConfigurableHandler {

  def this() = this(LoggerFactory.getLogger(classOf[LoggingSpanHandler]), SpanLogFormatter.apply)

  import LoggingSpanHandler._

  protected var logFunction: LogFunction = logger.info
  protected var formatter: SpanLogFormatter = _

  def configure(config: Config): Unit = {

    if (config.hasPath("log-level")) {
      val level = config.getString("log-level").toUpperCase

      // set the log level based on the configured value
      level match {
        case "ERROR" => logFunction = logger.error
        case "WARN" => logFunction = logger.warn
        case "INFO" => logFunction = logger.info
        case "DEBUG" => logFunction = logger.debug
        case "TRACE" => logFunction = logger.trace
      }
    }

    val formattingConfig = config.getConfig("formatting")
    formatter = makeFormatter(formattingConfig)
  }

  def handle(spanInfo: SpanInfo): Unit = {
    logFunction(formatter.buildMessage(spanInfo))
  }
} 
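A sketch of the configuration shape the configure method above expects. Note that getConfig("formatting") is called unconditionally, so a formatting block must be present even when log-level is omitted; the keys inside it belong to SpanLogFormatter and are not shown in this example, so they are left as a placeholder:

import com.typesafe.config.ConfigFactory

val handlerConf = ConfigFactory.parseString(
  """
    |log-level = INFO
    |formatting {
    |  # keys consumed by SpanLogFormatter.apply go here (not part of this example)
    |}
  """.stripMargin)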
Example 81
Source File: TraceLoggingSpec.scala    From money   with Apache License 2.0 5 votes vote down vote up
package com.comcast.money.core.logging

import org.mockito.Mockito._
import org.scalatest.mockito.MockitoSugar
import org.scalatest.{ Matchers, OneInstancePerTest, WordSpec }
import org.slf4j.Logger

class TraceLoggingSpec extends WordSpec with Matchers with MockitoSugar with OneInstancePerTest {

  val mockLogger = mock[Logger]

  "TraceLogging" should {
    "capture exceptions into a log" in {
      val testTraceLogging = new TraceLogging {
        override lazy val shouldLogExceptions: Boolean = true
        override val logger: Logger = mockLogger
      }

      val t = mock[Throwable]
      testTraceLogging.logException(t)
      verify(mockLogger).error("Tracing exception", t)
    }
    "not capture exceptions if log exceptions is not enabled" in {
      val testTraceLogging = new TraceLogging {
        override lazy val shouldLogExceptions: Boolean = false
        override val logger: Logger = mockLogger
      }
      val t = mock[Throwable]
      testTraceLogging.logException(t)
      verifyZeroInteractions(mockLogger)
    }
  }
} 
Example 82
Source File: PipeLine.scala    From gearpump-examples   with Apache License 2.0 5 votes vote down vote up
package io.gearpump.examples.tap_pipeline

import akka.actor.ActorSystem
import com.typesafe.config.{ConfigFactory, ConfigRenderOptions}
import io.gearpump.cluster.UserConfig
import io.gearpump.cluster.client.ClientContext
import io.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import io.gearpump.external.hbase.HBaseSink
import io.gearpump.streaming.StreamApplication
import io.gearpump.streaming.kafka.{KafkaSource, KafkaStorageFactory}
import io.gearpump.streaming.sink.DataSinkProcessor
import io.gearpump.streaming.source.DataSourceProcessor
import io.gearpump.tap.TapJsonConfig
import io.gearpump.util.Graph._
import io.gearpump.util.{AkkaApp, Graph, LogUtil}
import org.slf4j.Logger

object PipeLine extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "hbase"-> CLIOption[String]("<hbase instance>", required = false, defaultValue = Some("hbase")),
    "kafka"-> CLIOption[String]("<kafka instance>", required = false, defaultValue = Some("kafka")),
    "table"-> CLIOption[String]("<hbase table>", required = false, defaultValue = Some("gp_tap_table")),
    "topic"-> CLIOption[String]("<kafka topic>", required = false, defaultValue = Some("gp_tap_topic"))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system

    val conf = ConfigFactory.load
    val services = conf.root.withOnlyKey("VCAP_SERVICES").render(ConfigRenderOptions.defaults().setJson(true))
    val tjc = new TapJsonConfig(services)
    val hbaseconfig = tjc.getHBase(config.getString("hbase"))
    // val kafkaconfig = tjc.getKafka(config.getString("kafka"))
    // ZooKeeper listens on port 2181 (with the /kafka chroot); the Kafka brokers listen on 9092.
    val kafkaconfig = Map(
      "zookeepers" -> "10.10.10.46:2181,10.10.10.236:2181,10.10.10.164:2181/kafka",
      "brokers" -> "10.10.10.46:9092,10.10.10.164:9092,10.10.10.236:9092"
    )
    val topic = config.getString("topic")
    val table = config.getString("table")
    val zookeepers = kafkaconfig("zookeepers")
    val brokers = kafkaconfig("brokers")
    val source = DataSourceProcessor(new KafkaSource(topic, zookeepers,new KafkaStorageFactory(zookeepers, brokers)), 1)
    val sink = DataSinkProcessor(new HBaseSink(table, hbaseconfig), 1)
    val app = StreamApplication("TAPPipeline", Graph(
      source ~> sink
    ), UserConfig.empty)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config, context.system))
    context.close()
  }

} 
Example 83
Source File: PipeLine.scala    From gearpump-examples   with Apache License 2.0 5 votes vote down vote up
package io.gearpump.examples.kafka_hdfs_pipeline

import akka.actor.ActorSystem
import com.julianpeeters.avro.annotations._
import io.gearpump.cluster.UserConfig
import io.gearpump.cluster.client.ClientContext
import io.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import io.gearpump.partitioner.ShufflePartitioner
import io.gearpump.streaming.kafka.{KafkaSource, KafkaStorageFactory}
import io.gearpump.streaming.source.DataSourceProcessor
import io.gearpump.streaming.{Processor, StreamApplication}
import io.gearpump.util.Graph._
import io.gearpump.util.{AkkaApp, Graph, LogUtil}
import org.slf4j.Logger

case class SpaceShuttleMessage(id: String, on: String, body: String)


@AvroRecord
case class SpaceShuttleRecord(var ts: Long, var anomaly: Double)

object PipeLine extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "reader"-> CLIOption[Int]("<kafka data reader number>", required = false, defaultValue = Some(2)),
    "scorer"-> CLIOption[Int]("<scorer number>", required = false, defaultValue = Some(2)),
    "writer"-> CLIOption[Int]("<parquet file writer number>", required = false, defaultValue = Some(1)),
    "output"-> CLIOption[String]("<output path directory>", required = false, defaultValue = Some("/parquet")),
    "topic" -> CLIOption[String]("<topic>", required = false, defaultValue = Some("topic-105")),
    "brokers" -> CLIOption[String]("<brokers>", required = false, defaultValue = Some("10.10.10.46:9092,10.10.10.164:9092,10.10.10.236:9092")),
    "zookeepers" -> CLIOption[String]("<zookeepers>", required = false, defaultValue = Some("10.10.10.46:2181,10.10.10.236:2181,10.10.10.164:2181/kafka"))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system
    val readerNum = config.getInt("reader")
    val scorerNum = config.getInt("scorer")
    val writerNum = config.getInt("writer")
    val outputPath = config.getString("output")
    val topic = config.getString("topic")
    val brokers = config.getString("brokers")
    val zookeepers = config.getString("zookeepers")
    val appConfig = UserConfig.empty.withString(ParquetWriterTask.PARQUET_OUTPUT_DIRECTORY, outputPath)
    val offsetStorageFactory = new KafkaStorageFactory(zookeepers, brokers)

    val partitioner = new ShufflePartitioner()
    val source = new KafkaSource(topic, zookeepers, offsetStorageFactory)
    val reader = DataSourceProcessor(source, readerNum)
    val scorer = Processor[ScoringTask](scorerNum)
    val writer = Processor[ParquetWriterTask](writerNum)

    val dag = Graph(reader ~ partitioner ~> scorer ~ partitioner ~> writer)
    val app = StreamApplication("KafkaHdfsPipeLine", dag, appConfig)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config, context.system))
    context.close()
  }
} 
Example 84
Source File: PipeLineSpec.scala    From gearpump-examples   with Apache License 2.0 5 votes vote down vote up
package io.gearpump.examples.kafka_hdfs_pipeline

import akka.actor.ActorSystem
import io.gearpump._
import io.gearpump.cluster.UserConfig
import io.gearpump.streaming.task.{StartTime, Task, TaskContext}
import io.gearpump.streaming.transaction.api.TimeReplayableSource
import io.gearpump.util.LogUtil
import org.scalatest.prop.PropertyChecks
import org.scalatest.{BeforeAndAfterAll, Matchers, PropSpec}
import org.slf4j.Logger

import scala.util.{Failure, Success, Try}

class SpaceShuttleReplayableSource extends TimeReplayableSource {
  val data = Array[String](
    """
      |{"id":"2a329674-12ad-49f7-b40d-6485aae0aae8","on":"2015-04-02T18:52:02.680178753Z","body":"[-0.414141,-0.0246564,-0.125,0.0140301,-0.474359,0.0256049,-0.0980392,0.463884,0.40836]"}
    """
      .stripMargin,
    """
      |{"id":"043ade58-2fbc-4fe2-8253-84ab181b8cfa","on":"2015-04-02T18:52:02.680078434Z","body": "[-0.414141,-0.0246564,-0.125,0.0140301,-0.474359,0.0256049,-0.0980392,0.463884,0.40836]"}
    """.stripMargin,
    """
      |{"id":"043ade58-2fbc-4fe2-8253-84ab181b8cfa","on":"2015-04-02T18:52:02.680078434Z","body": "[-0.414141,-0.0246564,-0.125,0.0140301,-0.474359,0.0256049,-0.0980392,0.463884,0.40836]"}
    """.stripMargin
  )

  override def open(context: TaskContext, startTime: Option[TimeStamp]): Unit = {}

  override def read(num: Int): List[Message] = List(Message(data(0)), Message(data(1)), Message(data(2)))

  override def close(): Unit = {}
}

class SpaceShuttleProducer(taskContext : TaskContext, conf: UserConfig)
  extends Task(taskContext, conf) {

  import taskContext.{output, parallelism}

  private val batchSize = 3

  val taskParallelism = parallelism

  private val source: TimeReplayableSource = new SpaceShuttleReplayableSource()
  private var startTime: TimeStamp = 0L

  override def onStart(newStartTime: StartTime): Unit = {
    startTime = newStartTime.startTime
    LOG.info(s"start time $startTime")
    source.open(taskContext, Some(startTime))
    self ! Message("start", System.currentTimeMillis())
  }

  override def onNext(msg: Message): Unit = {
    Try({

      source.read(batchSize).foreach(msg => {
        output(msg)
      })
    }) match {
      case Success(ok) =>
      case Failure(throwable) =>
        LOG.error(s"failed ${throwable.getMessage}")
    }
    self ! Message("continue", System.currentTimeMillis())
  }

  override def onStop(): Unit = {
    LOG.info("closing kafka source...")
    source.close()
  }
}

class PipeLineSpec extends PropSpec with PropertyChecks with Matchers with BeforeAndAfterAll {
  val LOG: Logger = LogUtil.getLogger(getClass)
  implicit var system: ActorSystem = null

  override def beforeAll(): Unit = {
    system = ActorSystem("PipeLineSpec")
  }

  override def afterAll(): Unit = {
    system.shutdown()
  }

  property("PipeLineSpec should be able to create a DataSource") {
    Option(new SpaceShuttleReplayableSource) match {
      case Some(replayableSource) =>
      case None =>
        assert(false)
    }
  }
} 
Example 85
Source File: PipeLine.scala    From gearpump-examples   with Apache License 2.0 5 votes vote down vote up
package io.gearpump.examples.kafka_hbase_pipeline

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory
import io.gearpump.cluster.UserConfig
import io.gearpump.cluster.client.ClientContext
import io.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import io.gearpump.streaming.kafka.{KafkaSource, KafkaStorageFactory}
import io.gearpump.streaming.source.DataSourceProcessor
import io.gearpump.streaming.{Processor, StreamApplication}
import io.gearpump.util.Graph._
import io.gearpump.util.{AkkaApp, Graph, LogUtil}
import org.slf4j.Logger

object PipeLine extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)
  val PROCESSORS = "pipeline.processors"
  val PERSISTORS = "pipeline.persistors"

  override val options: Array[(String, CLIOption[Any])] = Array(
    "processors"-> CLIOption[Int]("<processor number>", required = false, defaultValue = Some(1)),
    "persistors"-> CLIOption[Int]("<persistor number>", required = false, defaultValue = Some(1)),
    "topic" -> CLIOption[String]("<topic>", required = false, defaultValue = Some("gptest")),
    "brokers" -> CLIOption[String]("<brokers>", required = false, defaultValue = Some("10.10.10.46:9092,10.10.10.164:9092,10.10.10.236:9092")),
    "zookeepers" -> CLIOption[String]("<zookeepers>", required = false, defaultValue = Some("10.10.10.46:2181,10.10.10.236:2181,10.10.10.164:2181/kafka"))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system
    import Messages._
    val pipelineString =
      """
        |pipeline {
        |  cpu.interval = 20
        |  memory.interval = 20
        |  processors = 1
        |  persistors = 1
        |}
        |hbase {
        |  table {
        |    name = "pipeline"
        |    column {
        |      family = "metrics"
        |      name = "average"
        |    }
        |  }
        |}
      """.stripMargin
    val pipelineConfig = PipeLineConfig(ConfigFactory.parseString(pipelineString))
    val processors = config.getInt("processors")
    val persistors = config.getInt("persistors")
    val topic = config.getString("topic")
    val brokers = config.getString("brokers")
    val zookeepers = config.getString("zookeepers")

    val appConfig = UserConfig.empty.withValue[PipeLineConfig](PIPELINE, pipelineConfig)

    val offsetStorageFactory = new KafkaStorageFactory(zookeepers, brokers)
    val source = new KafkaSource(topic, zookeepers, offsetStorageFactory)
    val kafka = DataSourceProcessor(source, 1)
    val cpuProcessor = Processor[CpuProcessor](processors, "CpuProcessor")
    val memoryProcessor = Processor[MemoryProcessor](processors, "MemoryProcessor")
    val cpuPersistor = Processor[CpuPersistor](persistors, "CpuPersistor")
    val memoryPersistor = Processor[MemoryPersistor](persistors, "MemoryPersistor")
    val app = StreamApplication("KafkaHbasePipeLine", Graph(
      kafka ~> cpuProcessor ~> cpuPersistor,
      kafka ~> memoryProcessor ~> memoryPersistor
    ), appConfig)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config, context.system))
    context.close()
  }

} 
Example 86
Source File: Logging.scala    From graphframes   with Apache License 2.0 5 votes vote down vote up
package org.graphframes

import org.slf4j.{Logger, LoggerFactory}

// This needs to be accessible to org.apache.spark.graphx.lib.backport
private[org] trait Logging {

  @transient private lazy val logger: Logger = LoggerFactory.getLogger(getClass.getName)

  protected def logDebug(s: => String): Unit = {
    if (logger.isDebugEnabled) logger.debug(s)
  }

  protected def logWarn(s: => String): Unit = {
    if (logger.isWarnEnabled) logger.warn(s)
  }

  protected def logInfo(s: => String): Unit = {
    if (logger.isInfoEnabled) logger.info(s)
  }

  protected def logTrace(s: => String): Unit = {
    if (logger.isTraceEnabled) logger.trace(s)
  }
} 
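A hypothetical mix-in sketch (ConnectedComponentsRunner is invented for illustration; the trait is private[org], so the sketch sits inside the org.graphframes package). The by-name String parameters mean log messages are only constructed when the corresponding level is enabled:

package org.graphframes

private class ConnectedComponentsRunner extends Logging {
  def run(maxIter: Int): Unit = {
    logInfo(s"starting connected components, maxIter = $maxIter")
    if (maxIter <= 0) logWarn("maxIter should be positive; nothing to do")
  }
}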
Example 87
Source File: DistributedShellClient.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.examples.distributedshell

import java.util.concurrent.TimeUnit
import scala.concurrent.Await
import scala.concurrent.duration.Duration

import akka.pattern.ask
import org.slf4j.{Logger, LoggerFactory}

import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption}
import org.apache.gearpump.examples.distributedshell.DistShellAppMaster.ShellCommand
import org.apache.gearpump.util.{AkkaApp, Constants}


object DistributedShellClient extends AkkaApp with ArgumentsParser {
  implicit val timeout = Constants.FUTURE_TIMEOUT
  private val LOG: Logger = LoggerFactory.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "appid" -> CLIOption[Int]("<the distributed shell appid>", required = true),
    "command" -> CLIOption[String]("<shell command>", required = true)
  )

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    implicit val system = context.system
    implicit val dispatcher = system.dispatcher
    val appid = config.getInt("appid")
    val command = config.getString("command")
    val appMaster = context.resolveAppID(appid)
    LOG.info(s"Resolved appMaster $appid address $appMaster, sending command $command")
    val future = (appMaster ? ShellCommand(command)).map { result =>
      LOG.info(s"Result: \n$result")
      context.close()
    }
    Await.ready(future, Duration(60, TimeUnit.SECONDS))
  }
} 
Example 88
Source File: DistributedShell.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.examples.distributedshell

import org.slf4j.Logger

import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption}
import org.apache.gearpump.cluster.{Application, UserConfig}
import org.apache.gearpump.util.{AkkaApp, LogUtil}


object DistributedShell extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array.empty

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    LOG.info(s"Distributed shell submitting application...")
    val context = ClientContext(akkaConf)
    val app = context.submit(Application[DistShellAppMaster]("DistributedShell",
    UserConfig.empty))
    context.close()
    LOG.info(s"Distributed Shell Application started with appId ${app.appId} !")
  }
} 
Example 89
Source File: ShellExecutor.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.examples.distributedshell

import scala.sys.process._
import scala.util.{Failure, Success, Try}

import akka.actor.Actor
import org.slf4j.Logger

import org.apache.gearpump.cluster.{ExecutorContext, UserConfig}
import org.apache.gearpump.examples.distributedshell.DistShellAppMaster.{ShellCommand, ShellCommandResult}
import org.apache.gearpump.util.LogUtil


class ShellExecutor(executorContext: ExecutorContext, userConf: UserConfig) extends Actor {
  import executorContext._
  private val LOG: Logger = LogUtil.getLogger(getClass, executor = executorId, app = appId)

  LOG.info(s"ShellExecutor started!")

  override def receive: Receive = {
    case ShellCommand(command) =>
      val process = Try(s"$command".!!)
      val result = process match {
        case Success(msg) => msg
        case Failure(ex) => ex.getMessage
      }
      sender ! ShellCommandResult(executorId, result)
  }
} 
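A quick sketch of the Try-wrapped shell invocation used in receive above (the command string is arbitrary); !! returns the process output on exit code 0 and throws a RuntimeException otherwise, which Failure turns into an error message:

import scala.sys.process._
import scala.util.{Failure, Success, Try}

val output = Try("echo hello".!!) match {
  case Success(msg) => msg            // standard output, e.g. "hello\n"
  case Failure(ex) => ex.getMessage   // non-zero exit codes surface here
}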
Example 90
Source File: DistShellAppMaster.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.examples.distributedshell

import org.apache.gearpump.cluster.MasterToAppMaster.WorkerList

import scala.concurrent.Future

import akka.actor.{Deploy, Props}
import akka.pattern.{ask, pipe}
import akka.remote.RemoteScope
import com.typesafe.config.Config
import org.slf4j.Logger

import org.apache.gearpump.cluster.ClientToMaster.ShutdownApplication
import org.apache.gearpump.cluster.appmaster.ExecutorSystemScheduler.{ExecutorSystemJvmConfig, ExecutorSystemStarted, StartExecutorSystemTimeout}
import org.apache.gearpump.cluster._
import org.apache.gearpump.examples.distributedshell.DistShellAppMaster._
import org.apache.gearpump.util.{ActorUtil, Constants, LogUtil, Util}

class DistShellAppMaster(appContext: AppMasterContext, app: AppDescription)
  extends ApplicationMaster {

  import appContext._
  import context.dispatcher
  implicit val timeout = Constants.FUTURE_TIMEOUT
  private val LOG: Logger = LogUtil.getLogger(getClass, app = appId)
  protected var currentExecutorId = 0
  private var workerNum: Option[Int] = None

  override def preStart(): Unit = {
    LOG.info(s"Distributed Shell AppMaster started")
    ActorUtil.launchExecutorOnEachWorker(masterProxy, getExecutorJvmConfig, self)
  }

  override def receive: Receive = {
    case ExecutorSystemStarted(executorSystem, _) =>
      import executorSystem.{address, resource => executorResource, worker}
      val executorContext = ExecutorContext(currentExecutorId, worker, appId, app.name,
        self, executorResource)
      // Start executor
      val executor = context.actorOf(Props(classOf[ShellExecutor], executorContext, app.userConfig)
        .withDeploy(Deploy(scope = RemoteScope(address))), currentExecutorId.toString)
      executorSystem.bindLifeCycleWith(executor)
      currentExecutorId += 1
      ActorUtil.tellMasterIfApplicationReady(workerNum, currentExecutorId, appContext)
    case WorkerList(workers) =>
      workerNum = Some(workers.length)
      ActorUtil.tellMasterIfApplicationReady(workerNum, currentExecutorId, appContext)
    case StartExecutorSystemTimeout =>
      LOG.error(s"Failed to allocate resource in time")
      masterProxy ! ShutdownApplication(appId)
      context.stop(self)
    case msg: ShellCommand =>
      Future.fold(context.children.map(_ ? msg))(new ShellCommandResultAggregator) {
        (aggregator, response) => {
          aggregator.aggregate(response.asInstanceOf[ShellCommandResult])
        }
      }.map(_.toString()) pipeTo sender
  }

  private def getExecutorJvmConfig: ExecutorSystemJvmConfig = {
    val config: Config = app.clusterConfig
    val jvmSetting = Util.resolveJvmSetting(config.withFallback(context.system.settings.config))
      .executor
    ExecutorSystemJvmConfig(jvmSetting.classPath, jvmSetting.vmargs,
      appJar, username, config)
  }
}

object DistShellAppMaster {
  case class ShellCommand(command: String)

  case class ShellCommandResult(executorId: Int, msg: Any)

  class ShellCommandResultAggregator {
    val result: StringBuilder = new StringBuilder

    def aggregate(response: ShellCommandResult): ShellCommandResultAggregator = {
      result.append(s"Execute results from executor ${response.executorId} : \n")
      result.append(response.msg + "\n")
      this
    }

    override def toString: String = result.toString()
  }
} 
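A tiny sketch of how the aggregator above folds executor responses (the payloads are made up for illustration):

import org.apache.gearpump.examples.distributedshell.DistShellAppMaster._

val aggregated = new ShellCommandResultAggregator()
  .aggregate(ShellCommandResult(0, "uptime: 3 days"))
  .aggregate(ShellCommandResult(1, "uptime: 5 days"))
println(aggregated.toString)
// Execute results from executor 0 :
// uptime: 3 days
// Execute results from executor 1 :
// uptime: 5 days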
Example 91
Source File: SequenceFileIO.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.fsio

import org.apache.hadoop.conf.Configuration
import org.slf4j.Logger

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.streaming.partitioner.ShufflePartitioner
import org.apache.gearpump.streaming.examples.fsio.HadoopConfig._
import org.apache.gearpump.streaming.{Processor, StreamApplication}
import org.apache.gearpump.util.Graph._
import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil}

object SequenceFileIO extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "source" -> CLIOption[Int]("<sequence file reader number>", required = false,
      defaultValue = Some(1)),
    "sink" -> CLIOption[Int]("<sequence file writer number>", required = false,
      defaultValue = Some(1)),
    "input" -> CLIOption[String]("<input file path>", required = true),
    "output" -> CLIOption[String]("<output file directory>", required = true)
  )

  def application(config: ParseResult): StreamApplication = {
    val spoutNum = config.getInt("source")
    val boltNum = config.getInt("sink")
    val input = config.getString("input")
    val output = config.getString("output")
    val appConfig = UserConfig.empty.withString(SeqFileStreamProducer.INPUT_PATH, input)
      .withString(SeqFileStreamProcessor.OUTPUT_PATH, output)
    val hadoopConfig = appConfig.withHadoopConf(new Configuration())
    val partitioner = new ShufflePartitioner()
    val streamProducer = Processor[SeqFileStreamProducer](spoutNum)
    val streamProcessor = Processor[SeqFileStreamProcessor](boltNum)

    val app = StreamApplication("SequenceFileIO",
      Graph(streamProducer ~ partitioner ~> streamProcessor), hadoopConfig)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config))
    context.close()
  }
} 
Example 92
Source File: SOL.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.sol

import org.slf4j.Logger

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.streaming.partitioner.ShufflePartitioner
import org.apache.gearpump.streaming.{Processor, StreamApplication}
import org.apache.gearpump.util.Graph._
import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil}

object SOL extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "streamProducer" -> CLIOption[Int]("<stream producer number>", required = false,
    defaultValue = Some(1)),
    "streamProcessor" -> CLIOption[Int]("<stream processor number>", required = false,
    defaultValue = Some(1)),
    "bytesPerMessage" -> CLIOption[Int]("<size of each message>", required = false,
    defaultValue = Some(100)),
    "stages" -> CLIOption[Int]("<how many stages to run>", required = false,
    defaultValue = Some(2)))

  def application(config: ParseResult): StreamApplication = {
    val spoutNum = config.getInt("streamProducer")
    val boltNum = config.getInt("streamProcessor")
    val bytesPerMessage = config.getInt("bytesPerMessage")
    val stages = config.getInt("stages")
    val appConfig = UserConfig.empty.withInt(SOLStreamProducer.BYTES_PER_MESSAGE, bytesPerMessage)
    val partitioner = new ShufflePartitioner()
    val streamProducer = Processor[SOLStreamProducer](spoutNum)
    val streamProcessor = Processor[SOLStreamProcessor](boltNum)
    var computation = streamProducer ~ partitioner ~> streamProcessor
    computation = 0.until(stages - 2).foldLeft(computation) { (c, id) =>
      c ~ partitioner ~> streamProcessor.copy()
    }
    val dag = Graph(computation)
    val app = StreamApplication("sol", dag, appConfig)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config))
    context.close()
  }
} 
Example 93
Source File: Dag.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.complexdag

import org.slf4j.Logger

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.streaming.partitioner.HashPartitioner
import org.apache.gearpump.streaming.task.TaskContext
import org.apache.gearpump.streaming.{Processor, StreamApplication}
import org.apache.gearpump.util.Graph.{Node => GraphNode}
import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil}

case class Source_0(_context: TaskContext, _conf: UserConfig) extends Source(_context, _conf)
case class Source_1(_context: TaskContext, _conf: UserConfig) extends Source(_context, _conf)
case class Node_0(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf)
case class Node_1(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf)
case class Node_2(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf)
case class Node_3(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf)
case class Node_4(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf)
case class Sink_0(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf)
case class Sink_1(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf)
case class Sink_2(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf)
case class Sink_3(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf)
case class Sink_4(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf)


object Dag extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)
  val RUN_FOR_EVER = -1

  override val options: Array[(String, CLIOption[Any])] = Array.empty

  def application(config: ParseResult): StreamApplication = {

    val source_0 = Processor[Source_0](1)
    val source_1 = Processor[Source_1](1)
    val node_0 = Processor[Node_0](1)
    val node_1 = Processor[Node_1](1)
    val node_2 = Processor[Node_2](1)
    val node_3 = Processor[Node_3](1)
    val node_4 = Processor[Node_4](1)
    val sink_0 = Processor[Sink_0](1)
    val sink_1 = Processor[Sink_1](1)
    val sink_2 = Processor[Sink_2](1)
    val sink_3 = Processor[Sink_3](1)
    val sink_4 = Processor[Sink_4](1)
    val partitioner = new HashPartitioner
    val app = StreamApplication("dag", Graph(
      source_0 ~ partitioner ~> sink_1,
      source_0 ~ partitioner ~> sink_2,
      source_0 ~ partitioner ~> node_2,
      source_0 ~ partitioner ~> node_3,
      source_0 ~ partitioner ~> node_1,
      source_0 ~ partitioner ~> sink_0,
      node_2 ~ partitioner ~> node_3,
      node_1 ~ partitioner ~> node_3,
      node_1 ~ partitioner ~> sink_3,
      node_1 ~ partitioner ~> node_4,
      source_1 ~ partitioner ~> sink_4,
      source_1 ~ partitioner ~> node_0,
      node_3 ~ partitioner ~> sink_3,
      node_4 ~ partitioner ~> sink_3,
      node_0 ~ partitioner ~> sink_3
    ), UserConfig.empty)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val userConf = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(userConf))
    context.close()
  }
} 
Example 94
Source File: WindowAverageProcessor.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.state.processor

import scala.collection.immutable.TreeMap
import com.twitter.algebird.{AveragedGroup, AveragedValue}
import org.slf4j.Logger
import org.apache.gearpump.Message
import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.streaming.monoid.AlgebirdGroup
import org.apache.gearpump.streaming.serializer.ChillSerializer
import org.apache.gearpump.streaming.state.api.{PersistentState, PersistentTask}
import org.apache.gearpump.streaming.state.impl.{Interval, Window, WindowConfig, WindowState}
import org.apache.gearpump.streaming.task.TaskContext
import org.apache.gearpump.util.LogUtil

object WindowAverageProcessor {
  val LOG: Logger = LogUtil.getLogger(classOf[WindowAverageProcessor])
}

class WindowAverageProcessor(taskContext: TaskContext, conf: UserConfig)
  extends PersistentTask[AveragedValue](taskContext, conf) {

  override def persistentState: PersistentState[AveragedValue] = {
    val group = new AlgebirdGroup(AveragedGroup)
    val serializer = new ChillSerializer[TreeMap[Interval, AveragedValue]]
    val window = new Window(conf.getValue[WindowConfig](WindowConfig.NAME).get)
    new WindowState[AveragedValue](group, serializer, taskContext, window)
  }

  override def processMessage(state: PersistentState[AveragedValue],
      message: Message): Unit = {
    val value = AveragedValue(message.value.asInstanceOf[String].toLong)
    state.update(message.timestamp.toEpochMilli, value)
  }
} 
Example 95
Source File: HBaseConn.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.hbase

import akka.actor.ActorSystem
import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.external.hbase.HBaseSink
import org.apache.gearpump.streaming.StreamApplication
import org.apache.gearpump.streaming.partitioner.HashPartitioner
import org.apache.gearpump.streaming.sink.DataSinkProcessor
import org.apache.gearpump.streaming.source.DataSourceProcessor
import org.apache.gearpump.util.Graph.Node
import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil}
import org.slf4j.Logger

object HBaseConn extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)
  val RUN_FOR_EVER = -1

  override val options: Array[(String, CLIOption[Any])] = Array(
    "splitNum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)),
    "sinkNum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system

    val splitNum = config.getInt("splitNum")
    val sinkNum = config.getInt("sinkNum")

    val split = new Split
    val sourceProcessor = DataSourceProcessor(split, splitNum, "Split")
    val sink = HBaseSink(UserConfig.empty, "hbase")
    val sinkProcessor = DataSinkProcessor(sink, sinkNum)
    val partitioner = new HashPartitioner
    val computation = sourceProcessor ~ partitioner ~> sinkProcessor
    val application = StreamApplication("HBase", Graph(computation), UserConfig.empty)

    application

  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config, context.system))
    context.close()
  }
} 
Example 96
Source File: WordCount.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.wordcount

import akka.actor.ActorSystem
import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.streaming.partitioner.HashPartitioner
import org.apache.gearpump.streaming.source.DataSourceProcessor
import org.apache.gearpump.streaming.{Processor, StreamApplication}
import org.apache.gearpump.util.Graph.Node
import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil}
import org.slf4j.Logger


object WordCount extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)
  val RUN_FOR_EVER = -1

  override val options: Array[(String, CLIOption[Any])] = Array(
    "split" -> CLIOption[Int]("<how many source tasks>", required = false,
      defaultValue = Some(1)),
    "sum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system

    val sumNum = config.getInt("sum")
    val splitNum = config.getInt("split")
    val split = new Split
    val sourceProcessor = DataSourceProcessor(split, splitNum, "Split")
    val sum = Processor[Sum](sumNum)
    val partitioner = new HashPartitioner
    val computation = sourceProcessor ~ partitioner ~> sum
    val app = StreamApplication("wordCount", Graph(computation), UserConfig.empty)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context: ClientContext = ClientContext(akkaConf)
    val app = application(config, context.system)
    context.submit(app)
    context.close()
  }
} 
Example 97
Source File: KuduConn.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.kudu

import akka.actor.ActorSystem
import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.external.kudu.KuduSink
import org.apache.gearpump.streaming.StreamApplication
import org.apache.gearpump.streaming.partitioner.HashPartitioner
import org.apache.gearpump.streaming.sink.DataSinkProcessor
import org.apache.gearpump.streaming.source.DataSourceProcessor
import org.apache.gearpump.util.Graph.Node
import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil}
import org.slf4j.Logger

object KuduConn extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)
  val RUN_FOR_EVER = -1

  override val options: Array[(String, CLIOption[Any])] = Array(
    "splitNum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)),
    "sinkNum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system
    val splitNum = config.getInt("splitNum")
    val sinkNum = config.getInt("sinkNum")

    val map = Map[String, String]("KUDUSINK" -> "kudusink", "kudu.masters" -> "kuduserver",
      "KUDU_USER" -> "kudu.user", "GEARPUMP_KERBEROS_PRINCIPAL" -> "gearpump.kerberos.principal",
      "GEARPUMP_KEYTAB_FILE" -> "gearpump.keytab.file", "TABLE_NAME" -> "kudu.table.name"
    )

    val userConfig = new UserConfig(map)
    val split = new Split
    val sourceProcessor = DataSourceProcessor(split, splitNum, "Split")
    val sink = KuduSink(userConfig, "impala::default.kudu_1")
    val sinkProcessor = DataSinkProcessor(sink, sinkNum)
    val partitioner = new HashPartitioner
    val computation = sourceProcessor ~ partitioner ~> sinkProcessor
    val application = StreamApplication("Kudu", Graph(computation), userConfig)

    application
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config, context.system))
    context.close()
  }
} 
Example 98
Source File: KafkaReadWrite.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.kafka

import java.util.Properties

import akka.actor.ActorSystem
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.slf4j.Logger

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.streaming.partitioner.ShufflePartitioner
import org.apache.gearpump.streaming.StreamApplication
import org.apache.gearpump.streaming.kafka._
import org.apache.gearpump.streaming.sink.DataSinkProcessor
import org.apache.gearpump.streaming.source.DataSourceProcessor
import org.apache.gearpump.util.Graph._
import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil}

object KafkaReadWrite extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "source" -> CLIOption[Int]("<hom many kafka producer tasks>", required = false,
      defaultValue = Some(1)),
    "sink" -> CLIOption[Int]("<hom many kafka processor tasks>", required = false,
      defaultValue = Some(1)),
    "zookeeperConnect" -> CLIOption[String]("<zookeeper connect string>", required = false,
      defaultValue = Some("localhost:2181")),
    "brokerList" -> CLIOption[String]("<broker server list string>", required = false,
      defaultValue = Some("localhost:9092")),
    "sourceTopic" -> CLIOption[String]("<kafka source topic>", required = false,
      defaultValue = Some("topic1")),
    "sinkTopic" -> CLIOption[String]("<kafka sink topic>", required = false,
      defaultValue = Some("topic2"))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system
    val appName = "KafkaReadWrite"
    val sourceNum = config.getInt("source")
    val sinkNum = config.getInt("sink")
    val zookeeperConnect = config.getString("zookeeperConnect")
    val brokerList = config.getString("brokerList")
    val sourceTopic = config.getString("sourceTopic")
    val sinkTopic = config.getString("sinkTopic")

    val appConfig = UserConfig.empty
    val props = new Properties
    props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, zookeeperConnect)
    props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
    props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName)
    val source = new KafkaSource(sourceTopic, props)
    val checkpointStoreFactory = new KafkaStoreFactory(props)
    source.setCheckpointStore(checkpointStoreFactory)
    val sourceProcessor = DataSourceProcessor(source, sourceNum)
    val sink = new KafkaSink(sinkTopic, props)
    val sinkProcessor = DataSinkProcessor(sink, sinkNum)
    val partitioner = new ShufflePartitioner
    val computation = sourceProcessor ~ partitioner ~> sinkProcessor
    val app = StreamApplication(appName, Graph(computation), appConfig)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config, context.system))
    context.close()
  }
} 
Example 99
Source File: KafkaWordCount.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.kafka.wordcount

import java.util.Properties

import akka.actor.ActorSystem
import kafka.api.OffsetRequest
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.slf4j.Logger

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.streaming.partitioner.HashPartitioner
import org.apache.gearpump.streaming.kafka._
import org.apache.gearpump.streaming.sink.DataSinkProcessor
import org.apache.gearpump.streaming.source.DataSourceProcessor
import org.apache.gearpump.streaming.{Processor, StreamApplication}
import org.apache.gearpump.util.Graph._
import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil}

object KafkaWordCount extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "source" -> CLIOption[Int]("<how many kafka source tasks>", required = false,
      defaultValue = Some(1)),
    "split" -> CLIOption[Int]("<how many split tasks>", required = false, defaultValue = Some(1)),
    "sum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)),
    "sink" -> CLIOption[Int]("<how many kafka sink tasks>", required = false,
      defaultValue = Some(1))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system
    val appName = "KafkaWordCount"
    val sourceNum = config.getInt("source")
    val splitNum = config.getInt("split")
    val sumNum = config.getInt("sum")
    val sinkNum = config.getInt("sink")
    val appConfig = UserConfig.empty
    val props = new Properties
    props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181")
    props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
    props.put(KafkaConfig.CONSUMER_START_OFFSET_CONFIG,
      new java.lang.Long(OffsetRequest.LatestTime))
    props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName)
    val sourceTopic = "topic1"
    val source = new KafkaSource(sourceTopic, props)
    val checkpointStoreFactory = new KafkaStoreFactory(props)
    source.setCheckpointStore(checkpointStoreFactory)
    val sourceProcessor = DataSourceProcessor(source, sourceNum)
    val split = Processor[Split](splitNum)
    val sum = Processor[Sum](sumNum)
    val sink = new KafkaSink("topic2", props)
    val sinkProcessor = DataSinkProcessor(sink, sinkNum)
    val partitioner = new HashPartitioner
    val computation = sourceProcessor ~ partitioner ~> split ~ partitioner ~>
      sum ~ partitioner ~> sinkProcessor
    val app = StreamApplication(appName, Graph(computation), appConfig)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config, context.system))
    context.close()
  }
} 
Example 100
Source File: DistServiceExecutor.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.distributeservice

import java.io.{File, FileWriter}
import java.net.InetAddress
import scala.collection.JavaConverters._
import scala.io.Source
import scala.sys.process._
import scala.util.{Failure, Success, Try}

import akka.actor.Actor
import org.apache.commons.io.FileUtils
import org.apache.commons.lang.text.StrSubstitutor
import org.slf4j.Logger

import org.apache.gearpump.cluster.{ExecutorContext, UserConfig}
import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.InstallService
import org.apache.gearpump.util.{ActorUtil, LogUtil}

class DistServiceExecutor(executorContext: ExecutorContext, userConf: UserConfig) extends Actor {
  import executorContext._
  private val LOG: Logger = LogUtil.getLogger(getClass, executor = executorId, app = appId)

  override def receive: Receive = {
    case InstallService(url, zipFileName, targetPath, scriptData, serviceName, serviceSettings) =>
      LOG.info(s"Executor $executorId receive command to install " +
        s"service $serviceName to $targetPath")
      unzipFile(url, zipFileName, targetPath)
      installService(scriptData, serviceName, serviceSettings)
  }

  private def unzipFile(url: String, zipFileName: String, targetPath: String) = {
    val zipFile = File.createTempFile(System.currentTimeMillis().toString, zipFileName)
    val dir = new File(targetPath)
    if (dir.exists()) {
      FileUtils.forceDelete(dir)
    }
    val bytes = FileServer.newClient.get(url).get
    FileUtils.writeByteArrayToFile(zipFile, bytes)
    val result = Try(s"unzip ${zipFile.getAbsolutePath} -d $targetPath".!!)
    result match {
      case Success(msg) => LOG.info(s"Executor $executorId unzip file to $targetPath")
      case Failure(ex) => throw ex
    }
  }

  private def installService(
      scriptData: Array[Byte], serviceName: String, serviceSettings: Map[String, Any]) = {
    val tempFile = File.createTempFile("gearpump", serviceName)
    FileUtils.writeByteArrayToFile(tempFile, scriptData)
    val script = new File("/etc/init.d", serviceName)
    writeFileWithEnvVariables(tempFile, script, serviceSettings ++ getEnvSettings)
    val result = Try(s"chkconfig --add $serviceName".!!)
    result match {
      case Success(msg) => LOG.info(s"Executor install service $serviceName successfully!")
      case Failure(ex) => throw ex
    }
  }

  private def getEnvSettings: Map[String, Any] = {
    Map("workerId" -> worker,
      "localhost" -> ActorUtil.getSystemAddress(context.system).host.get,
      "hostname" -> InetAddress.getLocalHost.getHostName)
  }

  private def writeFileWithEnvVariables(source: File, target: File, envs: Map[String, Any]) = {
    val writer = new FileWriter(target)
    val sub = new StrSubstitutor(envs.asJava)
    sub.setEnableSubstitutionInVariables(true)
    Source.fromFile(source).getLines().foreach(line => writer.write(sub.replace(line) + "\r\n"))
    writer.close()
  }
} 
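A small sketch of the substitution step performed by writeFileWithEnvVariables above (the variable values are made up):

import scala.collection.JavaConverters._
import org.apache.commons.lang.text.StrSubstitutor

val vars = Map("hostname" -> "worker-1", "workerId" -> "3")
val sub = new StrSubstitutor(vars.asJava)
sub.setEnableSubstitutionInVariables(true) // as above: also resolves ${...} nested inside variable names
println(sub.replace("HOST=${hostname} WORKER=${workerId}"))
// prints: HOST=worker-1 WORKER=3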
Example 101
Source File: DistServiceAppMaster.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.distributeservice

import java.io.File
import org.apache.gearpump.cluster.MasterToAppMaster.WorkerList

import scala.concurrent.Future

import akka.actor.{Deploy, Props}
import akka.pattern.{ask, pipe}
import akka.remote.RemoteScope
import com.typesafe.config.Config
import org.slf4j.Logger

import org.apache.gearpump.cluster.ClientToMaster.ShutdownApplication
import org.apache.gearpump.cluster.appmaster.ExecutorSystemScheduler.{ExecutorSystemJvmConfig, ExecutorSystemStarted, StartExecutorSystemTimeout}
import org.apache.gearpump.cluster.{AppDescription, AppMasterContext, ApplicationMaster, ExecutorContext}
import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.{FileContainer, GetFileContainer, InstallService}
import org.apache.gearpump.util._

class DistServiceAppMaster(appContext: AppMasterContext, app: AppDescription)
  extends ApplicationMaster {
  import appContext._
  import context.dispatcher
  implicit val timeout = Constants.FUTURE_TIMEOUT
  private val LOG: Logger = LogUtil.getLogger(getClass, app = appId)
  private var currentExecutorId = 0
  private var workerNum: Option[Int] = None
  private var fileServerPort = -1

  val rootDirectory = new File("/")
  val host = context.system.settings.config.getString(Constants.GEARPUMP_HOSTNAME)
  val server = context.actorOf(Props(classOf[FileServer], rootDirectory, host, 0))

  override def preStart(): Unit = {
    LOG.info(s"Distribute Service AppMaster started")
    ActorUtil.launchExecutorOnEachWorker(masterProxy, getExecutorJvmConfig, self)
  }

  (server ? FileServer.GetPort).asInstanceOf[Future[FileServer.Port]] pipeTo self

  override def receive: Receive = {
    case ExecutorSystemStarted(executorSystem, _) =>
      import executorSystem.{address, resource => executorResource, worker}
      val executorContext = ExecutorContext(currentExecutorId, worker,
        appId, app.name, self, executorResource)
      // start executor
      val executor = context.actorOf(Props(classOf[DistServiceExecutor],
        executorContext, app.userConfig).withDeploy(
        Deploy(scope = RemoteScope(address))), currentExecutorId.toString)
      executorSystem.bindLifeCycleWith(executor)
      currentExecutorId += 1
      ActorUtil.tellMasterIfApplicationReady(workerNum, currentExecutorId, appContext)
    case WorkerList(workers) =>
      workerNum = Some(workers.length)
      ActorUtil.tellMasterIfApplicationReady(workerNum, currentExecutorId, appContext)
    case StartExecutorSystemTimeout =>
      LOG.error(s"Failed to allocate resource in time")
      masterProxy ! ShutdownApplication(appId)
      context.stop(self)
    case FileServer.Port(port) =>
      this.fileServerPort = port
    case GetFileContainer =>
      val name = Math.abs(new java.util.Random().nextLong()).toString
      sender ! new FileContainer(s"http://$host:$fileServerPort/$name")
    case installService: InstallService =>
      context.children.foreach(_ ! installService)
  }

  private def getExecutorJvmConfig: ExecutorSystemJvmConfig = {
    val config: Config = app.clusterConfig
    val jvmSetting = Util.resolveJvmSetting(
      config.withFallback(context.system.settings.config)).executor
    ExecutorSystemJvmConfig(jvmSetting.classPath, jvmSetting.vmargs,
      appJar, username, config)
  }
}

object DistServiceAppMaster {
  case object GetFileContainer

  case class FileContainer(url: String)

  case class InstallService(
      url: String,
      zipFileName: String,
      targetPath: String,
      script: Array[Byte],
      serviceName: String,
      serviceSettings: Map[String, Any])
} 
Example 102
Source File: DistributeService.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.distributeservice

import org.slf4j.Logger

import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption}
import org.apache.gearpump.cluster.{Application, UserConfig}
import org.apache.gearpump.util.{AkkaApp, LogUtil}


object DistributeService extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array.empty

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    LOG.info(s"Distribute Service submitting application...")
    val context = ClientContext(akkaConf)
    val app = context.submit(Application[DistServiceAppMaster]("DistributedService",
      UserConfig.empty))
    context.close()
    LOG.info(s"Distribute Service Application started with appId ${app.appId} !")
  }
} 
Example 103
Source File: FetchThread.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.kafka.lib.source.consumer

import java.nio.channels.ClosedByInterruptException
import java.util.concurrent.LinkedBlockingQueue

import kafka.common.TopicAndPartition
import org.apache.gearpump.streaming.kafka.lib.util.KafkaClient
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.slf4j.Logger

import org.apache.gearpump.util.LogUtil

object FetchThread {
  private val LOG: Logger = LogUtil.getLogger(classOf[FetchThread])

  val factory = new FetchThreadFactory

  class FetchThreadFactory extends java.io.Serializable {
    def getFetchThread(config: KafkaConfig, client: KafkaClient): FetchThread = {
      val fetchThreshold = config.getInt(KafkaConfig.FETCH_THRESHOLD_CONFIG)
      val fetchSleepMS = config.getLong(KafkaConfig.FETCH_SLEEP_MS_CONFIG)
      val startOffsetTime = config.getLong(KafkaConfig.CONSUMER_START_OFFSET_CONFIG)
      FetchThread(fetchThreshold, fetchSleepMS, startOffsetTime, client)
    }
  }

  def apply(fetchThreshold: Int,
      fetchSleepMS: Long,
      startOffsetTime: Long,
      client: KafkaClient): FetchThread = {
    val createConsumer = (tp: TopicAndPartition) =>
      client.createConsumer(tp.topic, tp.partition, startOffsetTime)
    val incomingQueue = new LinkedBlockingQueue[KafkaMessage]()
    val sleeper = new ExponentialBackoffSleeper(
      backOffMultiplier = 2.0,
      initialDurationMs = 100L,
      maximumDurationMs = 10000L)
    new FetchThread(createConsumer, incomingQueue, sleeper, fetchThreshold, fetchSleepMS)
  }
}


  private def fetchMessage: Boolean = {
    if (incomingQueue.size >= fetchThreshold) {
      false
    } else {
      consumers.foldLeft(false) { (hasNext, tpAndConsumer) =>
        val (_, consumer) = tpAndConsumer
        if (consumer.hasNext) {
          incomingQueue.put(consumer.next())
          true
        } else {
          hasNext
        }
      }
    }
  }

  private def createAllConsumers: Map[TopicAndPartition, KafkaConsumer] = {
    topicAndPartitions.map(tp => tp -> createConsumer(tp)).toMap
  }

  private def resetConsumers(nextOffsets: Map[TopicAndPartition, Long]): Unit = {
    consumers.values.foreach(_.close())
    consumers = createAllConsumers
    consumers.foreach { case (tp, consumer) =>
      consumer.setStartOffset(nextOffsets(tp))
    }
  }
} 
Example 104
Source File: CGroupProcessLauncher.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.cluster.worker

import java.io.File
import scala.sys.process.Process

import com.typesafe.config.Config
import org.slf4j.{Logger, LoggerFactory}

import org.apache.gearpump.cluster.scheduler.Resource
import org.apache.gearpump.util.{ProcessLogRedirector, RichProcess}


class CGroupProcessLauncher(val config: Config) extends ExecutorProcessLauncher {
  private val APP_MASTER = -1
  private val cgroupManager: Option[CGroupManager] = CGroupManager.getInstance(config)
  private val LOG: Logger = LoggerFactory.getLogger(getClass)

  override def cleanProcess(appId: Int, executorId: Int): Unit = {
    if (executorId != APP_MASTER) {
      cgroupManager.foreach(_.shutDownExecutor(appId, executorId))
    }
  }

  override def createProcess(
      appId: Int, executorId: Int, resource: Resource, appConfig: Config, options: Array[String],
    classPath: Array[String], mainClass: String, arguments: Array[String]): RichProcess = {
    val cgroupCommand = if (executorId != APP_MASTER) {
      cgroupManager.map(_.startNewExecutor(appConfig, resource.slots, appId,
        executorId)).getOrElse(List.empty)
    } else List.empty
    LOG.info(s"Launch executor $executorId with CGroup ${cgroupCommand.mkString(" ")}, " +
      s"classpath: ${classPath.mkString(File.pathSeparator)}")

    val java = System.getProperty("java.home") + "/bin/java"
    val command = cgroupCommand ++ List(java) ++ options ++ List("-cp", classPath
      .mkString(File.pathSeparator), mainClass) ++ arguments
    LOG.info(s"Starting executor process java $mainClass ${arguments.mkString(" ")}; " +
      s"options: ${options.mkString(" ")}")
    val logger = new ProcessLogRedirector()
    val process = Process(command).run(logger)
    new RichProcess(process, logger)
  }
} 
Example 105
Source File: StormRunner.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.storm

import org.apache.gearpump.experiments.storm.main.{GearpumpNimbus, GearpumpStormClient}
import org.apache.gearpump.util.LogUtil
import org.slf4j.Logger

object StormRunner {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  private val commands = Map("nimbus" -> GearpumpNimbus, "app" -> GearpumpStormClient)

  private def usage(): Unit = {
    val keys = commands.keys.toList.sorted
    // scalastyle:off println
    Console.err.println("Usage: " + "<" + keys.mkString("|") + ">")
    // scalastyle:on println
  }

  private def executeCommand(command: String, commandArgs: Array[String]): Unit = {
    if (!commands.contains(command)) {
      usage()
    } else {
      commands(command).main(commandArgs)
    }
  }

  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      usage()
    } else {
      val command = args(0)
      val commandArgs = args.drop(1)
      executeCommand(command, commandArgs)
    }
  }
} 
Example 106
Source File: ContainerLaunchContext.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.yarn.glue

import java.io.File
import java.nio.ByteBuffer
import scala.collection.JavaConverters._

import org.apache.hadoop.fs.{FileSystem => YarnFileSystem, Path}
import org.apache.hadoop.io.DataOutputBuffer
import org.apache.hadoop.mapreduce.security.TokenCache
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.util.ConverterUtils
import org.slf4j.Logger

import org.apache.gearpump.util.LogUtil

private[glue]
object ContainerLaunchContext {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  def apply(yarnConf: YarnConfiguration, command: String, packagePath: String, configPath: String)
    : ContainerLaunchContext = {
    val context = Records.newRecord(classOf[ContainerLaunchContext])
    context.setCommands(Seq(command).asJava)
    context.setEnvironment(getAppEnv(yarnConf).asJava)
    context.setTokens(getToken(yarnConf, packagePath, configPath))
    context.setLocalResources(getAMLocalResourcesMap(yarnConf, packagePath, configPath).asJava)
    context
  }

  private def getFs(yarnConf: YarnConfiguration) = YarnFileSystem.get(yarnConf)

  private def getAppEnv(yarnConf: YarnConfiguration): Map[String, String] = {
    val classPaths = yarnConf.getStrings(
      YarnConfiguration.YARN_APPLICATION_CLASSPATH,
      YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH.mkString(File.pathSeparator))
    val allPaths = Option(classPaths).getOrElse(Array(""))

    // Also add the container working directory to the classpath.
    val allPathsWithPwd = allPaths :+ (Environment.PWD.$() + File.separator + "*")

    Map(Environment.CLASSPATH.name -> allPathsWithPwd.map(_.trim).mkString(File.pathSeparator))
  }

  private def getAMLocalResourcesMap(
      yarnConf: YarnConfiguration, packagePath: String, configPath: String)
    : Map[String, LocalResource] = {
    val fs = getFs(yarnConf)

    Map(
      "pack" -> newYarnAppResource(fs, new Path(packagePath),
        LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION),
      "conf" -> newYarnAppResource(fs, new Path(configPath),
        LocalResourceType.FILE, LocalResourceVisibility.APPLICATION))
  }

  private def newYarnAppResource(
      fs: YarnFileSystem, path: Path,
      resourceType: LocalResourceType, vis: LocalResourceVisibility): LocalResource = {
    val qualified = fs.makeQualified(path)
    val status = fs.getFileStatus(qualified)
    val resource = Records.newRecord(classOf[LocalResource])
    resource.setType(resourceType)
    resource.setVisibility(vis)
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified))
    resource.setTimestamp(status.getModificationTime)
    resource.setSize(status.getLen)
    resource
  }

  private def getToken(yc: YarnConfiguration, packagePath: String, configPath: String)
    : ByteBuffer = {
    val credentials = UserGroupInformation.getCurrentUser.getCredentials
    val dob = new DataOutputBuffer
    val dirs = Array(new Path(packagePath), new Path(configPath))
    TokenCache.obtainTokensForNamenodes(credentials, dirs, yc)
    credentials.writeTokenStorageToStream(dob)
    ByteBuffer.wrap(dob.getData)
  }
} 
Example 107
Source File: Client.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.yarn.client

import org.apache.gearpump.util.LogUtil
import org.slf4j.Logger


object Client {

  private val LOG: Logger = LogUtil.getLogger(getClass)
  val LAUNCH = "launch"

  val commands = Map(LAUNCH -> LaunchCluster) ++
    ManageCluster.commands.map(key => (key, ManageCluster)).toMap

  def usage(): Unit = {
    val keys = commands.keys.toList.sorted
    // scalastyle:off println
    Console.err.println("Usage: " + "<" + keys.mkString("|") + ">")
    // scalastyle:on println
  }

  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      usage()
    } else {
      val key = args(0)
      val command = commands.get(key)
      command match {
        case Some(command) =>
          val remainArgs = args.drop(1)
          if (key == LAUNCH) {
            command.main(remainArgs)
          } else {
            // Prepend the ManageCluster command flag before delegating.
            val updatedArgs = Array("-" + ManageCluster.COMMAND, key) ++ remainArgs
            command.main(updatedArgs)
          }
        case None =>
          usage()
      }
    }
  }
} 
Example 108
Source File: NonWindowState.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.state.impl

import org.slf4j.Logger

import org.apache.gearpump.Time.MilliSeconds
import org.apache.gearpump.streaming.state.api.{Monoid, MonoidState, Serializer}
import org.apache.gearpump.streaming.state.impl.NonWindowState._
import org.apache.gearpump.util.LogUtil

object NonWindowState {
  val LOG: Logger = LogUtil.getLogger(classOf[NonWindowState[_]])
}


class NonWindowState[T](monoid: Monoid[T], serializer: Serializer[T])
  extends MonoidState[T](monoid) {

  override def recover(timestamp: MilliSeconds, bytes: Array[Byte]): Unit = {
    serializer.deserialize(bytes).foreach(value => left = value)
  }

  override def update(timestamp: MilliSeconds, t: T): Unit = {
    updateState(timestamp, t)
  }

  override def checkpoint(): Array[Byte] = {
    val serialized = serializer.serialize(left)
    LOG.debug(s"checkpoint time: $checkpointTime; checkpoint value: ($checkpointTime, $left)")
    left = monoid.plus(left, right)
    right = monoid.zero
    serialized
  }
} 
Example 109
Source File: StreamingTransportSerializer.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.task

import java.io.{DataInput, DataOutput}

import org.slf4j.Logger

import org.apache.gearpump.streaming.{AckRequestSerializer, AckSerializer, InitialAckRequestSerializer, LatencyProbeSerializer}
import org.apache.gearpump.transport.netty.ITransportMessageSerializer
import org.apache.gearpump.util.LogUtil

class StreamingTransportSerializer extends ITransportMessageSerializer {
  private val log: Logger = LogUtil.getLogger(getClass)
  private val serializers = new SerializerResolver

  serializers.register(classOf[Ack], new AckSerializer)
  serializers.register(classOf[AckRequest], new AckRequestSerializer)
  serializers.register(classOf[InitialAckRequest], new InitialAckRequestSerializer)
  serializers.register(classOf[LatencyProbe], new LatencyProbeSerializer)
  serializers.register(classOf[SerializedMessage], new SerializedMessageSerializer)

  override def serialize(dataOutput: DataOutput, obj: Object): Unit = {
    val registration = serializers.getRegistration(obj.getClass)
    if (registration != null) {
      dataOutput.writeInt(registration.id)
      registration.serializer.asInstanceOf[TaskMessageSerializer[AnyRef]].write(dataOutput, obj)
    } else {
      log.error(s"Can not find serializer for class type ${obj.getClass}")
    }
  }

  override def deserialize(dataInput: DataInput, length: Int): Object = {
    val classID = dataInput.readInt()
    val registration = serializers.getRegistration(classID)
    if (registration != null) {
      registration.serializer.asInstanceOf[TaskMessageSerializer[AnyRef]].read(dataInput)
    } else {
      log.error(s"Can not find serializer for class id $classID")
      null
    }
  }

  override def getLength(obj: Object): Int = {
    val registration = serializers.getRegistration(obj.getClass)
    if (registration != null) {
      registration.serializer.asInstanceOf[TaskMessageSerializer[AnyRef]].getLength(obj) + 4
    } else {
      log.error(s"Can not find serializer for class type ${obj.getClass}")
      0
    }
  }
} 
Example 110
Source File: Context.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.transport.netty

import java.io.Closeable
import java.util.concurrent._

import scala.collection.JavaConverters._

import akka.actor.{ActorRef, ActorSystem, Props}
import com.typesafe.config.Config
import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory
import org.slf4j.Logger

import org.apache.gearpump.transport.netty.Server.ServerPipelineFactory
import org.apache.gearpump.transport.{ActorLookupById, HostPort}
import org.apache.gearpump.util.{Constants, LogUtil}

object Context {
  private final val LOG: Logger = LogUtil.getLogger(getClass)
}


  def close(): Unit = {

    LOG.info(s"Context.term, cleanup resources...., " +
      s"we have ${closeHandler.size()} items to close...")

    // Cleans up resource in reverse order so that client actor can be cleaned
    // before clientChannelFactory
    closeHandler.iterator().asScala.toList.reverse.foreach(_.close())
  }
} 
Example 111
Source File: GearpumpSerialization.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.serializer

import com.esotericsoftware.kryo.{Kryo, Serializer => KryoSerializer}
import com.typesafe.config.Config
import org.slf4j.Logger
import org.apache.gearpump.util.{Constants, LogUtil}

class GearpumpSerialization(config: Config) {

  private val LOG: Logger = LogUtil.getLogger(getClass)

  def customize(kryo: Kryo): Unit = {

    val serializationMap = configToMap(config, Constants.GEARPUMP_SERIALIZERS)

    serializationMap.foreach { kv =>
      val (key, value) = kv
      val keyClass = Class.forName(key)

      if (value == null || value.isEmpty) {

        // Use default serializer for this class type
        kryo.register(keyClass)
      } else {
        val valueClass = Class.forName(value)
        val register = kryo.register(keyClass,
          valueClass.newInstance().asInstanceOf[KryoSerializer[_]])
        LOG.debug(s"Registering ${keyClass}, id: ${register.getId}")
      }
    }
    kryo.setReferences(false)

    // Requires the user to register the class first before using
    kryo.setRegistrationRequired(true)
  }

  private final def configToMap(config: Config, path: String) = {
    import scala.collection.JavaConverters._
    config.getConfig(path).root.unwrapped.asScala.toMap map { case (k, v) => k -> v.toString }
  }
} 
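A usage sketch, not taken from the project: customize only needs a Typesafe Config whose Constants.GEARPUMP_SERIALIZERS section maps class names to optional Kryo serializer class names, an empty value meaning Kryo's default serializer. The scala.Tuple2 entry and the final registration lookup below are illustrative assumptions.

import com.esotericsoftware.kryo.Kryo
import com.typesafe.config.ConfigFactory

import org.apache.gearpump.serializer.GearpumpSerialization
import org.apache.gearpump.util.Constants

object GearpumpSerializationUsage {
  def main(args: Array[String]): Unit = {
    // One entry: scala.Tuple2 mapped to an empty value, i.e. Kryo's default serializer.
    val config = ConfigFactory.parseString(
      s"""${Constants.GEARPUMP_SERIALIZERS} {
         |  "scala.Tuple2" = ""
         |}""".stripMargin)

    val kryo = new Kryo()
    new GearpumpSerialization(config).customize(kryo)

    // Registration is now required, so only configured classes may be serialized.
    println(kryo.getRegistration(classOf[Tuple2[_, _]]).getId)
  }
}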
Example 112
Source File: Worker.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.cluster.main

import akka.actor.{ActorSystem, Props}
import org.apache.gearpump.cluster.ClusterConfig
import org.apache.gearpump.cluster.master.MasterProxy
import org.apache.gearpump.cluster.worker.{Worker => WorkerActor}
import org.apache.gearpump.transport.HostPort
import org.apache.gearpump.util.Constants._
import org.apache.gearpump.util.LogUtil.ProcessType
import org.apache.gearpump.util.{AkkaApp, LogUtil}
import org.slf4j.Logger

import scala.collection.JavaConverters._
import scala.concurrent.Await
import scala.concurrent.duration.Duration


object Worker extends AkkaApp with ArgumentsParser {
  protected override def akkaConfig = ClusterConfig.worker()

  override val description = "Start a worker daemon"

  var LOG: Logger = LogUtil.getLogger(getClass)

  private def uuid = java.util.UUID.randomUUID.toString

  def main(akkaConf: Config, args: Array[String]): Unit = {
    val id = uuid

    this.LOG = {
      LogUtil.loadConfiguration(akkaConf, ProcessType.WORKER)
      // Delay creation of LOG instance to avoid creating an empty log file as we
      // reset the log file name here
      LogUtil.getLogger(getClass)
    }

    val system = ActorSystem(id, akkaConf)

    val masterAddress = akkaConf.getStringList(GEARPUMP_CLUSTER_MASTERS).asScala.map { address =>
      val hostAndPort = address.split(":")
      HostPort(hostAndPort(0), hostAndPort(1).toInt)
    }

    LOG.info(s"Trying to connect to masters " + masterAddress.mkString(",") + "...")
    val masterProxy = system.actorOf(MasterProxy.props(masterAddress), s"masterproxy${system.name}")

    system.actorOf(Props(classOf[WorkerActor], masterProxy),
      classOf[WorkerActor].getSimpleName + id)

    Await.result(system.whenTerminated, Duration.Inf)
  }
} 
Example 113
Source File: Local.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.cluster.main

import akka.actor.{ActorSystem, Props}
import com.typesafe.config.ConfigValueFactory
import org.apache.gearpump.cluster.ClusterConfig
import org.apache.gearpump.cluster.master.{Master => MasterActor}
import org.apache.gearpump.cluster.worker.{Worker => WorkerActor}
import org.apache.gearpump.util.Constants._
import org.apache.gearpump.util.LogUtil.ProcessType
import org.apache.gearpump.util.{ActorUtil, Constants, LogUtil, MasterClientCommand, Util}
import org.slf4j.Logger

import scala.collection.JavaConverters._
import scala.concurrent.Await
import scala.concurrent.duration.Duration

object Local extends MasterClientCommand with ArgumentsParser {
  override def akkaConfig: Config = ClusterConfig.master()

  var LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] =
    Array("sameprocess" -> CLIOption[Boolean]("", required = false, defaultValue = Some(false)),
      "workernum" -> CLIOption[Int]("<how many workers to start>", required = false,
        defaultValue = Some(2)))

  override val description = "Start a local cluster"

  def main(akkaConf: Config, args: Array[String]): Unit = {

    this.LOG = {
      LogUtil.loadConfiguration(akkaConf, ProcessType.LOCAL)
      LogUtil.getLogger(getClass)
    }

    val config = parse(args)
    if (null != config) {
      local(config.getInt("workernum"), config.getBoolean("sameprocess"), akkaConf)
    }
  }

  def local(workerCount: Int, sameProcess: Boolean, akkaConf: Config): Unit = {
    if (sameProcess) {
      LOG.info("Starting local in same process")
      System.setProperty("LOCAL", "true")
    }
    val masters = akkaConf.getStringList(Constants.GEARPUMP_CLUSTER_MASTERS)
      .asScala.flatMap(Util.parseHostList)
    val local = akkaConf.getString(Constants.GEARPUMP_HOSTNAME)

    if (masters.size != 1 && masters.head.host != local) {
      LOG.error(s"The ${Constants.GEARPUMP_CLUSTER_MASTERS} is not match " +
        s"with ${Constants.GEARPUMP_HOSTNAME}")
    } else {

      val hostPort = masters.head
      implicit val system = ActorSystem(MASTER, akkaConf.
        withValue("akka.remote.netty.tcp.port", ConfigValueFactory.fromAnyRef(hostPort.port))
      )

      val master = system.actorOf(Props[MasterActor], MASTER)
      val masterPath = ActorUtil.getSystemAddress(system).toString + s"/user/$MASTER"

      0.until(workerCount).foreach { id =>
        system.actorOf(Props(classOf[WorkerActor], master), classOf[WorkerActor].getSimpleName + id)
      }

      Await.result(system.whenTerminated, Duration.Inf)
    }
  }
} 
Example 114
Source File: PresenceUpdater.scala    From AckCord   with MIT License 5 votes vote down vote up
package ackcord.cachehandlers

import ackcord.data.{Guild, GuildMember, Presence}
import ackcord.gateway.GatewayEvent.PresenceUpdateData
import org.slf4j.Logger

object PresenceUpdater extends CacheUpdater[PresenceUpdateData] {
  override def handle(builder: CacheSnapshotBuilder, obj: PresenceUpdateData, registry: CacheTypeRegistry)(
      implicit log: Logger
  ): Unit = {
    val PresenceUpdateData(partialUser, roles, rawActivity, guildId, status, _, clientStatus, premiumSince, nick) = obj

    registry.updateData(builder)(partialUser)

    for {
      guildHandler <- registry.getUpdater[Guild]
      oldGuild     <- builder.guildMap.get(guildId)
    } {

      val presencesToUse = if (registry.hasUpdater[Presence]) {
        val newActivity = rawActivity.map(_.toActivity).flatMap {
          case Right(activity) => Some(activity)
          case Left(e) =>
            log.warn(e)
            None
        }

        val newPresence = Presence(partialUser.id, newActivity, status, clientStatus)
        oldGuild.presences.updated(partialUser.id, newPresence)
      } else {
        oldGuild.presences
      }

      val oldMembers = oldGuild.members
      val membersToUse = if (registry.hasUpdater[GuildMember]) {
        oldMembers
          .get(partialUser.id)
          .map(member => oldMembers.updated(partialUser.id, member.copy(roleIds = roles, nick = nick)))
          .getOrElse(oldMembers)
      } else {
        oldMembers
      }

      val newGuild = oldGuild.copy(presences = presencesToUse, members = membersToUse)

      guildHandler.handle(builder, newGuild, registry)
    }
  }
} 
Example 115
Source File: JVMObjectTracker.scala    From seahorse-workflow-executor   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off

private[r] object JVMObjectTracker {
  @transient
  protected lazy val logger: Logger = LoggerFactory.getLogger(getClass.getName)
  private[this] val objMap = new TrieMap[String, Object]
  private[this] val objCounter = new AtomicInteger(0)

  def getObject(id: String): Object = {
    logger.info(s"Get object at  $id")
    objMap(id)
  }

  def get(id: String): Option[Object] = {
    logger.info(s"Get object at $id")
    objMap.get(id)
  }

  def put(obj: Object): String = {
    val objId = objCounter.getAndIncrement.toString
    val objName = obj.getClass.getName
    logger.info(s"Puts $objName at $objId ")
    objMap.put(objId, obj)
    objId
  }

  def remove(id: String): Option[Object] = {
    logger.info(s"Removed $id")
    objMap.remove(id)
  }

} 
Example 116
Source File: LoggingSerializationSpec.scala    From seahorse-workflow-executor   with Apache License 2.0 5 votes vote down vote up
package io.deepsense.commons

import org.slf4j.Logger

import io.deepsense.commons.serialization.Serialization
import io.deepsense.commons.utils.Logging

class LoggingSerializationSpec
  extends StandardSpec
  with UnitTestSupport
  with Serialization {

  "Object" when {
    "mixes-in SerializableLogging" should {
      "be serializable" in {
        val testObject = new SerializableTestObject()
        testObject.getLogger.trace("Logging just to force initiation of lazy logger")
        val deserialized = serializeDeserialize[SerializableTestObject](testObject)
        deserialized.getLogger should not be null
        deserialized.getLogger.trace("If this is printed everything is OK")
      }
    }
  }
}

class SerializableTestObject extends Serializable with Logging {
  def getLogger: Logger = this.logger
} 
Example 117
Source File: LoggingServerInterceptor.scala    From scala-server-toolkit   with MIT License 5 votes vote down vote up
package com.avast.sst.grpc.server.interceptor

import io.grpc.ForwardingServerCall.SimpleForwardingServerCall
import io.grpc.ForwardingServerCallListener.SimpleForwardingServerCallListener
import io.grpc._
import org.slf4j.Logger


class LoggingServerInterceptor(logger: Logger) extends ServerInterceptor {

  override def interceptCall[ReqT, RespT](
      call: ServerCall[ReqT, RespT],
      headers: Metadata,
      next: ServerCallHandler[ReqT, RespT]
  ): ServerCall.Listener[ReqT] = {
    val methodName = call.getMethodDescriptor.getFullMethodName
    val finalCall = new CloseServerCall(methodName, call)
    new OnMessageServerCallListener(methodName, next.startCall(finalCall, headers))
  }

  private class CloseServerCall[A, B](methodName: String, delegate: ServerCall[A, B]) extends SimpleForwardingServerCall[A, B](delegate) {
    override def close(status: Status, trailers: Metadata): Unit = {
      if ((status.getCode eq Status.Code.UNKNOWN) || (status.getCode eq Status.Code.INTERNAL)) {
        logger.error(
          String.format(
            "Error response from method %s: %s %s",
            methodName,
            status.getCode,
            status.getDescription
          ),
          status.getCause
        )
      } else if (!status.isOk) {
        logger.warn(
          String.format(
            "Error response from method %s: %s %s",
            methodName,
            status.getCode,
            status.getDescription
          ),
          status.getCause
        )
      } else {
        logger.debug("Successful response from method {}: {}", Array(methodName, status): _*)
      }
      super.close(status, trailers)
    }
  }

  private class OnMessageServerCallListener[A](methodName: String, delegate: ServerCall.Listener[A])
      extends SimpleForwardingServerCallListener[A](delegate) {
    override def onMessage(message: A): Unit = {
      logger.debug("Dispatching method {}", methodName)
      super.onMessage(message)
    }
  }

} 
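A hedged usage sketch rather than the library's documented wiring: the class is a plain ServerInterceptor, so it can be attached through ServerBuilder#intercept (available in recent grpc-java releases); the port and logger name below are made up, and no service is registered.

import com.avast.sst.grpc.server.interceptor.LoggingServerInterceptor
import io.grpc.ServerBuilder
import org.slf4j.LoggerFactory

object LoggingServerInterceptorUsage {
  def main(args: Array[String]): Unit = {
    val logger = LoggerFactory.getLogger("grpc-access-log")

    val builder = ServerBuilder.forPort(9090)
    // Real code would also call addService(...) with a generated gRPC service.
    builder.intercept(new LoggingServerInterceptor(logger))

    val server = builder.build().start()
    server.awaitTermination()
  }
}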
Example 118
Source File: MysqlSink1.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.jdbcOutput

import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}
import org.apache.flink.api.common.io.OutputFormat
import org.apache.flink.configuration.Configuration
import org.slf4j.{Logger, LoggerFactory}

class MysqlSink1 extends OutputFormat[User]{

  val logger: Logger = LoggerFactory.getLogger("MysqlSink1")
  var conn: Connection = _
  var ps: PreparedStatement = _
  val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true"
  val username = "root"
  val password = "123456"
  val driverName = "com.mysql.jdbc.Driver"

  override def configure(parameters: Configuration): Unit = {
    // not need
  }

  override def open(taskNumber: Int, numTasks: Int): Unit = {
    try {
      // Load the JDBC driver and open the connection.
      Class.forName(driverName)
      conn = DriverManager.getConnection(jdbcUrl, username, password)

      // close auto commit
      conn.setAutoCommit(false)
    } catch {
      case e@(_: ClassNotFoundException | _: SQLException) =>
        logger.error("init mysql error")
        e.printStackTrace()
        System.exit(-1);
    }
  }

  override def writeRecord(user: User): Unit = {

    println("get user : " + user.toString)
    ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)")
    ps.setString(1, user.username)
    ps.setString(2, user.password)
    ps.setInt(3, user.sex)
    ps.setString(4, user.phone)

    ps.execute()
    conn.commit()
  }

  override def close(): Unit = {

    if (conn != null){
      conn.commit()
      conn.close()
    }
  }
} 
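A usage sketch (not from the project): because MysqlSink1 is a plain OutputFormat[User], it can be attached with writeUsingOutputFormat. The sample records assume User is a case class with (username, password, sex, phone) fields, matching how the statement parameters are bound above.

package com.venn.stream.api.jdbcOutput

import org.apache.flink.streaming.api.scala._

object MysqlSink1Usage {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Hypothetical sample records; User(username, password, sex, phone) is assumed.
    val users = env.fromElements(
      User("alice", "secret", 0, "555-0100"),
      User("bob", "hunter2", 1, "555-0101"))

    users.writeUsingOutputFormat(new MysqlSink1)
    env.execute("mysql-outputformat-demo")
  }
}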
Example 119
Source File: MysqlSink.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.jdbcOutput

import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.slf4j.{Logger, LoggerFactory}

class MysqlSink extends RichSinkFunction[User] {

  val logger: Logger = LoggerFactory.getLogger("MysqlSink")
  var conn: Connection = _
  var ps: PreparedStatement = _
  val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true"
  val username = "root"
  val password = "123456"
  val driverName = "com.mysql.jdbc.Driver"

  override def open(parameters: Configuration): Unit = {

    try {
      // Load the JDBC driver and open the connection.
      Class.forName(driverName)
      conn = DriverManager.getConnection(jdbcUrl, username, password)

      // close auto commit
      conn.setAutoCommit(false)
    } catch {
      case e@(_: ClassNotFoundException | _: SQLException) =>
        logger.error("init mysql error")
        e.printStackTrace()
        System.exit(-1);
    }
  }

  
  override def invoke(user: User, context: SinkFunction.Context[_]): Unit = {
    println("get user : " + user.toString)
    ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)")
    ps.setString(1, user.username)
    ps.setString(2, user.password)
    ps.setInt(3, user.sex)
    ps.setString(4, user.phone)

    ps.execute()
    conn.commit()
  }



  override def close(): Unit = {
    if (conn != null){
      conn.commit()
      conn.close()
    }
  }
} 
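The same idea as a sketch for the RichSinkFunction variant, attached with addSink; again, the User constructor shape is an assumption.

package com.venn.stream.api.jdbcOutput

import org.apache.flink.streaming.api.scala._

object MysqlSinkUsage {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Hypothetical sample record; User(username, password, sex, phone) is assumed.
    val users = env.fromElements(User("carol", "pa55word", 0, "555-0102"))

    users.addSink(new MysqlSink)
    env.execute("mysql-richsink-demo")
  }
}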
Example 120
Source File: SparkNRedshiftUtil.scala    From SqlShift   with MIT License 5 votes vote down vote up
package com.goibibo.sqlshift

import java.sql.{Connection, DriverManager}
import java.util.Properties

import com.databricks.spark.redshift.RedshiftReaderM
import com.typesafe.config.Config
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.{BeforeAndAfterAll, Suite}
import org.slf4j.{Logger, LoggerFactory}


trait SparkNRedshiftUtil extends BeforeAndAfterAll {
    self: Suite =>
    private val logger: Logger = LoggerFactory.getLogger(this.getClass)
    @transient private var _sc: SparkContext = _
    @transient private var _sqlContext: SQLContext = _

    def sc: SparkContext = _sc
    def sqlContext: SQLContext = _sqlContext

    private def getRedshiftConnection(config: Config): Connection = {
        val mysql = config.getConfig("redshift")
        val connectionProps = new Properties()
        connectionProps.put("user", mysql.getString("username"))
        connectionProps.put("password", mysql.getString("password"))
        val jdbcUrl = s"jdbc:redshift://${mysql.getString("hostname")}:${mysql.getInt("portno")}/${mysql.getString("database")}?useSSL=false"
        Class.forName("com.amazon.redshift.jdbc4.Driver")
        DriverManager.getConnection(jdbcUrl, connectionProps)
    }

    val getSparkContext: (SparkContext, SQLContext) = {
        val sparkConf: SparkConf = new SparkConf().setAppName("Full Dump Testing").setMaster("local")
        val sc: SparkContext = new SparkContext(sparkConf)
        val sqlContext: SQLContext = new SQLContext(sc)

        System.setProperty("com.amazonaws.services.s3.enableV4", "true")
        sc.hadoopConfiguration.set("fs.s3a.endpoint", "s3.ap-south-1.amazonaws.com")
        sc.hadoopConfiguration.set("fs.s3a.fast.upload", "true")
        (sc, sqlContext)
    }

    def readTableFromRedshift(config: Config, tableName: String): DataFrame = {
        val redshift: Config = config.getConfig("redshift")
        val options = Map("dbtable" -> tableName,
            "user" -> redshift.getString("username"),
            "password" -> redshift.getString("password"),
            "url" -> s"jdbc:redshift://${redshift.getString("hostname")}:${redshift.getInt("portno")}/${redshift.getString("database")}",
            "tempdir" -> config.getString("s3.location"),
            "aws_iam_role" -> config.getString("redshift.iamRole")
        )
        RedshiftReaderM.getDataFrameForConfig(options, sc, sqlContext)
    }

    def dropTableRedshift(config: Config, tables: String*): Unit = {
        logger.info("Droping table: {}", tables)
        val conn = getRedshiftConnection(config)
        val statement = conn.createStatement()
        try {
            val dropTableQuery = s"""DROP TABLE ${tables.mkString(",")}"""
            logger.info("Running query: {}", dropTableQuery)
            statement.executeUpdate(dropTableQuery)
        } finally {
            statement.close()
            conn.close()
        }
    }

    override protected def beforeAll(): Unit = {
        super.beforeAll()
        val (sc, sqlContext) = getSparkContext
        _sc = sc
        _sqlContext = sqlContext
    }

    override protected def afterAll(): Unit = {
        super.afterAll()
        _sc.stop()
    }
} 
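A hedged sketch of a test mixing in the trait; the config file name and table are placeholders, and it assumes a ScalaTest version where org.scalatest.FlatSpec is available. The trait supplies the SparkContext/SQLContext lifecycle plus the Redshift helpers.

import com.goibibo.sqlshift.SparkNRedshiftUtil
import com.typesafe.config.{Config, ConfigFactory}
import org.scalatest.FlatSpec

class RedshiftReadSpec extends FlatSpec with SparkNRedshiftUtil {

    // Hypothetical HOCON file on the test classpath with redshift and s3 sections.
    private val config: Config = ConfigFactory.load("sqlshift-test")

    "readTableFromRedshift" should "return a DataFrame for an existing table" in {
        val df = readTableFromRedshift(config, "public.some_table")
        assert(df.columns.nonEmpty)
    }
}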
Example 121
Source File: MySQLUtil.scala    From SqlShift   with MIT License 5 votes vote down vote up
package com.goibibo.sqlshift

import java.net.URL
import java.sql.{Connection, DriverManager}
import java.util.Properties

import com.typesafe.config.Config
import org.slf4j.{Logger, LoggerFactory}

import scala.io.Source


object MySQLUtil {
    private val logger: Logger = LoggerFactory.getLogger(this.getClass)

    private def getMySQLConnection(config: Config): Connection = {
        val mysql = config.getConfig("mysql")
        val connectionProps = new Properties()
        connectionProps.put("user", mysql.getString("username"))
        connectionProps.put("password", mysql.getString("password"))
        val jdbcUrl = s"jdbc:mysql://${mysql.getString("hostname")}:${mysql.getInt("portno")}/${mysql.getString("db")}?createDatabaseIfNotExist=true&useSSL=false"
        Class.forName("com.mysql.jdbc.Driver")
        DriverManager.getConnection(jdbcUrl, connectionProps)
    }

    def createTableAndInsertRecords(config: Config, tableName: String, psvFile: URL): Unit = {
        logger.info("Inserting records in table: {}", tableName)
        val records = Source.fromFile(psvFile.toURI).getLines().toList.drop(1) // removing header

        val conn = getMySQLConnection(config)
        val statement = conn.createStatement()
        try {
            val tableCreateQuery = config.getString("table.tableCreateQuery").replace("${tableName}", tableName)
            logger.info("Running query: {}", tableCreateQuery)
            statement.executeUpdate(tableCreateQuery)
            val insertIntoQuery = config.getString("table.insertIntoQuery").replace("${tableName}", tableName)
            logger.info("Running query: {}", insertIntoQuery)
            records.foreach { record: String =>
                val columns = record.split("\\|")
                val query = insertIntoQuery.format(columns: _*)
                statement.executeUpdate(query)
            }
        } finally {
            statement.close()
            conn.close()
        }
    }
} 
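A short usage sketch with placeholder names: the helper expects a pipe-separated file whose first line is a header, plus table.tableCreateQuery and table.insertIntoQuery templates and mysql.* connection settings in the config.

import com.goibibo.sqlshift.MySQLUtil
import com.typesafe.config.ConfigFactory

object MySQLUtilUsage {
    def main(args: Array[String]): Unit = {
        // Hypothetical config and data file; both names are made up for illustration.
        val config = ConfigFactory.load("sqlshift-test")
        val psvFile = getClass.getResource("/full_dump.psv")

        MySQLUtil.createTableAndInsertRecords(config, "test_table", psvFile)
    }
}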
Example 122
Source File: MailAPI.scala    From SqlShift   with MIT License 5 votes vote down vote up
package com.goibibo.sqlshift.alerting

import java.util.Properties
import javax.mail.Message.RecipientType
import javax.mail.internet.{InternetAddress, MimeMessage, _}
import javax.mail.{Authenticator, PasswordAuthentication, Session, Transport}

import com.goibibo.sqlshift.models.Configurations.AppConfiguration
import com.goibibo.sqlshift.models.Params.MailParams
import org.slf4j.{Logger, LoggerFactory}

class MailAPI(mailParams: MailParams) {
    private val logger: Logger = LoggerFactory.getLogger(classOf[MailAPI])

    private val prop = new Properties() {
        put("mail.smtp.host", mailParams.host)
        put("mail.smtp.port", mailParams.port.toString)
    }

    private val session: Session = mailParams.password match {
        case Some(password) =>
            prop.setProperty("mail.smtp.auth", "true")
            Session.getDefaultInstance(prop, new Authenticator {
                override def getPasswordAuthentication: PasswordAuthentication = {
                    new PasswordAuthentication(mailParams.username, password)
                }
            })
        case None => Session.getDefaultInstance(prop)
    }


    
    def send(appConfs: List[AppConfiguration]): Unit = {
        val from = "[email protected]"
        logger.info("Mail from: {}", from)
        var subject = "SQLShift:"
        var text = "<html>" +
                "<body>" +
                "<table border='1' style='width:100%' bgcolor='#F5F5F5'>" +
                "<tr> <th size=6>Mysql schema</th>" +
                "<th size=6>Mysql table_name</th>" +
                "<th size=6>Redshift schema</th>" +
                "<th size=6>Status</th>" +
                "<th size=6>Migration Time(sec)</th>" +
                "<th size=6>Error</th></tr>"

        logger.info(s"Mail to: '${mailParams.to}' and cc: '${mailParams.cc}'")
        val tos: List[String] = mailParams.to.split(",").toList
        var ccs: List[String] = List()
        if (mailParams.cc != "")
            ccs = mailParams.cc.split(",").toList

        var errorCnt = 0
        var successCnt = 0
        for (appConf <- appConfs) {

            text += "<tr>" +
                    "<td bgcolor='#FFE4C4'>" + appConf.mysqlConf.db + "</td>" +
                    "<td bgcolor='#E0FFFF'>" + appConf.mysqlConf.tableName + "</td>" +
                    "<td bgcolor='#F5F5DC'>" + appConf.redshiftConf.schema + "</td>" +
                    "<td bgcolor='#E0FFFF'>" + appConf.status.get.isSuccessful + "</td>" +
                    "<td bgcolor='#E0FFFF'>" + appConf.migrationTime.get + "</td>"

            if (appConf.status.get.isSuccessful) {
                // Close the row with an empty Error cell for successful migrations.
                text += "<td></td></tr>"
                successCnt += 1
            }
            else {
                text += "<td bgcolor='#F0FFFF'>%s\n%s</td></tr>"
                        .format(appConf.status.get.e.getMessage, appConf.status.get.e.getStackTrace.mkString("\n"))
                errorCnt += 1
            }
        }

        subject += " Failed " + errorCnt.toString + " Success " + successCnt.toString + mailParams.subject

        text += "</table></body></html>"
        logger.info("Subject: {}", subject)

        val message = new MimeMessage(session)
        message.setFrom(new InternetAddress(from))
        for (to <- tos)
            message.addRecipient(RecipientType.TO, new InternetAddress(to))
        for (cc <- ccs)
            message.addRecipient(RecipientType.CC, new InternetAddress(cc))
        message.setSubject(subject)
        message.setText(text)

        val mimeBdyPart = new MimeBodyPart()

        mimeBdyPart.setContent(text, "text/html; charset=utf-8")

        val multiPart = new MimeMultipart()

        logger.info("Sending message...")
        multiPart.addBodyPart(mimeBdyPart)
        message.setContent(multiPart)
        Transport.send(message)
    }
} 
Example 123
Source File: MetricsWrapper.scala    From SqlShift   with MIT License 5 votes vote down vote up
package com.goibibo.sqlshift.commons

import java.util.concurrent.TimeUnit

import com.codahale.metrics.Timer.Context
import com.codahale.metrics._
import org.slf4j.{Logger, LoggerFactory}


    def stopSLF4JReporting(): Unit = {
        slf4jReporter.stop()
    }

    def stopJMXReporting(): Unit = {
        jmxReporter.stop()
    }

    def getTimerMetrics(metricName: String): Context = {
        val timer: Timer = metricRegistry.timer(metricName)
        timer.time()
    }

    def stopTimerContext(context: Context): Long = {
        context.stop()
    }

    def registerGauge(metricName: String, value: Boolean): Gauge[Boolean] = {
        try {
            metricRegistry.register(metricName, new Gauge[Boolean] {
                override def getValue: Boolean = {
                    value
                }
            })
        } catch {
            case e: IllegalArgumentException => logger.warn(s"$metricName gauge metric is already registered!!!")
                metricRegistry.getGauges.get(metricName).asInstanceOf[Gauge[Boolean]]
        }
    }

    def registerGauge(metricName: String, value: Int): Gauge[Int] = {
        try {
            metricRegistry.register(metricName, new Gauge[Int] {
                override def getValue: Int = {
                    value
                }
            })
        } catch {
            case e: IllegalArgumentException => logger.warn(s"$metricName gauge metric is already registered!!!")
                metricRegistry.getGauges.get(metricName).asInstanceOf[Gauge[Int]]
        }
    }

    def incCounter(metricName: String, incValue: Long = 1): Unit = {
        metricRegistry.counter(metricName).inc(incValue)
    }

} 
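A usage sketch that assumes the methods above live on an object named MetricsWrapper, matching the file name; the metric names are arbitrary.

import com.goibibo.sqlshift.commons.MetricsWrapper

object MetricsWrapperUsage {
    def main(args: Array[String]): Unit = {
        val timer = MetricsWrapper.getTimerMetrics("migration.time")
        // ... the work being measured would go here ...
        val elapsedNanos = MetricsWrapper.stopTimerContext(timer)

        MetricsWrapper.incCounter("migration.success")
        MetricsWrapper.registerGauge("migration.running", value = false)
        println(s"Migration took $elapsedNanos ns")
    }
}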
Example 124
Source File: cacheUpdates.scala    From AckCord   with MIT License 5 votes vote down vote up
package ackcord

import ackcord.cachehandlers.{CacheHandler, CacheSnapshotBuilder, CacheTypeRegistry}
import ackcord.gateway.Dispatch
import org.slf4j.Logger


case class APIMessageCacheUpdate[Data](
    data: Data,
    sendEvent: CacheState => Option[APIMessage],
    handler: CacheHandler[Data],
    registry: CacheTypeRegistry,
    dispatch: Dispatch[_]
) extends CacheEvent {

  override def process(builder: CacheSnapshotBuilder)(implicit log: Logger): Unit =
    handler.handle(builder, data, registry)
} 
Example 125
Source File: CacheStreams.scala    From AckCord   with MIT License 5 votes vote down vote up
package ackcord

import scala.collection.mutable

import ackcord.cachehandlers.CacheSnapshotBuilder
import ackcord.gateway.GatewayEvent.ReadyData
import ackcord.gateway.GatewayMessage
import ackcord.requests.SupervisionStreams
import akka.NotUsed
import akka.actor.typed.ActorSystem
import akka.stream.scaladsl.{BroadcastHub, Flow, Keep, MergeHub, Sink, Source}
import org.slf4j.Logger

object CacheStreams {

  
  def cacheUpdater(
      cacheProcessor: MemoryCacheSnapshot.CacheProcessor
  )(implicit system: ActorSystem[Nothing]): Flow[CacheEvent, (CacheEvent, CacheState), NotUsed] =
    Flow[CacheEvent].statefulMapConcat { () =>
      var state: CacheState    = null
      implicit val log: Logger = system.log

      //We only handle events when we are ready to, and we have received the ready event.
      def isReady: Boolean = state != null

      {
        case readyEvent @ APIMessageCacheUpdate(_: ReadyData, _, _, _, _) =>
          val builder = new CacheSnapshotBuilder(
            0,
            null, //The event will populate this,
            mutable.Map.empty,
            mutable.Map.empty,
            mutable.Map.empty,
            mutable.Map.empty,
            mutable.Map.empty,
            mutable.Map.empty,
            mutable.Map.empty,
            mutable.Map.empty,
            cacheProcessor
          )

          readyEvent.process(builder)

          val snapshot = builder.toImmutable
          state = CacheState(snapshot, snapshot)
          List(readyEvent -> state)
        case handlerEvent: CacheEvent if isReady =>
          val builder = CacheSnapshotBuilder(state.current)
          handlerEvent.process(builder)

          state = state.update(builder.toImmutable)
          List(handlerEvent -> state)
        case _ if !isReady =>
          log.error("Received event before ready")
          Nil
      }
    }
} 
Example 126
Source File: MLPMnistSingleLayerExample.scala    From dl4scala   with MIT License 5 votes vote down vote up
package org.dl4scala.examples.feedforward.mnist

import org.dl4scala.datasets.iterator.impl.MnistDataSetIterator
import org.slf4j.LoggerFactory
import org.slf4j.Logger
import org.deeplearning4j.eval.Evaluation
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.deeplearning4j.optimize.listeners.ScoreIterationListener
import org.deeplearning4j.nn.api.OptimizationAlgorithm
import org.deeplearning4j.nn.conf.NeuralNetConfiguration
import org.deeplearning4j.nn.conf.Updater
import org.deeplearning4j.nn.conf.layers.DenseLayer
import org.deeplearning4j.nn.conf.layers.OutputLayer
import org.deeplearning4j.nn.weights.WeightInit
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction


object MLPMnistSingleLayerExample extends App{
  private val log: Logger = LoggerFactory.getLogger(MLPMnistSingleLayerExample.getClass)

  // number of rows and columns in the input pictures
  private  val numRows = 28
  private  val numColumns = 28
  private  val outputNum = 10 // number of output classes
  private  val batchSize = 128 // batch size for each epoch
  private  val rngSeed = 123 // random number seed for reproducibility
  private  val numEpochs = 15 // number of epochs to perform

  // Get the DataSetIterators:
  private val mnistTrain = new MnistDataSetIterator(batchSize, true, rngSeed)
  private val mnistTest = new MnistDataSetIterator(batchSize, false, rngSeed)


  log.info("Build model....")

  private val conf = new NeuralNetConfiguration
    .Builder()
    .seed(rngSeed) // include a random seed for reproducibility
    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) // use stochastic gradient descent as an optimization algorithm
    .iterations(1)
    .learningRate(0.006) // specify the learning rate
    .updater(Updater.NESTEROVS)
    .regularization(true).l2(1e-4)
    .list()
    .layer(0, new DenseLayer.Builder() // create the first, input layer with xavier initialization
        .nIn(numRows * numColumns)
        .nOut(1000)
        .activation(Activation.RELU)
        .weightInit(WeightInit.XAVIER)
        .build())
    .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD) // create the output layer
        .nIn(1000)
        .nOut(outputNum)
        .activation(Activation.SOFTMAX)
        .weightInit(WeightInit.XAVIER)
        .build())
    .pretrain(false).backprop(true) // use backpropagation to adjust weights
    .build()

  private val model = new MultiLayerNetwork(conf)
  model.init()
  // print the score with every 1 iteration
  model.setListeners(new ScoreIterationListener(1))
  log.info("Train model....")

  for(i <- 0 until numEpochs){
    model.fit(mnistTrain)
  }

  log.info("Evaluate model....")
  val eval = new Evaluation(outputNum) // create an evaluation object with 10 possible classes

  while(mnistTest.hasNext){
    val next = mnistTest.next()
    val output = model.output(next.getFeatureMatrix) // get the networks prediction
    eval.eval(next.getLabels, output) // check the prediction against the true class
  }

  log.info(eval.stats)
  log.info("****************Example finished********************")

} 
Example 127
Source File: CacheTypeRegistry.scala    From AckCord   with MIT License 5 votes vote down vote up
package ackcord.cachehandlers

import scala.reflect.ClassTag

import ackcord.data._
import ackcord.data.raw.PartialUser
import org.slf4j.Logger

class CacheTypeRegistry(
    val updateHandlers: Map[Class[_], CacheUpdater[_]],
    val deleteHandlers: Map[Class[_], CacheDeleter[_]],
    log: Logger
) {

  private def handleWithData[D: ClassTag, HandlerTpe[-A] <: CacheHandler[A]](
      handlers: Map[Class[_], HandlerTpe[_]],
      tpe: String,
      data: => D,
      builder: CacheSnapshotBuilder
  ): Unit =
    getWithData[D, HandlerTpe](tpe, handlers).foreach(handler => handler.handle(builder, data, this)(log))

  def updateData[D: ClassTag](builder: CacheSnapshotBuilder)(data: => D): Unit =
    handleWithData(updateHandlers, "updater", data, builder)

  def deleteData[D: ClassTag](builder: CacheSnapshotBuilder)(data: => D): Unit =
    handleWithData(deleteHandlers, "deleter", data, builder)

  private def getWithData[D, HandlerTpe[-A] <: CacheHandler[A]](
      tpe: String,
      handlers: Map[Class[_], HandlerTpe[_]]
  )(implicit tag: ClassTag[D]): Option[HandlerTpe[D]] = {
    val res = handlers
      .get(tag.runtimeClass)
      .asInstanceOf[Option[HandlerTpe[D]]]
      .orElse(handlers.find(_._1.isAssignableFrom(tag.runtimeClass)).map(_._2.asInstanceOf[HandlerTpe[D]]))

    if (res.isEmpty) {
      log.debug(s"No $tpe found for ${tag.runtimeClass}")
    }

    res
  }

  def getUpdater[D: ClassTag]: Option[CacheUpdater[D]] =
    getWithData("updater", updateHandlers)

  def getDeleter[D: ClassTag]: Option[CacheDeleter[D]] =
    getWithData("deleter", deleteHandlers)

  def hasUpdater[D: ClassTag]: Boolean =
    getUpdater.isDefined

  def hasDeleter[D: ClassTag]: Boolean =
    getDeleter.isDefined
}
object CacheTypeRegistry {

  private val noPresencesBansEmojiUpdaters: Map[Class[_], CacheUpdater[_]] = Map(
    classOf[PartialUser]      -> CacheHandlers.partialUserUpdater,
    classOf[Guild]            -> CacheHandlers.guildUpdater,
    classOf[GuildMember]      -> CacheHandlers.guildMemberUpdater,
    classOf[GuildChannel]     -> CacheHandlers.guildChannelUpdater,
    classOf[DMChannel]        -> CacheHandlers.dmChannelUpdater,
    classOf[GroupDMChannel]   -> CacheHandlers.dmGroupChannelUpdater,
    classOf[User]             -> CacheHandlers.userUpdater,
    classOf[UnavailableGuild] -> CacheHandlers.unavailableGuildUpdater,
    classOf[Message]          -> CacheHandlers.messageUpdater,
    classOf[Role]             -> CacheHandlers.roleUpdater
  )

  private val noPresencesUpdaters: Map[Class[_], CacheUpdater[_]] = noPresencesBansEmojiUpdaters ++ Map(
    classOf[Ban]   -> CacheUpdater.dummy[Ban],
    classOf[Emoji] -> CacheUpdater.dummy[Emoji]
  )

  private val allUpdaters: Map[Class[_], CacheUpdater[_]] =
    noPresencesUpdaters + (classOf[Presence] -> CacheUpdater.dummy[Presence])

  private val noBanDeleters: Map[Class[_], CacheDeleter[_]] = Map(
    classOf[GuildChannel]   -> CacheHandlers.guildChannelDeleter,
    classOf[DMChannel]      -> CacheHandlers.dmChannelDeleter,
    classOf[GroupDMChannel] -> CacheHandlers.groupDmChannelDeleter,
    classOf[GuildMember]    -> CacheHandlers.guildMemberDeleter,
    classOf[Role]           -> CacheHandlers.roleDeleter,
    classOf[Message]        -> CacheHandlers.messageDeleter
  )

  private val allDeleters: Map[Class[_], CacheDeleter[_]] = noBanDeleters + (classOf[Ban] -> CacheDeleter.dummy[Ban])

  def default(log: Logger) = new CacheTypeRegistry(allUpdaters, allDeleters, log)

  def noPresences(log: Logger) = new CacheTypeRegistry(noPresencesUpdaters, allDeleters, log)

  def noPresencesBansEmoji(log: Logger) =
    new CacheTypeRegistry(noPresencesBansEmojiUpdaters, noBanDeleters, log)
} 
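A small sketch showing how a registry variant can be built and queried; the factory methods and hasUpdater/hasDeleter checks come from the code above, while the logger name is arbitrary.

import ackcord.cachehandlers.CacheTypeRegistry
import ackcord.data.{Ban, Guild, Presence}
import org.slf4j.LoggerFactory

object CacheTypeRegistryUsage {
  def main(args: Array[String]): Unit = {
    val log = LoggerFactory.getLogger("cache-registry")

    // Drop presence tracking but keep the other handlers.
    val registry = CacheTypeRegistry.noPresences(log)

    println(registry.hasUpdater[Guild])    // true: guildUpdater is registered
    println(registry.hasUpdater[Presence]) // false: presences were excluded
    println(registry.hasDeleter[Ban])      // true: allDeleters handles bans
  }
}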
Example 128
Source File: ReadyUpdater.scala    From AckCord   with MIT License 5 votes vote down vote up
package ackcord.cachehandlers

import ackcord.CacheSnapshot.BotUser
import ackcord.gateway.GatewayEvent.ReadyData
import org.slf4j.Logger
import shapeless.tag

//We handle this one separately as is it's kind of special
object ReadyUpdater extends CacheUpdater[ReadyData] {
  override def handle(builder: CacheSnapshotBuilder, obj: ReadyData, registry: CacheTypeRegistry)(
      implicit log: Logger
  ): Unit = {
    val ReadyData(_, botUser, unavailableGuilds, _, _) = obj

    val guilds = unavailableGuilds.map(g => g.id -> g)

    builder.botUser = tag[BotUser](botUser)
    builder.unavailableGuildMap ++= guilds
  }
} 
Example 129
Source File: VoiceUDPHandler.scala    From AckCord   with MIT License 5 votes vote down vote up
package ackcord.voice

import java.net.InetSocketAddress

import scala.concurrent.duration._
import scala.util.{Failure, Success}

import ackcord.data.{RawSnowflake, UserId}
import akka.NotUsed
import akka.actor.typed._
import akka.actor.typed.scaladsl._
import akka.stream.OverflowStrategy
import akka.stream.scaladsl.{Keep, Sink, Source, SourceQueueWithComplete}
import akka.util.ByteString
import org.slf4j.Logger

object VoiceUDPHandler {

  def apply(
      address: String,
      port: Int,
      ssrc: Int,
      serverId: RawSnowflake,
      userId: UserId,
      soundProducer: Source[ByteString, NotUsed],
      soundConsumer: Sink[AudioAPIMessage, NotUsed],
      parent: ActorRef[VoiceHandler.Command]
  ): Behavior[Command] =
    Behaviors
      .supervise(
        Behaviors.setup[Command] { ctx =>
          implicit val system: ActorSystem[Nothing] = ctx.system

          val ((queue, futIp), watchDone) = soundProducer
            .viaMat(
              VoiceUDPFlow
                .flow(
                  new InetSocketAddress(address, port),
                  ssrc,
                  serverId,
                  userId,
                  Source.queue[Option[ByteString]](0, OverflowStrategy.dropBuffer)
                )
                .watchTermination()(Keep.both)
            )(Keep.right)
            .to(soundConsumer)
            .run()

          ctx.pipeToSelf(futIp) {
            case Success(value) => IPDiscoveryResult(value)
            case Failure(e)     => SendExeption(e)
          }
          ctx.pipeToSelf(watchDone)(_ => ConnectionDied)

          handle(ctx, ctx.log, ssrc, queue, parent)
        }
      )
      .onFailure(
        SupervisorStrategy
          .restartWithBackoff(100.millis, 5.seconds, 1D)
          .withResetBackoffAfter(10.seconds)
          .withMaxRestarts(5)
      )

  def handle(
      ctx: ActorContext[Command],
      log: Logger,
      ssrc: Int,
      queue: SourceQueueWithComplete[Option[ByteString]],
      parent: ActorRef[VoiceHandler.Command]
  ): Behavior[Command] = Behaviors.receiveMessage {
    case SendExeption(e) => throw e
    case ConnectionDied  => Behaviors.stopped
    case Shutdown =>
      queue.complete()
      Behaviors.same
    case IPDiscoveryResult(VoiceUDPFlow.FoundIP(localAddress, localPort)) =>
      parent ! VoiceHandler.GotLocalIP(localAddress, localPort)
      Behaviors.same
    case SetSecretKey(key) =>
      queue.offer(key)
      Behaviors.same
  }

  sealed trait Command

  case object Shutdown extends Command

  private case class SendExeption(e: Throwable)                       extends Command
  private case object ConnectionDied                                  extends Command
  private case class IPDiscoveryResult(foundIP: VoiceUDPFlow.FoundIP) extends Command
  private[voice] case class SetSecretKey(key: Option[ByteString])     extends Command
} 
Example 130
Source File: LoggingTrait.scala    From azure-sqldb-spark   with MIT License 5 votes vote down vote up
package com.microsoft.azure.sqldb.spark

import org.slf4j.{Logger, LoggerFactory}

private[spark] trait LoggingTrait {

  // Make the log field transient so that objects with Logging can
  // be serialized and used on another machine
  @transient private var log_ : Logger = null // scalastyle:ignore

  // Method to get the logger name for this object
  protected def logName = {
    // Ignore trailing $'s in the class names for Scala objects
    this.getClass.getName.stripSuffix("$")
  }

  // Method to get or create the logger for this object
  protected def log: Logger = {
    if (log_ == null) {
      // scalastyle:ignore
      log_ = LoggerFactory.getLogger(logName)
    }
    log_
  }

  // Log methods that take only a String
  protected def logInfo(msg: => String) {
    if (log.isInfoEnabled) log.info(msg)
  }

  protected def logDebug(msg: => String) {
    if (log.isDebugEnabled) log.debug(msg)
  }

  protected def logTrace(msg: => String) {
    if (log.isTraceEnabled) log.trace(msg)
  }

  protected def logWarning(msg: => String) {
    if (log.isWarnEnabled) log.warn(msg)
  }

  protected def logError(msg: => String) {
    if (log.isErrorEnabled) log.error(msg)
  }

  // Log methods that take Throwables (Exceptions/Errors) too
  protected def logInfo(msg: => String, throwable: Throwable) {
    if (log.isInfoEnabled) log.info(msg, throwable)
  }

  protected def logDebug(msg: => String, throwable: Throwable) {
    if (log.isDebugEnabled) log.debug(msg, throwable)
  }

  protected def logTrace(msg: => String, throwable: Throwable) {
    if (log.isTraceEnabled) log.trace(msg, throwable)
  }

  protected def logWarning(msg: => String, throwable: Throwable) {
    if (log.isWarnEnabled) log.warn(msg, throwable)
  }

  protected def logError(msg: => String, throwable: Throwable) {
    if (log.isErrorEnabled) log.error(msg, throwable)
  }
} 
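Because the trait is private[spark], anything mixing it in has to live under the same package; a minimal hypothetical example:

package com.microsoft.azure.sqldb.spark

// Hypothetical helper class; only the mixin pattern is the point here.
class BulkCopyRunner extends LoggingTrait {

  def run(batchSize: Int): Unit = {
    logInfo(s"Starting bulk copy with batch size $batchSize")
    try {
      // ... the actual copy logic would go here ...
      logDebug("Batch submitted")
    } catch {
      case t: Throwable => logError("Bulk copy failed", t)
    }
  }
}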
Example 131
Source File: IrisMLP.scala    From scala-deeplearn-examples   with Apache License 2.0 5 votes vote down vote up
package io.brunk.examples.scalnet

import io.brunk.examples.IrisReader
import org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator
import org.deeplearning4j.eval.Evaluation
import org.deeplearning4j.nn.conf.Updater
import org.deeplearning4j.nn.weights.WeightInit
import org.deeplearning4j.optimize.listeners.ScoreIterationListener
import org.deeplearning4j.scalnet.layers.core.Dense
import org.deeplearning4j.scalnet.models.Sequential
import org.deeplearning4j.scalnet.regularizers.L2
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4j.linalg.learning.config.Sgd
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction
import org.slf4j.{Logger, LoggerFactory}


object IrisMLP {

  private val log: Logger = LoggerFactory.getLogger(IrisMLP.getClass)

  def main(args: Array[String]): Unit = {

    val seed         = 1
    val numInputs    = 4
    val numHidden    = 10
    val numOutputs   = 3
    val learningRate = 0.1
    val iterations   = 1000

    val testAndTrain  = IrisReader.readData()
    val trainList     = testAndTrain.getTrain.asList()
    val trainIterator = new ListDataSetIterator(trainList, trainList.size)

    val model = Sequential(rngSeed = seed)
    model.add(Dense(numHidden, nIn = numInputs, weightInit = WeightInit.XAVIER, activation = Activation.RELU))
    model.add(Dense(numOutputs, weightInit = WeightInit.XAVIER, activation = Activation.SOFTMAX))

    model.compile(lossFunction = LossFunction.NEGATIVELOGLIKELIHOOD, updater = Updater.SGD)

    log.info("Running training")
    model.fit(iter = trainIterator,
              nbEpoch = iterations,
              listeners = List(new ScoreIterationListener(100)))
    log.info("Training finished")

    log.info(s"Evaluating model on ${testAndTrain.getTest.getLabels.rows()} examples")
    val evaluator        = new Evaluation(numOutputs)
    val output: INDArray = model.predict(testAndTrain.getTest.getFeatureMatrix)
    evaluator.eval(testAndTrain.getTest.getLabels, output)
    log.info(evaluator.stats())

  }
} 
Example 132
Source File: MnistMLP.scala    From scala-deeplearn-examples   with Apache License 2.0 5 votes vote down vote up
package io.brunk.examples.scalnet

import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator
import org.deeplearning4j.eval.Evaluation
import org.deeplearning4j.nn.conf.Updater
import org.deeplearning4j.nn.weights.WeightInit
import org.deeplearning4j.optimize.listeners.ScoreIterationListener
import org.deeplearning4j.scalnet.layers.core.Dense
import org.deeplearning4j.scalnet.models.Sequential
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator
import org.nd4j.linalg.learning.config.Sgd
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.JavaConverters.asScalaIteratorConverter



object MnistMLP {
  private val log: Logger = LoggerFactory.getLogger(MnistMLP.getClass)

  def main(args: Array[String]): Unit = {

    val seed         = 1       // for reproducibility
    val numInputs    = 28 * 28
    val numHidden    = 512     // size (number of neurons) in our hidden layer
    val numOutputs   = 10      // digits from 0 to 9
    val learningRate = 0.01
    val batchSize    = 128
    val numEpochs    = 10

    // download and load the MNIST images as tensors
    val mnistTrain: DataSetIterator = new MnistDataSetIterator(batchSize, true, seed)
    val mnistTest: DataSetIterator = new MnistDataSetIterator(batchSize, false, seed)

    // define the neural network architecture
    val model: Sequential = Sequential(rngSeed = seed)
    model.add(Dense(nOut = numHidden, nIn = numInputs, weightInit = WeightInit.XAVIER, activation = Activation.RELU))
    model.add(Dense(nOut = numOutputs, weightInit = WeightInit.XAVIER, activation = Activation.SOFTMAX)) // softmax output to pair with the MCXENT loss below
    model.add(Dense(nOut = numOutputs, weightInit = WeightInit.XAVIER, activation = Activation.SOFTMAX)) // softmax output to pair with the MCXENT loss below
    model.compile(lossFunction = LossFunction.MCXENT, updater = Updater.SGD) // TODO how do we set the learning rate?

    // train the model
    model.fit(mnistTrain, nbEpoch = numEpochs, List(new ScoreIterationListener(100)))

    // evaluate model performance
    def accuracy(dataSet: DataSetIterator): Double = {
      val evaluator = new Evaluation(numOutputs)
      dataSet.reset()
      for (dataSet <- dataSet.asScala) {
        val output = model.predict(dataSet)
        evaluator.eval(dataSet.getLabels, output)
      }
      evaluator.accuracy()
    }

    log.info(s"Train accuracy = ${accuracy(mnistTrain)}")
    log.info(s"Test accuracy = ${accuracy(mnistTest)}")
  }
} 
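The examples above pass interpolated strings to log.info, which builds the string even when INFO is disabled. SLF4J's parameterized form defers formatting until the level check passes; a brief sketch, independent of the example code:

import org.slf4j.{Logger, LoggerFactory}

object ParameterizedLoggingSketch {
  private val log: Logger = LoggerFactory.getLogger(getClass)

  def main(args: Array[String]): Unit = {
    val trainAcc = 0.97
    val testAcc  = 0.95
    // "{}" placeholders are substituted only if INFO is enabled; primitives are boxed to AnyRef.
    log.info("Train accuracy = {}", Double.box(trainAcc))
    log.info("Train accuracy = {}, test accuracy = {}", Double.box(trainAcc), Double.box(testAcc))
  }
}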
Example 133
Source File: IrisMLP.scala    From scala-deeplearn-examples   with Apache License 2.0 5 votes vote down vote up
package io.brunk.examples.dl4j

import io.brunk.examples.IrisReader
import org.deeplearning4j.eval.Evaluation
import org.deeplearning4j.nn.conf.NeuralNetConfiguration
import org.deeplearning4j.nn.conf.layers.{ DenseLayer, OutputLayer }
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.deeplearning4j.nn.weights.WeightInit
import org.deeplearning4j.optimize.listeners.ScoreIterationListener
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction
import org.slf4j.{ Logger, LoggerFactory }


object IrisMLP {
  private val log: Logger = LoggerFactory.getLogger(IrisMLP.getClass)

  def main(args: Array[String]): Unit = {

    val seed         = 1  // for reproducibility
    val numInputs    = 4
    val numHidden    = 10
    val numOutputs   = 3
    val learningRate = 0.1
    val numEpoch     = 30

    val testAndTrain = IrisReader.readData()

    val conf = new NeuralNetConfiguration.Builder()
      .seed(seed)
      .activation(Activation.RELU)
      .weightInit(WeightInit.XAVIER)
      .list()
      .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHidden).build())
      .layer(1,
             new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD)
               .activation(Activation.SOFTMAX)
               .nIn(numHidden)
               .nOut(numOutputs)
               .build())
      .backprop(true)
      .pretrain(false)
      .build()

    val model = new MultiLayerNetwork(conf)
    model.init()
    model.setListeners(new ScoreIterationListener(100)) // print out scores every 100 iterations

    log.info("Running training")
    for(_ <- 0 until numEpoch)
      model.fit(testAndTrain.getTrain)

    log.info("Training finished")

    log.info(s"Evaluating model on ${testAndTrain.getTest.getLabels.rows()} examples")
    val evaluator = new Evaluation(numOutputs)
    val output    = model.output(testAndTrain.getTest.getFeatureMatrix)
    evaluator.eval(testAndTrain.getTest.getLabels, output)
    println(evaluator.stats)
  }
} 
Example 134
Source File: ScorexLogging.scala    From Waves   with MIT License 5 votes vote down vote up
package com.wavesplatform.utils

import monix.eval.Task
import monix.execution.{CancelableFuture, Scheduler}
import monix.reactive.Observable
import org.slf4j.{Logger, LoggerFactory}

case class LoggerFacade(logger: Logger) {
  def trace(message: => String, throwable: Throwable): Unit = {
    if (logger.isTraceEnabled)
      logger.trace(message, throwable)
  }

  def trace(message: => String): Unit = {
    if (logger.isTraceEnabled)
      logger.trace(message)
  }

  def debug(message: => String, arg: Any): Unit = {
    if (logger.isDebugEnabled)
      logger.debug(message, arg)
  }

  def debug(message: => String): Unit = {
    if (logger.isDebugEnabled)
      logger.debug(message)
  }

  def info(message: => String): Unit = {
    if (logger.isInfoEnabled)
      logger.info(message)
  }

  def info(message: => String, arg: Any): Unit = {
    if (logger.isInfoEnabled)
      logger.info(message, arg)
  }

  def info(message: => String, throwable: Throwable): Unit = {
    if (logger.isInfoEnabled)
      logger.info(message, throwable)
  }

  def warn(message: => String): Unit = {
    if (logger.isWarnEnabled)
      logger.warn(message)
  }

  def warn(message: => String, throwable: Throwable): Unit = {
    if (logger.isWarnEnabled)
      logger.warn(message, throwable)
  }

  def error(message: => String): Unit = {
    if (logger.isErrorEnabled)
      logger.error(message)
  }

  def error(message: => String, throwable: Throwable): Unit = {
    if (logger.isErrorEnabled)
      logger.error(message, throwable)
  }
}

trait ScorexLogging {
  protected lazy val log = LoggerFacade(LoggerFactory.getLogger(this.getClass))

  implicit class TaskExt[A](t: Task[A]) {
    def runAsyncLogErr(implicit s: Scheduler): CancelableFuture[A] =
      logErr.runToFuture(s)

    def logErr: Task[A] = {
      t.onErrorHandleWith(ex => {
        log.error(s"Error executing task", ex)
        Task.raiseError[A](ex)
      })
    }
  }

  implicit class ObservableExt[A](o: Observable[A]) {

    def logErr: Observable[A] = {
      o.onErrorHandleWith(ex => {
        log.error(s"Error observing item", ex)
        Observable.raiseError[A](ex)
      })
    }
  }
} 
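As a usage illustration (not part of the Waves sources, and assuming the ScorexLogging trait above is on the classpath), a class only needs to mix in ScorexLogging to get the lazily created log facade and the Task helper:

import monix.eval.Task
import monix.execution.Scheduler.Implicits.global

// Hypothetical component for illustration; the class name and task are made up.
class BlockImporter extends ScorexLogging {
  def importBlock(height: Int): Unit =
    log.info(s"importing block at height $height")

  // runAsyncLogErr (from the inherited TaskExt) logs and rethrows any failure of the task.
  def importAsync(height: Int) =
    Task(importBlock(height)).runAsyncLogErr
}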
Example 135
Source File: LogPublisherHub.scala    From vamp   with Apache License 2.0 5 votes vote down vote up
package io.vamp.common.akka

import akka.actor.{ ActorRef, ActorSystem }
import ch.qos.logback.classic.filter.ThresholdFilter
import ch.qos.logback.classic.spi.ILoggingEvent
import ch.qos.logback.classic.{ Level, LoggerContext, Logger ⇒ LogbackLogger }
import ch.qos.logback.core.AppenderBase
import io.vamp.common.Namespace
import org.slf4j.{ Logger, LoggerFactory }

import scala.collection.mutable

object LogPublisherHub {

  private val logger = LoggerFactory.getLogger(LogPublisherHub.getClass)

  private val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext]
  private val rootLogger = context.getLogger(Logger.ROOT_LOGGER_NAME)

  private val sessions: mutable.Map[String, LogPublisher] = new mutable.HashMap()

  def subscribe(to: ActorRef, level: String, loggerName: Option[String], encoder: (ILoggingEvent) ⇒ AnyRef)(implicit actorSystem: ActorSystem, namespace: Namespace): Unit = {
    val appenderLevel = Level.toLevel(level, Level.INFO)
    val appenderLogger = loggerName.map(context.getLogger).getOrElse(rootLogger)

    val exists = sessions.get(to.toString).exists { publisher ⇒
      publisher.level == appenderLevel && publisher.logger.getName == appenderLogger.getName
    }

    if (!exists) {
      unsubscribe(to)
      if (appenderLevel != Level.OFF) {
        logger.info(s"Starting log publisher [${appenderLevel.levelStr}] '${appenderLogger.getName}': $to")
        val publisher = LogPublisher(to, appenderLogger, appenderLevel, encoder)
        publisher.start()
        sessions.put(to.toString, publisher)
      }
    }
  }

  def unsubscribe(to: ActorRef): Unit = {
    sessions.remove(to.toString).foreach { publisher ⇒
      logger.info(s"Stopping log publisher: $to")
      publisher.stop()
    }
  }
}

private case class LogPublisher(to: ActorRef, logger: LogbackLogger, level: Level, encoder: (ILoggingEvent) ⇒ AnyRef)(implicit actorSystem: ActorSystem, namespace: Namespace) {

  private val filter = new ThresholdFilter()
  filter.setLevel(level.levelStr)

  private val appender = new AppenderBase[ILoggingEvent] {
    override def append(loggingEvent: ILoggingEvent) = to ! encoder(loggingEvent)
  }

  appender.addFilter(filter)
  appender.setName(to.toString)

  def start() = {
    val context = logger.getLoggerContext
    filter.setContext(context)
    appender.setContext(context)
    filter.start()
    appender.start()
    logger.addAppender(appender)
  }

  def stop() = {
    appender.stop()
    filter.stop()
    logger.detachAppender(appender)
  }
} 
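The core of LogPublisher.start is programmatic Logback wiring: build an appender, bind it to the logger context, and attach it to a logger. A standalone sketch of just that wiring, assuming logback-classic is the bound SLF4J backend; the object name and messages are illustrative:

import ch.qos.logback.classic.spi.ILoggingEvent
import ch.qos.logback.classic.{LoggerContext, Logger => LogbackLogger}
import ch.qos.logback.core.AppenderBase
import org.slf4j.{Logger, LoggerFactory}

object AppenderWiringSketch {
  def main(args: Array[String]): Unit = {
    val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext]
    val rootLogger: LogbackLogger = context.getLogger(Logger.ROOT_LOGGER_NAME)

    // Custom appender that forwards every event somewhere else (here: stdout).
    val appender = new AppenderBase[ILoggingEvent] {
      override def append(event: ILoggingEvent): Unit =
        println(s"[forwarded] ${event.getLevel} ${event.getFormattedMessage}")
    }
    appender.setContext(context)
    appender.start()
    rootLogger.addAppender(appender)

    LoggerFactory.getLogger("sketch").info("hello") // now also reaches the custom appender
  }
}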
Example 136
Source File: DataUtilities.scala    From dl4scala   with MIT License 5 votes vote down vote up
package org.dl4scala.examples.utilities

import java.io._

import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.slf4j.{Logger, LoggerFactory}


object DataUtilities {
  val logger: Logger = LoggerFactory.getLogger(DataUtilities.getClass)
  private val BUFFER_SIZE = 4096

  @throws(classOf[IOException])
  def extractTarGz(filePath: String, outputPath: String): Unit = {
    var fileCount = 0
    var dirCount = 0

    logger.info("Extracting files")

    val tais = new TarArchiveInputStream(new GzipCompressorInputStream(
      new BufferedInputStream(new FileInputStream(filePath))))
    // Read the tar entries using the getNextEntry method
    Stream.continually(tais.getNextTarEntry).takeWhile(_ != null).foreach { entry =>
      // Create directories as required
      if (entry.isDirectory) {
        new File(outputPath + "/" + entry.getName).mkdirs
        dirCount += 1
      } else {
        val data = new Array[Byte](BUFFER_SIZE)
        val fos = new FileOutputStream(outputPath + "/" + entry.getName)
        val dest = new BufferedOutputStream(fos, BUFFER_SIZE)
        Stream.continually(tais.read(data, 0, BUFFER_SIZE)).takeWhile(_ != -1).foreach{ count =>
          dest.write(data, 0, count)
        }
        dest.close()
        fileCount = fileCount + 1
      }
      if (fileCount % 1000 == 0) logger.info(".")
    }

    tais.close()
  }
} 
Example 137
Source File: MLPMnistTwoLayerExample.scala    From dl4scala   with MIT License 5 votes vote down vote up
package org.dl4scala.examples.feedforward.mnist

import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator
import org.deeplearning4j.eval.Evaluation
import org.deeplearning4j.nn.api.OptimizationAlgorithm
import org.deeplearning4j.nn.conf.layers.{DenseLayer, OutputLayer}
import org.deeplearning4j.nn.conf.{NeuralNetConfiguration, Updater}
import org.deeplearning4j.nn.weights.WeightInit
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction
import org.slf4j.{Logger, LoggerFactory}
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.deeplearning4j.optimize.listeners.ScoreIterationListener
import org.nd4j.linalg.learning.config.Nesterovs


object MLPMnistTwoLayerExample extends App{
  private val log: Logger = LoggerFactory.getLogger(MLPMnistTwoLayerExample.getClass)

  // number of rows and columns in the input pictures
  private  val numRows = 28
  private  val numColumns = 28
  private  val outputNum = 10 // number of output classes
  private  val batchSize = 128 // batch size for each epoch
  private  val rngSeed = 123 // random number seed for reproducibility
  private  val numEpochs = 15 // number of epochs to perform
  private  val rate: Double = 0.0015 // learning rate

  // Get the DataSetIterators:
  private val mnistTrain = new MnistDataSetIterator(batchSize, true, rngSeed)
  private val mnistTest = new MnistDataSetIterator(batchSize, false, rngSeed)

  log.info("Build model....")

  private val conf = new NeuralNetConfiguration
    .Builder()
    .seed(rngSeed) // include a random seed for reproducibility
    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) // use stochastic gradient descent as an optimization algorithm
    .iterations(1)
    .activation(Activation.RELU)
    .weightInit(WeightInit.XAVIER)
    .learningRate(rate) // specify the learning rate
    .updater(new Nesterovs(0.98))
    .regularization(true).l2(rate * 0.005) // regularize learning model
    .list()
    .layer(0, new DenseLayer.Builder() // create the first input layer.
      .nIn(numRows * numColumns)
      .nOut(500)
      .build())
    .layer(1, new DenseLayer.Builder() // create the second input layer
      .nIn(500)
      .nOut(100)
      .build())
    .layer(2, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD) // create the output layer
      .activation(Activation.SOFTMAX)
      .nIn(100)
      .nOut(outputNum)
      .build())
    .pretrain(false).backprop(true)
    .build()

  val model = new MultiLayerNetwork(conf)
  model.init()
  model.setListeners(new ScoreIterationListener(5)) // print the score with every iteration

  log.info("Train model....")

  for(i <- 0 until numEpochs){
    model.fit(mnistTrain)
  }

  log.info("Evaluate model....")
  val eval = new Evaluation(outputNum) // create an evaluation object with 10 possible classes

  while(mnistTest.hasNext){
    val next = mnistTest.next
    val output = model.output(next.getFeatureMatrix) // get the networks prediction
    eval.eval(next.getLabels, output) // check the prediction against the true class
  }

  log.info(eval.stats)
  log.info("****************Example finished********************")
} 
Example 138
Source File: ExecutorSystemLauncher.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.cluster.appmaster

import scala.concurrent.duration._

import akka.actor._
import org.slf4j.Logger

import org.apache.gearpump.cluster.AppMasterToWorker.LaunchExecutor
import org.apache.gearpump.cluster.ExecutorJVMConfig
import org.apache.gearpump.cluster.WorkerToAppMaster._
import org.apache.gearpump.cluster.appmaster.ExecutorSystemLauncher._
import org.apache.gearpump.cluster.appmaster.ExecutorSystemScheduler.{ExecutorSystemJvmConfig, Session}
import org.apache.gearpump.cluster.scheduler.Resource
import org.apache.gearpump.util.ActorSystemBooter.{ActorSystemRegistered, RegisterActorSystem}
import org.apache.gearpump.util.{ActorSystemBooter, ActorUtil, Constants, LogUtil}


private[appmaster]
class ExecutorSystemLauncher(appId: Int, session: Session) extends Actor {

  private val LOG: Logger = LogUtil.getLogger(getClass)

  val scheduler = context.system.scheduler
  implicit val executionContext = context.dispatcher

  private val systemConfig = context.system.settings.config
  val timeoutSetting = systemConfig.getInt(Constants.GEARPUMP_START_EXECUTOR_SYSTEM_TIMEOUT_MS)

  val timeout = scheduler.scheduleOnce(timeoutSetting.milliseconds,
    self, LaunchExecutorSystemTimeout(session))

  def receive: Receive = waitForLaunchCommand

  def waitForLaunchCommand: Receive = {
    case LaunchExecutorSystem(worker, executorSystemId, resource) =>
      val launcherPath = ActorUtil.getFullPath(context.system, self.path)
      val jvmConfig = Option(session.executorSystemJvmConfig)
        .map(getExecutorJvmConfig(_, s"app${appId}system${executorSystemId}", launcherPath)).orNull

      val launch = LaunchExecutor(appId, executorSystemId, resource, jvmConfig)
      LOG.info(s"Launching Executor ...appId: $appId, executorSystemId: $executorSystemId, " +
        s"slots: ${resource.slots} on worker $worker")

      worker.ref ! launch
      context.become(waitForActorSystemToStart(sender, launch, worker, executorSystemId))
  }

  def waitForActorSystemToStart(
      replyTo: ActorRef, launch: LaunchExecutor, worker: WorkerInfo, executorSystemId: Int)
    : Receive = {
    case RegisterActorSystem(systemPath) =>
      import launch._
      timeout.cancel()
      LOG.info(s"Received RegisterActorSystem $systemPath for session ${session.requestor}")
      sender ! ActorSystemRegistered(worker.ref)
      val system =
        ExecutorSystem(executorId, AddressFromURIString(systemPath), sender, resource, worker)
      replyTo ! LaunchExecutorSystemSuccess(system, session)
      context.stop(self)
    case reject@ExecutorLaunchRejected(reason, ex) =>
      LOG.error(s"Executor Launch ${launch.resource} failed reason: $reason", ex)
      replyTo ! LaunchExecutorSystemRejected(launch.resource, reason, session)
      context.stop(self)
    case timeout: LaunchExecutorSystemTimeout =>
      LOG.error(s"The Executor ActorSystem $executorSystemId has not been started in time")
      replyTo ! timeout
      context.stop(self)
  }
}

private[appmaster]
object ExecutorSystemLauncher {

  case class LaunchExecutorSystem(worker: WorkerInfo, systemId: Int, resource: Resource)

  case class LaunchExecutorSystemSuccess(system: ExecutorSystem, session: Session)

  case class LaunchExecutorSystemRejected(resource: Resource, reason: Any, session: Session)

  case class LaunchExecutorSystemTimeout(session: Session)

  private def getExecutorJvmConfig(conf: ExecutorSystemJvmConfig, systemName: String,
      reportBack: String): ExecutorJVMConfig = {
    Option(conf).map { conf =>
      import conf._
      ExecutorJVMConfig(classPath, jvmArguments, classOf[ActorSystemBooter].getName,
        Array(systemName, reportBack), jar, username, executorAkkaConfig)
    }.getOrElse(null)
  }
} 
Example 139
Source File: RunningApplication.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.cluster.client

import akka.actor.ActorRef
import akka.pattern.ask
import akka.util.Timeout
import org.apache.gearpump.cluster.ClientToMaster.{RegisterAppResultListener, ResolveAppId, ShutdownApplication}
import org.apache.gearpump.cluster.MasterToClient._
import org.apache.gearpump.cluster.client.RunningApplication._
import org.apache.gearpump.util.{ActorUtil, LogUtil}
import org.slf4j.Logger
import java.time.Duration
import java.util.concurrent.TimeUnit

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.concurrent.duration._
import scala.util.{Failure, Success}

class RunningApplication(val appId: Int, master: ActorRef, timeout: Timeout) {
  lazy val appMaster: Future[ActorRef] = resolveAppMaster(appId)

  def shutDown(): Unit = {
    val result = ActorUtil.askActor[ShutdownApplicationResult](master,
      ShutdownApplication(appId), timeout)
    result.appId match {
      case Success(_) =>
      case Failure(ex) => throw ex
    }
  }

  
  def waitUntilFinish(): Unit = {
    this.waitUntilFinish(INF_DURATION)
  }

  def waitUntilFinish(duration: Duration): Unit = {
    val result = ActorUtil.askActor[ApplicationResult](master,
      RegisterAppResultListener(appId), new Timeout(duration.getSeconds, TimeUnit.SECONDS))
    if (result.appId == appId) {
      result match {
        case failed: ApplicationFailed =>
          throw failed.error
        case _: ApplicationSucceeded =>
          LOG.info(s"Application $appId succeeded")
        case _: ApplicationTerminated =>
          LOG.info(s"Application $appId terminated")
      }
    } else {
      LOG.warn(s"Received unexpected result $result for application $appId")
    }
  }

  def askAppMaster[T](msg: Any): Future[T] = {
    appMaster.flatMap(_.ask(msg)(timeout).asInstanceOf[Future[T]])
  }

  private def resolveAppMaster(appId: Int): Future[ActorRef] = {
    master.ask(ResolveAppId(appId))(timeout).
      asInstanceOf[Future[ResolveAppIdResult]].map(_.appMaster.get)
  }
}

object RunningApplication {
  private val LOG: Logger = LogUtil.getLogger(getClass)
  // This magic number is derived from Akka's configuration, which is the maximum delay
  private val INF_DURATION = Duration.ofSeconds(2147482)
} 
Example 140
Source File: Scheduler.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.cluster.scheduler

import akka.actor.{Actor, ActorRef}
import org.apache.gearpump.Time.MilliSeconds
import org.apache.gearpump.cluster.MasterToWorker.{UpdateResourceFailed, UpdateResourceSucceed, WorkerRegistered}
import org.apache.gearpump.cluster.WorkerToMaster.ResourceUpdate
import org.apache.gearpump.cluster.master.Master.WorkerTerminated
import org.apache.gearpump.cluster.scheduler.Scheduler.ApplicationFinished
import org.apache.gearpump.cluster.worker.WorkerId
import org.apache.gearpump.util.LogUtil
import org.slf4j.Logger

import scala.collection.mutable


abstract class Scheduler extends Actor {
  val LOG: Logger = LogUtil.getLogger(getClass)
  protected var resources = new mutable.HashMap[WorkerId, (ActorRef, Resource)]

  def handleScheduleMessage: Receive = {
    case WorkerRegistered(id, _) =>
      if (!resources.contains(id)) {
        LOG.info(s"Worker $id added to the scheduler")
        resources.put(id, (sender, Resource.empty))
      }
    case update@ResourceUpdate(worker, workerId, resource) =>
      LOG.info(s"$update...")
      if (resources.contains(workerId)) {
        val resourceReturned = resource > resources.get(workerId).get._2
        resources.update(workerId, (worker, resource))
        if (resourceReturned) {
          allocateResource()
        }
        sender ! UpdateResourceSucceed
      }
      else {
        sender ! UpdateResourceFailed(
          s"ResourceUpdate failed! The worker $workerId has not been registered into master")
      }
    case WorkerTerminated(workerId) =>
      if (resources.contains(workerId)) {
        resources -= workerId
      }
    case ApplicationFinished(appId) =>
      doneApplication(appId)
  }

  def allocateResource(): Unit

  def doneApplication(appId: Int): Unit
}

object Scheduler {
  case class PendingRequest(
      appId: Int, appMaster: ActorRef, request: ResourceRequest, timeStamp: MilliSeconds)

  case class ApplicationFinished(appId: Int)
} 
Example 141
Source File: LocalJarStore.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.jarstore.local

import java.io._

import com.typesafe.config.Config
import org.apache.gearpump.jarstore.JarStore
import org.apache.gearpump.util.{Constants, FileUtils, LogUtil, Util}
import org.slf4j.Logger


  override def getFile(fileName: String): InputStream = {
    val localFile = new File(rootPath, fileName)
    val is = try {
      new FileInputStream(localFile)
    } catch {
      case ex: Exception =>
        LOG.error(s"Fetch file $fileName failed: ${ex.getStackTrace}")
        new ClosedInputStream
    }
    is
  }

  private def createDirIfNotExists(file: File): Unit = {
    if (!file.exists()) {
      FileUtils.forceMkdir(file)
    }
  }
} 
Example 142
Source File: JarStoreClient.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.jarstore

import java.io.File
import java.util.concurrent.TimeUnit
import scala.collection.JavaConverters._
import scala.concurrent.duration.Duration
import scala.concurrent.Await

import akka.pattern.ask
import akka.actor.{ActorSystem, ActorRef}
import com.typesafe.config.Config
import org.apache.gearpump.cluster.master.MasterProxy
import org.apache.gearpump.util.{Util, Constants, LogUtil}
import org.slf4j.Logger

import org.apache.gearpump.cluster.ClientToMaster.{GetJarStoreServer, JarStoreServerAddress}
import scala.concurrent.{Future, ExecutionContext}

class JarStoreClient(config: Config, system: ActorSystem) {
  private def LOG: Logger = LogUtil.getLogger(getClass)
  private implicit val timeout = Constants.FUTURE_TIMEOUT
  private implicit def dispatcher: ExecutionContext = system.dispatcher

  private val master: ActorRef = {
    val masters = config.getStringList(Constants.GEARPUMP_CLUSTER_MASTERS)
      .asScala.flatMap(Util.parseHostList)
    system.actorOf(MasterProxy.props(masters), s"masterproxy${Util.randInt()}")
  }

  private lazy val client = (master ? GetJarStoreServer).asInstanceOf[Future[JarStoreServerAddress]]
    .map { address =>
      val client = new FileServer.Client(system, address.url)
      client
    }

  
  def copyFromLocal(localFile: File): FilePath = {
    val future = client.flatMap(_.upload(localFile))
    Await.result(future, Duration(60, TimeUnit.SECONDS))
  }
} 
Example 143
Source File: TimeOutSchedulerSpec.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.util

import scala.concurrent.duration._

import akka.actor._
import akka.testkit.{ImplicitSender, TestActorRef, TestKit, TestProbe}
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike}
import org.slf4j.Logger

import org.apache.gearpump.cluster.TestUtil

class TimeOutSchedulerSpec(_system: ActorSystem) extends TestKit(_system) with ImplicitSender
  with WordSpecLike with Matchers with BeforeAndAfterAll {

  def this() = this(ActorSystem("WorkerSpec", TestUtil.DEFAULT_CONFIG))
  val mockActor = TestProbe()

  override def afterAll {
    TestKit.shutdownActorSystem(system)
  }

  "The TimeOutScheduler" should {
    "handle the time out event" in {
      val testActorRef = TestActorRef(Props(classOf[TestActor], mockActor.ref))
      val testActor = testActorRef.underlyingActor.asInstanceOf[TestActor]
      testActor.sendMsgToIgnore()
      mockActor.expectMsg(30.seconds, MessageTimeOut)
    }
  }
}

case object Echo
case object MessageTimeOut

class TestActor(mock: ActorRef) extends Actor with TimeOutScheduler {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  val target = context.actorOf(Props(classOf[EchoActor]))

  override def receive: Receive = {
    case _ =>
  }

  def sendMsgToIgnore(): Unit = {
    sendMsgWithTimeOutCallBack(target, Echo, 2000, sendMsgTimeOut())
  }

  private def sendMsgTimeOut(): Unit = {
    mock ! MessageTimeOut
  }
}

class EchoActor extends Actor {
  override def receive: Receive = {
    case _ =>
  }
} 
Example 144
Source File: ScribeLoggerFactory.scala    From scribe   with MIT License 5 votes vote down vote up
package scribe.slf4j

import java.util.concurrent.ConcurrentHashMap

import org.slf4j.{Logger, ILoggerFactory}

class ScribeLoggerFactory extends ILoggerFactory {
  private val map = new ConcurrentHashMap[String, Logger]

  override def getLogger(name: String): Logger = {
    val loggerName = if (name.equalsIgnoreCase(Logger.ROOT_LOGGER_NAME)) {
      ""
    } else {
      name
    }
    Option(map.get(loggerName)) match {
      case Some(logger) => logger
      case None => {
        val adapter = new ScribeLoggerAdapter(loggerName)
        val old = map.putIfAbsent(loggerName, adapter)
        Option(old) match {
          case Some(a) => a
          case None => adapter
        }
      }
    }
  }
} 
Example 145
Source File: ScribeLoggerFactory.scala    From scribe   with MIT License 5 votes vote down vote up
package scribe.slf4j

import java.util.concurrent.ConcurrentHashMap

import org.slf4j.{ILoggerFactory, Logger}

object ScribeLoggerFactory extends ILoggerFactory {
  private lazy val map = new ConcurrentHashMap[String, Logger]()

  override def getLogger(name: String): Logger = Option(map.get(name)) match {
    case Some(logger) => logger
    case None => {
      val logger = new ScribeLoggerAdapter(name)
      val oldInstance = map.putIfAbsent(name, logger)
      Option(oldInstance).getOrElse(logger)
    }
  }
} 
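Both factories implement get-or-create by hand with putIfAbsent. On Java 8+ (and Scala 2.12+, where the lambda converts to java.util.function.Function) the same caching can be expressed with ConcurrentHashMap.computeIfAbsent. A sketch of that alternative, reusing the project's ScribeLoggerAdapter purely for illustration:

import java.util.concurrent.ConcurrentHashMap
import org.slf4j.{ILoggerFactory, Logger}

object ComputeIfAbsentLoggerFactory extends ILoggerFactory {
  private val map = new ConcurrentHashMap[String, Logger]()

  // computeIfAbsent creates the adapter at most once per name, atomically.
  override def getLogger(name: String): Logger =
    map.computeIfAbsent(name, n => new ScribeLoggerAdapter(n))
}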
Example 146
Source File: SequoiadbRDDIterator.scala    From spark-sequoiadb   with Apache License 2.0 5 votes vote down vote up
package com.sequoiadb.spark.rdd


import _root_.com.sequoiadb.spark.SequoiadbConfig
import _root_.com.sequoiadb.spark.io.SequoiadbReader
import org.apache.spark._
import org.apache.spark.sql.sources.Filter
import org.bson.BSONObject
import org.slf4j.{Logger, LoggerFactory}
//import java.io.FileOutputStream;  


class SequoiadbRDDIterator(
  taskContext: TaskContext,
  partition: Partition,
  config: SequoiadbConfig,
  requiredColumns: Array[String],
  filters: Array[Filter],
  queryReturnType: Int = SequoiadbConfig.QUERYRETURNBSON,
  queryLimit: Long = -1)
  extends Iterator[BSONObject] {

  
  
  private var LOG: Logger = LoggerFactory.getLogger(this.getClass.getName())
  protected var finished = false
  private var closed = false
  private var initialized = false

  lazy val reader = {
    initialized = true
    initReader()
  }

  // Register an on-task-completion callback to close the input stream.
  taskContext.addTaskCompletionListener((context: TaskContext) => closeIfNeeded())

  override def hasNext: Boolean = {
    !finished && reader.hasNext
  }

  override def next(): BSONObject = {
    if (!hasNext) {
      throw new NoSuchElementException("End of stream")
    }
    reader.next()
  }

  def closeIfNeeded(): Unit = {
    if (!closed) {
      close()
      closed = true
    }
  }

  protected def close(): Unit = {
    if (initialized) {
      reader.close()
    }
  }

  def initReader() = {
    val reader = new SequoiadbReader(config,requiredColumns,filters, queryReturnType, queryLimit)
    reader.init(partition)
    reader
  }
} 
Example 147
Source File: SequoiadbRDD.scala    From spark-sequoiadb   with Apache License 2.0 5 votes vote down vote up
package com.sequoiadb.spark.rdd

import org.apache.spark.SparkContext
import _root_.com.sequoiadb.spark.SequoiadbConfig
import com.sequoiadb.spark.partitioner._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.Filter
import org.apache.spark.{Partition, TaskContext}
import org.bson.BSONObject
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.mutable.ArrayBuffer
//import java.io.FileOutputStream;  


  def apply (
    sc: SQLContext,
    config: SequoiadbConfig,
    partitioner: Option[SequoiadbPartitioner] = None,
    requiredColumns: Array[String] = Array(),
    filters: Array[Filter] = Array(),
    queryReturnType: Int = SequoiadbConfig.QUERYRETURNBSON,
    queryLimit: Long = -1) = {
    new SequoiadbRDD ( sc.sparkContext, config, partitioner,
      requiredColumns, filters, queryReturnType, queryLimit)
  }
} 
Example 148
Source File: SequoiadbWriter.scala    From spark-sequoiadb   with Apache License 2.0 5 votes vote down vote up
package com.sequoiadb.spark.io


  def save(it: Iterator[Row], schema: StructType): Unit = {
    try {
      ds = Option(new SequoiadbDatasource (
          config[List[String]](SequoiadbConfig.Host),
          config[String](SequoiadbConfig.Username),
          config[String](SequoiadbConfig.Password),
          ConnectionUtil.initConfigOptions,
          ConnectionUtil.initSequoiadbOptions ))
      // pickup a connection
      connection = Option(ds.get.getConnection)
      
      // locate collection
      val cl = connection.get.getCollectionSpace(
          config[String](SequoiadbConfig.CollectionSpace)).getCollection(
              config[String](SequoiadbConfig.Collection))
      LOG.info ("bulksize = " + config[String](SequoiadbConfig.BulkSize))
      // loop through it and perform batch insert
      // batch size is defined in SequoiadbConfig.BulkSize
      val list : ArrayList[BSONObject] = new ArrayList[BSONObject]()
      while ( it.hasNext ) {
        val record = it.next
        val bsonrecord = SequoiadbRowConverter.rowAsDBObject ( record, schema )
        list.add(bsonrecord)
        if ( list.size >= config[String](SequoiadbConfig.BulkSize).toInt ) {
          cl.bulkInsert ( list, 0 )
          list.clear
        }
      }
      // insert rest of the record if there's any
      if ( list.size > 0 ) {
        cl.bulkInsert ( list, 0 )
        list.clear
      }
    } catch {
      case ex: Exception => throw SequoiadbException(ex.getMessage, ex)
    } finally {
      ds.fold(ifEmpty=()) { connectionpool =>
        connection.fold(ifEmpty=()) { conn =>
          connectionpool.close(conn)
        }
        connectionpool.close
      } // ds.fold(ifEmpty=())
    } // finally
  } // def save(it: Iterator[BSONObject]): Unit =
} 
Example 149
Source File: AbstractLoggingServiceRegistryClient.scala    From lagom   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.devmode.internal.registry

import java.net.URI

import org.slf4j.Logger
import org.slf4j.LoggerFactory

import scala.collection.immutable
import scala.concurrent.ExecutionContext
import scala.concurrent.Future
import scala.util.Failure
import scala.util.Success

private[lagom] abstract class AbstractLoggingServiceRegistryClient(implicit ec: ExecutionContext)
    extends ServiceRegistryClient {
  protected val log: Logger = LoggerFactory.getLogger(getClass)

  override def locateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]] = {
    require(
      serviceName != ServiceRegistryClient.ServiceName,
      "The service registry client cannot locate the service registry service itself"
    )
    log.debug("Locating service name=[{}] ...", serviceName)

    val location: Future[immutable.Seq[URI]] = internalLocateAll(serviceName, portName)

    location.onComplete {
      case Success(Nil) =>
        log.warn("serviceName=[{}] was not found. Hint: Maybe it was not started?", serviceName)
      case Success(uris) =>
        log.debug("serviceName=[{}] can be reached at uris=[{}]", serviceName: Any, uris: Any)
      case Failure(e) =>
        log.warn("Service registry replied with an error when looking up serviceName=[{}]", serviceName: Any, e: Any)
    }

    location
  }

  protected def internalLocateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]]
} 
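A hypothetical subclass, just to show the contract: supply internalLocateAll and the debug/warn logging in locateAll comes for free. This assumes ServiceRegistryClient declares no further abstract members; the class name and data are made up:

import java.net.URI
import scala.collection.immutable
import scala.concurrent.{ExecutionContext, Future}

// Resolves services from a fixed in-memory map; real clients would call a registry.
class StaticServiceRegistryClient(entries: Map[String, immutable.Seq[URI]])(implicit ec: ExecutionContext)
    extends AbstractLoggingServiceRegistryClient {

  override protected def internalLocateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]] =
    Future.successful(entries.getOrElse(serviceName, Nil))
}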
Example 150
Source File: LivySubmit.scala    From spark-bench   with Apache License 2.0 5 votes vote down vote up
package com.ibm.sparktc.sparkbench.sparklaunch.submission.livy

import com.ibm.sparktc.sparkbench.sparklaunch.confparse.SparkJobConf
import com.ibm.sparktc.sparkbench.sparklaunch.submission.livy.LivySubmit._
import com.ibm.sparktc.sparkbench.sparklaunch.submission.Submitter
import com.ibm.sparktc.sparkbench.utils.SparkBenchException
import com.softwaremill.sttp.{Id, SttpBackend}
import org.slf4j.{Logger, LoggerFactory}

import scala.annotation.tailrec
import scala.sys.ShutdownHookThread

object LivySubmit {
  val log: Logger = LoggerFactory.getLogger(this.getClass)
  val successCode = 200

  import com.softwaremill.sttp._

  val emptyBodyException: SparkBenchException = SparkBenchException("REST call returned empty message body")
  val nonSuccessCodeException: Int => SparkBenchException = (code: Int) => SparkBenchException(s"REST call returned non-success code: $code")

  def apply(): LivySubmit = {
    new LivySubmit()(HttpURLConnectionBackend())
  }

  def cancelAllBatches(livyWithID: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyDelete] = {
    log.info(s"Cancelling batch request id: ${livyWithID.id}")
    val response = livyWithID.deleteRequest.send()
    (response.is200, response.body) match {
      case (true, Right(bod)) => if (bod.msg == "deleted") response else throw SparkBenchException(s"Unexpected status for delete request: ${bod.msg}")
      case (true, Left(b))    => throw emptyBodyException
      case (_, _)             => throw nonSuccessCodeException(response.code)
    }
  }

  def sendPostBatchRequest(conf: SparkJobConf)
                          (implicit backend: SttpBackend[Id, Nothing]):
                            (LivyRequestWithID, Response[ResponseBodyBatch]) = {
    val livyRequest = LivyRequest(conf)
    log.info(s"Sending Livy POST request:\n${livyRequest.postRequest.toString}")
    val response: Id[Response[ResponseBodyBatch]] = livyRequest.postRequest.send()
    (response.isSuccess, response.body) match {
      case (true, Left(_)) => throw emptyBodyException
      case (false, Left(_)) => throw nonSuccessCodeException(response.code)
      case (false, Right(bod)) => throw SparkBenchException(s"POST Request to ${livyRequest.postBatchUrl} failed:\n" +
        s"${bod.log.mkString("\n")}")
      case (_,_) => // no exception thrown
    }
    val livyWithID = LivyRequestWithID(livyRequest, response.body.right.get.id)
    (livyWithID, response)
  }

  private def pollHelper(request: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyState] = {
    Thread.sleep(request.pollSeconds * 1000)
    log.info(s"Sending Livy status GET request:\n${request.statusRequest.toString}")
    val response: Id[Response[ResponseBodyState]] = request.statusRequest.send()
    response
  }

  @tailrec
  def poll(request: LivyRequestWithID, response: Response[ResponseBodyState])
          (implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyState] = (response.isSuccess, response.body) match {
    case (false, _) => throw SparkBenchException(s"Request failed with code ${response.code}")
    case (_, Left(_)) => throw emptyBodyException
    case (true, Right(bod)) => bod.state match {
      case "success" => response
      case "dead" => throw SparkBenchException(s"Poll request failed with state: dead\n" + getLogs(request))
      case "running" => poll(request, pollHelper(request))
      case st => throw SparkBenchException(s"Poll request failed with state: $st")
    }
  }

  def getLogs(request: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): String = {
    val response = request.logRequest.send()
    (response.is200, response.body) match {
      case (true, Right(bod)) => bod.log.mkString("\n")
      case (false, Right(_)) => throw SparkBenchException(s"Log request failed with code: ${response.code}")
      case (_, Left(_)) => throw emptyBodyException
    }
  }
}

class LivySubmit()(implicit val backend: SttpBackend[Id, Nothing]) extends Submitter {
  override def launch(conf: SparkJobConf): Unit = {
    val (livyWithID, postResponse) = sendPostBatchRequest(conf)(backend)
    val shutdownHook: ShutdownHookThread = sys.ShutdownHookThread {
      // interrupt any batches
      cancelAllBatches(livyWithID)(backend)
    }
    val pollResponse = poll(livyWithID, pollHelper(livyWithID))(backend)
    // The request has completed, so we're going to remove the shutdown hook.
    shutdownHook.remove()
  }
} 
Example 151
Source File: CLIKickoff.scala    From spark-bench   with Apache License 2.0 5 votes vote down vote up
package com.ibm.sparktc.sparkbench.cli

import org.slf4j.{Logger, LoggerFactory}
import com.ibm.sparktc.sparkbench.workload.MultipleSuiteKickoff

object CLIKickoff extends App {
  override def main(args: Array[String]): Unit = {
    val log: Logger = LoggerFactory.getLogger(this.getClass)
    log.info(s"args received: ${args.mkString(", ")}")
    if(args.isEmpty) throw new IllegalArgumentException("CLIKickoff received no arguments")
    val oneStr = args.mkString(" ")
    val worksuites = Configurator(oneStr)
    MultipleSuiteKickoff.run(worksuites)
  }
} 
Example 152
Source File: RestartableStreamingApp.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import com.twilio.open.streaming.trend.discovery.config.Configuration
import com.twilio.open.streaming.trend.discovery.listeners.SparkStreamingQueryListener
import org.apache.spark.sql.SparkSession
import org.slf4j.Logger

trait StreamingApp[+Configuration] {
  val config: Configuration
  val logger: Logger
  def run(): Unit
}

trait Restartable {
  def restart(): Unit
}

trait RestartableStreamingApp[T <: Configuration] extends StreamingApp[T] with Restartable {
  val spark: SparkSession

  val streamingQueryListener: SparkStreamingQueryListener = {
    new SparkStreamingQueryListener(spark, restart)
  }

  def monitoredRun(): Unit = {
    run()
    monitorStreams()
  }

  
  def restart(): Unit = {
    logger.info(s"restarting the application. cleaning up old stream listener and streams")

    val streams = spark.streams
    streams.removeListener(streamingQueryListener)
    streams.active.foreach { stream =>
      logger.info(s"stream_name=${stream.name} state=active status=${stream.status} action=stop_stream")
      stream.stop()
    }
    logger.info(s"attempting to restart the application")
    monitoredRun()
  }
} 
Example 153
Source File: SparkStreamingQueryListener.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

object SparkStreamingQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = {
    new SparkStreamingQueryListener(spark, restart)
  }

}

class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import SparkStreamingQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)


  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  //https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows

      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong)
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong)
    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }

    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
} 
Example 154
Source File: EventAggregationSpec.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.util

import com.twilio.open.protocol.Calls.CallEvent
import com.twilio.open.protocol.Metrics
import com.twilio.open.streaming.trend.discovery.streams.EventAggregation
import org.apache.kafka.common.serialization.{Deserializer, Serializer, StringDeserializer, StringSerializer}
import org.apache.spark.sql.streaming.{OutputMode, Trigger}
import org.apache.spark.sql._
import org.apache.spark.sql.kafka010.KafkaTestUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.{Logger, LoggerFactory}

class EventAggregationSpec extends KafkaBackedTest[String, CallEvent] {
  override val testUtils = new KafkaTestUtils[String, CallEvent] {
    override val keySerializer: Serializer[String] = new StringSerializer
    override val keyDeserializer: Deserializer[String] = new StringDeserializer
    override val valueSerializer: Serializer[CallEvent] = new CallEventSerializer
    override val valueDeserializer: Deserializer[CallEvent] = new CallEventDeserializer
  }
  override protected val kafkaTopic = "spark.summit.call.events"
  override protected val partitions = 8

  private val pathToTestScenarios = "src/test/resources/scenarios"

  val log: Logger = LoggerFactory.getLogger(classOf[EventAggregation])

  lazy val session: SparkSession = sparkSql

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("aggregation-test-app")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.shuffle.partitions", "32")
      .set("spark.executor.cores", "4")
      .set("spark.executor.memory", "1g")
      .set("spark.ui.enabled", "false")
      .setJars(SparkContext.jarOfClass(classOf[EventAggregation]).toList)
  }

  test("Should aggregate call events") {
    import session.implicits._
    val appConfig = appConfigForTest()
    val scenario = TestHelper.loadScenario[CallEvent](s"$pathToTestScenarios/pdd_events.json")
    val scenarioIter = scenario.toIterator
    scenario.nonEmpty shouldBe true

    testUtils.createTopic(kafkaTopic, partitions, overwrite = true)
    sendNextMessages(scenarioIter, 30, _.getEventId, _.getLoggedEventTime)

    val trendDiscoveryApp = new TrendDiscoveryApp(appConfigForTest(), session)
    val eventAggregation = EventAggregation(appConfig)

    eventAggregation.process(trendDiscoveryApp.readKafkaStream())(session)
      .writeStream
      .queryName("calleventaggs")
      .format("memory")
      .outputMode(eventAggregation.outputMode)
      .start()
      .processAllAvailable()

    val df = session.sql("select * from calleventaggs")
    df.printSchema()
    df.show

    val res = session
      .sql("select avg(stats.p99) from calleventaggs")
      .collect()
      .map { r => r.getAs[Double](0) }
      .head

    DiscoveryUtils.round(res) shouldEqual 7.13

  }


}

class CallEventSerializer extends Serializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def serialize(topic: String, data: CallEvent): Array[Byte] = data.toByteArray
  override def close(): Unit = {}
}

class CallEventDeserializer extends Deserializer[CallEvent] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}
  override def deserialize(topic: String, data: Array[Byte]): CallEvent = CallEvent.parseFrom(data)
  override def close(): Unit = {}
} 
Example 155
Source File: TestHelper.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.io.{ByteArrayInputStream, InputStream}

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.protobuf.Message
import com.googlecode.protobuf.format.JsonFormat
import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.Seq
import scala.io.Source
import scala.reflect.ClassTag
import scala.reflect.classTag

object TestHelper {
  val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper")
  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
  }

  val jsonFormat: JsonFormat = new JsonFormat

  def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = {
    val fileString = Source.fromFile(file).mkString
    val parsed = mapper.readValue(fileString, classOf[Sceanario])
    parsed.input.map { data =>
      val json = mapper.writeValueAsString(data)
      convert[T](json)
    }
  }

  def convert[T<: Message : ClassTag](json: String): T = {
    val clazz = classTag[T].runtimeClass
    val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder]
    try {
      val input: InputStream = new ByteArrayInputStream(json.getBytes())
      jsonFormat.merge(input, builder)
      builder.build().asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw e
    }
  }

}

@SerialVersionUID(1L)
case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable

case class Sceanario(input: Seq[Any], expected: Option[Any] = None)

trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider {
  self: Suite =>

  @transient var _sparkSql: SparkSession = _
  @transient private var _sc: SparkContext = _

  override def sc: SparkContext = _sc

  def conf: SparkConf

  def sparkSql: SparkSession = _sparkSql

  override def beforeAll() {
    _sparkSql = SparkSession.builder().config(conf).getOrCreate()

    _sc = _sparkSql.sparkContext
    setup(_sc)
    super.beforeAll()
  }

  override def afterAll() {
    try {
      _sparkSql.close()
      _sparkSql = null
      LocalSparkContext.stop(_sc)
      _sc = null
    } finally {
      super.afterAll()
    }
  }

} 
Example 156
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString
  
  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }
  
  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
} 
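A short usage sketch for Sourcer, assuming the object above is in scope; the path is a placeholder and the returned source is closed by the caller:

object SourcerUsageSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical path; sourceFromFile logs the path before opening it.
    val source = Sourcer.sourceFromFile("data/example.txt")
    try source.getLines().foreach(println)
    finally source.close()
  }
}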
Example 157
Source File: SafeThread.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.utils

import java.util.Scanner

import org.apache.kafka.common.errors.InterruptException
import org.slf4j.Logger

abstract class SafeThread(logger: Logger) extends Thread {

  def runSafely(): Unit

  override def run(): Unit = {
    try {
      runSafely()
    }
    catch {
      case exception: InterruptException =>
        // This usually happens during consumer.poll().
        logger.info("Kafka interruption") // This is expected.
      case exception: InterruptedException =>
        logger.info("Java interruption") // This is expected.
      case exception: Throwable =>
        logger.error("Consumer interruption", exception)
    }
    finally {
      // This seems to be the only way to "cancel" the scanner.nextLine.
      System.exit(0)
    }
  }

  def waitSafely(duration: Long): Unit = SafeThread.waitSafely(this, logger, duration)

  start
}

object SafeThread {

  def waitSafely(thread: Thread, logger: Logger, duration: Long): Unit = {
    try {
      println("Press ENTER to exit...")
      new Scanner(System.in).nextLine()
      logger.info("User interruption")
      ThreadUtils.stop(thread, duration)
      logger.info("Exiting...")
    }
    catch {
      case _: Throwable => logger.info("Exiting...")
    }
  }
} 
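A hypothetical concrete SafeThread, to show how runSafely is supplied. Note that SafeThread calls start in its constructor, so instantiating the subclass starts the thread immediately; the names below are made up:

import org.slf4j.{Logger, LoggerFactory}

class HeartbeatThread(logger: Logger) extends SafeThread(logger) {
  // Runs until interrupted; exceptions are caught and logged by SafeThread.run.
  override def runSafely(): Unit = {
    while (!isInterrupted) {
      logger.info("heartbeat")
      Thread.sleep(1000)
    }
  }
}

object HeartbeatThreadSketch {
  def main(args: Array[String]): Unit = {
    // Construction starts the thread; waitSafely blocks until ENTER is pressed, then stops it.
    val thread = new HeartbeatThread(LoggerFactory.getLogger(classOf[HeartbeatThread]))
    thread.waitSafely(10000L)
  }
}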
Example 158
Source File: KafkaConsumer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.wmconsumer

import java.io.File
import java.time.Duration
import java.util.Collections
import java.util.ConcurrentModificationException
import java.util.Properties

import org.apache.kafka.clients.consumer.{KafkaConsumer => ApacheKafkaConsumer}
import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser
import org.clulab.wm.wmexchanger.utils.FileUtils
import org.clulab.wm.wmexchanger.utils.FileEditor
import org.json4s._
import org.slf4j.Logger
import org.slf4j.LoggerFactory

class KafkaConsumer(properties: Properties, closeDuration: Int, topic: String, outputDir: String) {
  import KafkaConsumer._
  implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats

  logger.info("Opening consumer...")

  protected val consumer: ApacheKafkaConsumer[String, String] = {
    val consumer = new ApacheKafkaConsumer[String, String](properties)

    consumer.subscribe(Collections.singletonList(topic))
    consumer
  }

  def poll(duration: Int): Unit = {
    val records = consumer.poll(Duration.ofSeconds(duration))

    logger.info(s"Polling ${records.count} records...")
    records.forEach { record =>
      val key = record.key
      val value = record.value
      // Imply an extension on the file so that it can be replaced.
      val file = FileEditor(new File(key + ".")).setDir(outputDir).setExt("json").get
      logger.info("Consuming " + file.getName)

      FileUtils.printWriterFromFile(file).autoClose { printWriter =>
        printWriter.print(value)
      }
    }
  }

  def close(): Unit = {
    logger.info("Closing consumer...")
    try {
      consumer.close(Duration.ofSeconds(closeDuration))
    }
    catch {
      case _: ConcurrentModificationException => // KafkaConsumer is not safe for multi-threaded access
    }
  }
}

object KafkaConsumer {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
} 
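A hypothetical wiring of the consumer above: the broker address, group id, topic, and output directory are placeholders, and the property keys are standard Kafka consumer settings:

import java.util.Properties

object KafkaConsumerSketch {
  def main(args: Array[String]): Unit = {
    val properties = new Properties()
    properties.put("bootstrap.servers", "localhost:9092")
    properties.put("group.id", "wm-consumer-sketch")
    properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")

    val consumer = new KafkaConsumer(properties, closeDuration = 10, topic = "documents", outputDir = "/tmp/out")
    try consumer.poll(duration = 5) // writes each record's value to <outputDir>/<key>.json
    finally consumer.close()
  }
}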
Example 159
Source File: KafkaConsumerApp.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.wmconsumer

import java.util.Properties

import org.clulab.wm.wmexchanger.utils.PropertiesBuilder
import org.clulab.wm.wmexchanger.utils.WmUserApp
import org.clulab.wm.wmexchanger.utils.SafeThread
import org.slf4j.Logger
import org.slf4j.LoggerFactory

class KafkaConsumerApp(args: Array[String]) extends WmUserApp(args,  "/kafkaconsumer.properties") {
  val localKafkaProperties: Properties = {
    // This allows the login to be contained in a file external to the project.
    val loginProperty = appProperties.getProperty("login")
    val loginPropertiesBuilder = PropertiesBuilder.fromFile(loginProperty)

    PropertiesBuilder(kafkaProperties).putAll(loginPropertiesBuilder).get
  }

  val topic: String = appProperties.getProperty("topic")
  val outputDir: String = appProperties.getProperty("outputDir")

  val pollDuration: Int = appProperties.getProperty("poll.duration").toInt
  val waitDuration: Long = appProperties.getProperty("wait.duration").toLong
  val closeDuration: Int = appProperties.getProperty("close.duration").toInt

  val thread: SafeThread = new SafeThread(KafkaConsumerApp.logger) {
    override def runSafely(): Unit = {
      val consumer = new KafkaConsumer(localKafkaProperties, closeDuration, topic, outputDir)

      // autoClose isn't executed if the thread is shot down, so this hook is used instead.
      sys.ShutdownHookThread { consumer.close() }
      while (!isInterrupted)
        consumer.poll(pollDuration)
    }
  }

  if (interactive)
    thread.waitSafely(waitDuration)
}

object KafkaConsumerApp extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  new KafkaConsumerApp(args)
} 
Example 160
Source File: CurlProducerApp.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.wmproducer

import com.typesafe.config.ConfigFactory
import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser
import org.clulab.wm.wmexchanger.utils.FileUtils
import org.clulab.wm.wmexchanger.utils.PropertiesBuilder
import org.clulab.wm.wmexchanger.utils.Sinker
import org.clulab.wm.wmexchanger.utils.StringUtils
import org.slf4j.Logger
import org.slf4j.LoggerFactory

object CurlProducerApp extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val version = "0.2.3"

  val inputDir = args(0)
  val outputFile = args(1)

  val config = ConfigFactory.load("curlproducer")
  val service = config.getString("CurlProducerApp.service")
  val login = config.getString("CurlProducerApp.login")
  val properties = PropertiesBuilder.fromFile(login).get
  val username = properties.getProperty("username")
  val password = properties.getProperty("password")

  val files = FileUtils.findFiles(inputDir, "jsonld")

  Sinker.printWriterFromFile(outputFile).autoClose { printWriter =>
    files.foreach { file =>
      logger.info(s"Processing ${file.getName}")
      val docId = StringUtils.beforeFirst(file.getName, '.')
      try {
        val command = s"""curl
            |--basic
            |--user "$username:$password"
            |-X POST "$service"
            |-H "accept: application/json"
            |-H "Content-Type: multipart/form-data"
            |-F 'metadata={ "identity": "eidos", "version": "$version", "document_id": "$docId" }'
            |-F "file=@${file.getName}"
            |""".stripMargin.replace('\r', ' ').replace('\n', ' ')

        printWriter.print(command)
        printWriter.print("\n")
      }
      catch {
        case exception: Exception =>
          logger.error(s"Exception for file $file", exception)
      }
    }
  }
} 
Example 161
Source File: DocumentFilter.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils

import org.clulab.processors.corenlp.CoreNLPDocument
import org.clulab.processors.shallownlp.ShallowNLPProcessor
import org.clulab.processors.{Document, Processor}
import org.slf4j.{Logger, LoggerFactory}

trait DocumentFilter {
  def whileFiltered(document: Document)(transform: Document => Document): Document
}

class FilterByNothing extends DocumentFilter {

  def whileFiltered(doc: Document)(transform: Document => Document): Document = transform(doc)
}

object FilterByNothing {
  def apply() = new FilterByNothing
}


class FilterByLength(processor: Processor, cutoff: Int = 200) extends DocumentFilter {

  def whileFiltered(doc: Document)(transform: Document => Document): Document = {
    val text = doc.text
    val filteredDoc = filter(doc)
    val transformedDoc = transform(filteredDoc)
    val unfilteredDoc = unfilter(transformedDoc, text)

    unfilteredDoc
  }

  protected def unfilter(doc: Document, textOpt: Option[String]): Document = {
    doc.text = textOpt
    doc
  }

  protected def filter(doc: Document): Document = {
    // Iterate through the sentences and remove any that are too long (in number of tokens).
    val sanitizedText = sanitizeText(doc)
    val kept = doc.sentences.filter(s => s.words.length < cutoff)
    val skipped = doc.sentences.length - kept.length
    val newDoc = Document(doc.id, kept, doc.coreferenceChains, doc.discourseTree, sanitizedText)
    val newerDoc = // This is a hack for lack of copy constructor for CoreNLPDocument
      if (doc.isInstanceOf[CoreNLPDocument])
        ShallowNLPProcessor.cluDocToCoreDoc(newDoc, keepText = true)
      else
        newDoc
    if (skipped != 0)
      FilterByLength.logger.info(s"skipping $skipped sentences")
    // Return a new document from these sentences
    newerDoc
  }

  protected def sanitizeText(doc: Document): Option[String] = doc.text.map { text =>
    // Assume that these characters are never parts of words.
    var newText = text.replace('\n', ' ').replace(0x0C.toChar, ' ')
    for (s <- doc.sentences if s.endOffsets.last < newText.size) {
      // Only perform this if it isn't part of a word.  A space is most reliable.
      if (newText(s.endOffsets.last) == ' ')
        newText = newText.updated(s.endOffsets.last, '\n')
    }
    newText
  }
}

object FilterByLength {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def apply(processor: Processor, cutoff: Int = 200): FilterByLength = new FilterByLength(processor, cutoff)
} 
Example 162
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString
  
  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }
  
  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
} 
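
A small usage sketch for Sourcer, assuming a UTF-8 text file at a placeholder path; the BufferedSource is closed explicitly since Sourcer only opens it.

import org.clulab.wm.eidos.utils.Sourcer

object SourcerSketch extends App {
  // Placeholder input file; sourceFromFile logs the path and opens it as UTF-8.
  val source = Sourcer.sourceFromFile("./data/example.txt")

  try source.getLines().take(5).foreach(println)
  finally source.close()
}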
Example 163
Source File: Timer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils

import org.slf4j.{Logger, LoggerFactory}

import scala.collection.mutable

class Timer(val description: String) {
  var elapsedTime: Option[Long] = None
  var startTime: Option[Long] = None

  def time[R](block: => R): R = {
    val t0 = System.currentTimeMillis()
    val result: R = block    // call-by-name
    val t1 = System.currentTimeMillis()

    elapsedTime = Some(t1 - t0)
    result
  }

  def start(): Unit = {
    val t0 = System.currentTimeMillis()

    startTime = Some(t0)
  }

  def stop(): Unit = {
    if (startTime.isDefined) {
      val t1 = System.currentTimeMillis()

      elapsedTime = Some(t1 - startTime.get)
    }
  }

  override def toString: String = {
    if (elapsedTime.isDefined)
      s"\tTime\t$description\t${Timer.diffToString(elapsedTime.get)}"
    else if (startTime.isDefined)
      s"\tStart\t$description\t${startTime.get}\tms"
    else
      s"\tTimer\t$description"
  }
}

object Timer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val elapsedTimes: mutable.Map[String, Long] = mutable.Map.empty

  def addTime(key: String, milliseconds: Long): Unit = this synchronized {
    val oldTime = elapsedTimes.getOrElseUpdate(key, 0)
    val newTime = oldTime + milliseconds

    elapsedTimes.update(key, newTime)
  }

  def summarize: Unit = {
    elapsedTimes.toSeq.sorted.foreach { case (key, milliseconds) =>
      logger.info(s"\tTotal\t$key\t$milliseconds")
    }
  }

  def diffToString(diff: Long): String = {
    val  days = (diff / (1000 * 60 * 60 * 24)) / 1
    val hours = (diff % (1000 * 60 * 60 * 24)) / (1000 * 60 * 60)
    val  mins = (diff % (1000 * 60 * 60)) / (1000 * 60)
    val  secs = (diff % (1000 * 60)) / 1000
    val msecs = (diff % (1000 * 1)) / 1

    f"$days:$hours%02d:$mins%02d:$secs%02d.$msecs%03d"
  }

  // See http://biercoff.com/easily-measuring-code-execution-time-in-scala/
  def time[R](description: String, verbose: Boolean = true)(block: => R): R = {
    val t0 = System.currentTimeMillis()
    if (verbose) logger.info(s"\tStart\t$description\t$t0\tms")

    val result: R = block // call-by-name

    val t1 = System.currentTimeMillis()
    if (verbose) logger.info(s"\tStop\t$description\t$t1\tms")

    val diff = t1 - t0
    if (verbose) logger.info(s"\tDiff\t$description\t$diff\tms")
    if (verbose) logger.info(s"\tTime\t$description\t${diffToString(diff)}")
    addTime(description, diff)
    result
  }
} 
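
A brief usage sketch of the two Timer styles above, with placeholder work: the block form logs Start/Stop/Diff lines and accumulates the elapsed time under its key, while the start/stop form suits code whose beginning and end are far apart.

import org.clulab.wm.eidos.utils.Timer

object TimerSketch extends App {
  // Block style: times the body, logs it, and records the elapsed time under the key "sleep".
  val answer = Timer.time("sleep") {
    Thread.sleep(250)
    42
  }

  // Explicit style: start and stop can bracket arbitrary code.
  val timer = new Timer("setup")
  timer.start()
  Thread.sleep(100)
  timer.stop()
  println(timer) // prints a tab-separated line such as "Time  setup  0:00:00:00.100"

  // Logs one Total line per recorded key.
  Timer.summarize
}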
Example 164
Source File: DomainHandler.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.groundings

import java.time.ZonedDateTime

import com.github.clulab.eidos.Version
import com.github.clulab.eidos.Versions
import com.github.worldModelers.ontologies.{Versions => AwayVersions}
import org.clulab.wm.eidos.SentencesExtractor
import org.clulab.wm.eidos.groundings.ontologies.FullTreeDomainOntology.FullTreeDomainOntologyBuilder
import org.clulab.wm.eidos.groundings.OntologyHandler.serializedPath
import org.clulab.wm.eidos.groundings.ontologies.CompactDomainOntology
import org.clulab.wm.eidos.groundings.ontologies.FastDomainOntology
import org.clulab.wm.eidos.groundings.ontologies.HalfTreeDomainOntology.HalfTreeDomainOntologyBuilder
import org.clulab.wm.eidos.utils.Canonicalizer
import org.clulab.wm.eidos.utils.StringUtils
import org.slf4j.Logger
import org.slf4j.LoggerFactory

object DomainHandler {
  protected lazy val logger: Logger = LoggerFactory.getLogger(getClass)

  // The intention is to confine use of the generated Version class to this single method.
  protected def getVersionOpt(ontologyPath: String): (Option[String], Option[ZonedDateTime]) = {
    // This should work for local ontologies.  Absolute
    val goodVersionOpt = Versions.versions.get(MockVersions.codeDir + ontologyPath)
    // See what might have come from WorldModelers/Ontologies.
    val bestVersionOpt = goodVersionOpt.getOrElse {
      // These are always stored in the top-level directory.
      val awayVersionOpt = AwayVersions.versions.get(StringUtils.afterLast(ontologyPath, '/')).getOrElse(None)
      val homeVersionOpt = awayVersionOpt.map { awayVersion => Version(awayVersion.commit, awayVersion.date) }

      homeVersionOpt
    }

    if (bestVersionOpt.isDefined)
      (Some(bestVersionOpt.get.commit), Some(bestVersionOpt.get.date))
    else
      (None, None)
  }

  def apply(ontologyPath: String, serializedPath: String, sentencesExtractor: SentencesExtractor,
      canonicalizer: Canonicalizer, filter: Boolean = true, useCacheForOntologies: Boolean = false,
      includeParents: Boolean = false): DomainOntology = {

    // As coded below, when parents are included, the FullTreeDomainOntology is being used.
    // The faster loading version is the FastDomainOntology.
    // If parents are not included, as had traditionally been the case, the HalfTreeDomainOntology suffices.
    // Being smaller and faster, it is preferred.  The faster loading counterpart is CompactDomainOntology.
    if (includeParents) {
      if (useCacheForOntologies) {
        logger.info(s"Processing cached yml ontology with parents from $serializedPath...")
        FastDomainOntology.load(serializedPath)
      }
      else {
        logger.info(s"Processing yml ontology with parents from $ontologyPath...")
        val (versionOpt, dateOpt) = getVersionOpt(ontologyPath)
        new FullTreeDomainOntologyBuilder(sentencesExtractor, canonicalizer, filter).buildFromPath(ontologyPath, versionOpt, dateOpt)
      }
    }
    else {
      if (useCacheForOntologies) {
        logger.info(s"Processing cached yml ontology without parents from $serializedPath...")
        CompactDomainOntology.load(serializedPath)
      }
      else {
        logger.info(s"Processing yml ontology without parents from $ontologyPath...")
        val (versionOpt, dateOpt) = getVersionOpt(ontologyPath)
        new HalfTreeDomainOntologyBuilder(sentencesExtractor, canonicalizer, filter).buildFromPath(ontologyPath, versionOpt, dateOpt)
      }
    }
  }

  def mkDomainOntology(name: String, ontologyPath: String, sentenceExtractor: SentencesExtractor,
      canonicalizer: Canonicalizer, cacheDir: String, useCacheForOntologies: Boolean,
      includeParents: Boolean): DomainOntology = {
    val ontSerializedPath: String = serializedPath(name, cacheDir, includeParents)

    DomainHandler(ontologyPath, ontSerializedPath, sentenceExtractor, canonicalizer: Canonicalizer, filter = true,
        useCacheForOntologies = useCacheForOntologies, includeParents = includeParents)
  }
} 
Example 165
Source File: ExtractCluMetaFromDirectory.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.apps.batch

import java.io.File

import org.clulab.serialization.json.stringify
import org.clulab.wm.eidos.EidosSystem
import org.clulab.wm.eidos.groundings.EidosAdjectiveGrounder
import org.clulab.wm.eidos.serialization.jsonld.JLDCorpus
import org.clulab.wm.eidos.utils.Closer.AutoCloser
import org.clulab.wm.eidos.utils.FileEditor
import org.clulab.wm.eidos.utils.FileUtils
import org.clulab.wm.eidos.utils.ThreadUtils
import org.clulab.wm.eidos.utils.Timer
import org.clulab.wm.eidos.utils.meta.CluText
import org.slf4j.Logger
import org.slf4j.LoggerFactory

object ExtractCluMetaFromDirectory extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val inputDir = args(0)
  val metaDir = args(1)
  val outputDir = args(2)
  val timeFile = args(3)
  val threads = args(4).toInt

  val doneDir = inputDir + "/done"
  val textToMeta = CluText.convertTextToMeta _

  val files = FileUtils.findFiles(inputDir, "txt")
  val parFiles = ThreadUtils.parallelize(files, threads)

  Timer.time("Whole thing") {
    val timePrintWriter = FileUtils.appendingPrintWriterFromFile(timeFile)
    timePrintWriter.println("File\tSize\tTime")
    val timer = new Timer("Startup")

    timer.start()
    // Prime it first.  This counts toward overall time, but should not be attributed
    // to any particular document.
    val config = EidosSystem.defaultConfig
    val reader = new EidosSystem(config)
    val options = EidosSystem.Options()
    // 0. Optionally include adjective grounding
    val adjectiveGrounder = EidosAdjectiveGrounder.fromEidosConfig(config)

    reader.extractFromText("This is a test.")
    timer.stop()

    timePrintWriter.println("Startup\t0\t" + timer.elapsedTime.get)

    parFiles.foreach { file =>
      try {
        // 1. Open corresponding output file
        logger.info(s"Extracting from ${file.getName}")
        val timer = new Timer("Single file in parallel")
        val size = timer.time {
          // 2. Get the input file text and metadata
          val metafile = textToMeta(file, metaDir)
          val eidosText = CluText(reader, file, Some(metafile))
          val text = eidosText.getText
          val metadata = eidosText.getMetadata
          // 3. Extract causal mentions from the text
          val annotatedDocument = reader.extractFromText(text, options, metadata)
          // 4. Convert to JSON
          val corpus = new JLDCorpus(annotatedDocument)
          val mentionsJSONLD = corpus.serialize()
          // 5. Write to output file
          val path = CluText.convertTextToJsonld(file, outputDir)
          FileUtils.printWriterFromFile(path).autoClose { pw =>
            pw.println(stringify(mentionsJSONLD, pretty = true))
          }
          // Now move the file to directory done
          val newFile = FileEditor(file).setDir(doneDir).get
          file.renameTo(newFile)

          text.length
        }
        this.synchronized {
          timePrintWriter.println(file.getName + "\t" + size + "\t" + timer.elapsedTime.get)
        }
      }
      catch {
        case exception: Exception =>
          logger.error(s"Exception for file $file", exception)
      }
    }
    timePrintWriter.close()
  }
} 
Example 166
Source File: ExtractCdrMetaFromDirectory.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.apps.batch

import org.clulab.wm.eidos.EidosSystem
import org.clulab.wm.eidos.serialization.jsonld.JLDCorpus
import org.clulab.wm.eidos.utils.Closer.AutoCloser
import org.clulab.wm.eidos.utils.FileEditor
import org.clulab.wm.eidos.utils.FileUtils
import org.clulab.wm.eidos.utils.ThreadUtils
import org.clulab.wm.eidos.utils.Timer
import org.clulab.wm.eidos.utils.meta.CdrText
import org.slf4j.Logger
import org.slf4j.LoggerFactory

object ExtractCdrMetaFromDirectory extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val inputDir = args(0)
  val outputDir = args(1)
  val timeFile = args(2)
  val threads = args(3).toInt

  val doneDir = inputDir + "/done"

  val files = FileUtils.findFiles(inputDir, "json")
  val parFiles = ThreadUtils.parallelize(files, threads)

  Timer.time("Whole thing") {
    val timePrintWriter = FileUtils.appendingPrintWriterFromFile(timeFile)
    timePrintWriter.println("File\tSize\tTime")
    val timer = new Timer("Startup")

    timer.start()
    // Prime it first.  This counts toward overall time, but should not be attributed
    // to any particular document.
    val reader = new EidosSystem()
    val options = EidosSystem.Options()

    Timer.time("EidosPrimer") {
      reader.extractFromText("This is a test.")
    }
    timer.stop()
    timePrintWriter.println("Startup\t0\t" + timer.elapsedTime.get)

    parFiles.foreach { file =>
      try {
        // 1. Open corresponding output file
        logger.info(s"Extracting from ${file.getName}")
        val timer = new Timer("Single file in parallel")
        val size = timer.time {
          // 1. Get the input file text and metadata
          val eidosText = CdrText(file)
          val text = eidosText.getText
          val metadata = eidosText.getMetadata
          // 2. Extract causal mentions from the text
          val annotatedDocument = reader.extractFromText(text, options, metadata)
          // 3. Write to output file
          val path = FileEditor(file).setDir(outputDir).setExt("jsonld").get
          FileUtils.printWriterFromFile(path).autoClose { printWriter =>
            new JLDCorpus(annotatedDocument).serialize(printWriter)
          }
          // Now move the file to directory done
          val newFile = FileEditor(file).setDir(doneDir).get
          file.renameTo(newFile)

          text.length
        }
        this.synchronized {
          timePrintWriter.println(file.getName + "\t" + size + "\t" + timer.elapsedTime.get)
        }
      }
      catch {
        case exception: Exception =>
          logger.error(s"Exception for file $file", exception)
      }
    }
    timePrintWriter.close()
  }
} 
Example 167
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.elasticsearch.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString
  
  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }
  
  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
} 
Example 168
Source File: SignalLogger.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import org.apache.commons.lang3.SystemUtils
import org.slf4j.Logger
import sun.misc.{Signal, SignalHandler}

private[spark] object SignalLogger {

  private var registered = false

  def register(log: Logger): Unit = synchronized {
    if (SystemUtils.IS_OS_UNIX) {
      require(!registered, "Can't re-install the signal handlers")
      registered = true

      val signals = Seq("TERM", "HUP", "INT")
      for (signal <- signals) {
        try {
          new SignalLoggerHandler(signal, log)
        } catch {
          case e: Exception => log.warn("Failed to register signal handler " + signal, e)
        }
      }
      log.info("Registered signal handlers for [" + signals.mkString(", ") + "]")
    }
  }
}

private sealed class SignalLoggerHandler(name: String, log: Logger) extends SignalHandler {

  val prevHandler = Signal.handle(new Signal(name), this)

  override def handle(signal: Signal): Unit = {
    log.error("RECEIVED SIGNAL " + signal.getNumber() + ": SIG" + signal.getName())
    prevHandler.handle(signal)
  }
} 
Example 169
Source File: ActorLogReceive.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import akka.actor.Actor
import org.slf4j.Logger


private[spark] trait ActorLogReceive {
  self: Actor =>

  override def receive: Actor.Receive = new Actor.Receive {

    private val _receiveWithLogging = receiveWithLogging

    override def isDefinedAt(o: Any): Boolean = _receiveWithLogging.isDefinedAt(o)

    override def apply(o: Any): Unit = {
      if (log.isDebugEnabled) {
        log.debug(s"[actor] received message $o from ${self.sender}")
      }
      val start = System.nanoTime
      _receiveWithLogging.apply(o)
      val timeTaken = (System.nanoTime - start).toDouble / 1000000
      if (log.isDebugEnabled) {
        log.debug(s"[actor] handled message ($timeTaken ms) $o from ${self.sender}")
      }
    }
  }

  def receiveWithLogging: Actor.Receive

  protected def log: Logger
} 
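
A minimal sketch of an actor mixing in ActorLogReceive. The actor and its echo behavior are invented for illustration, and because the trait is private[spark] the sketch assumes it is compiled under the org.apache.spark.util package.

package org.apache.spark.util

import akka.actor.{Actor, ActorSystem, Props}
import org.slf4j.{Logger, LoggerFactory}

class EchoActor extends Actor with ActorLogReceive {

  protected val log: Logger = LoggerFactory.getLogger(getClass)

  // Each message is logged at debug level before and after this handler runs.
  override def receiveWithLogging: Actor.Receive = {
    case message => sender() ! message
  }
}

object EchoActorSketch extends App {
  val system = ActorSystem("echo-sketch")
  system.actorOf(Props[EchoActor], "echo") ! "ping"
}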
Example 170
Source File: CarbonThriftServer.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.thriftserver

import java.io.File

import org.apache.spark.SparkConf
import org.apache.spark.sql.{CarbonEnv, SparkSession}
import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
import org.slf4j.{Logger, LoggerFactory}

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.spark.util.CarbonSparkUtil


object CarbonThriftServer {

  def main(args: Array[String]): Unit = {
    if (args.length != 0 && args.length != 3) {
      val logger: Logger = LoggerFactory.getLogger(this.getClass)
      logger.error("parameters: [access-key] [secret-key] [s3-endpoint]")
      System.exit(0)
    }
    val sparkConf = new SparkConf(loadDefaults = true)
    val builder = SparkSession
      .builder()
      .config(sparkConf)
      .appName("Carbon Thrift Server(uses CarbonExtensions)")
      .enableHiveSupport()
      .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions")
    configPropertiesFile(sparkConf, builder)
    if (args.length == 3) {
      builder.config(CarbonSparkUtil.getSparkConfForS3(args(0), args(1), args(2)))
    }
    val spark = builder.getOrCreate()
    CarbonEnv.getInstance(spark)
    waitingForSparkLaunch()
    HiveThriftServer2.startWithContext(spark.sqlContext)
  }

  private def waitingForSparkLaunch(): Unit = {
    val warmUpTime = CarbonProperties.getInstance().getProperty("carbon.spark.warmUpTime", "5000")
    try {
      Thread.sleep(Integer.parseInt(warmUpTime))
    } catch {
      case e: Exception =>
        val LOG = LogServiceFactory.getLogService(this.getClass.getCanonicalName)
        LOG.error(s"Wrong value for carbon.spark.warmUpTime $warmUpTime " +
                  "Using default Value and proceeding")
        Thread.sleep(5000)
    }
  }

  private def configPropertiesFile(sparkConf: SparkConf, builder: SparkSession.Builder): Unit = {
    sparkConf.contains("carbon.properties.filepath") match {
      case false =>
        val sparkHome = System.getenv.get("SPARK_HOME")
        if (null != sparkHome) {
          val file = new File(sparkHome + '/' + "conf" + '/' + "carbon.properties")
          if (file.exists()) {
            builder.config("carbon.properties.filepath", file.getCanonicalPath)
            System.setProperty("carbon.properties.filepath", file.getCanonicalPath)
          }
        }
      case true =>
        System.setProperty(
          "carbon.properties.filepath", sparkConf.get("carbon.properties.filepath"))
    }
  }
} 
Example 171
Source File: S3CsvExample.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.examples

import java.io.File

import org.apache.hadoop.fs.s3a.Constants.{ACCESS_KEY, SECRET_KEY}
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

object S3CsvExample {

  
  def main(args: Array[String]) {
    val rootPath = new File(this.getClass.getResource("/").getPath
                            + "../../../..").getCanonicalPath
    val logger: Logger = LoggerFactory.getLogger(this.getClass)

    import org.apache.spark.sql.CarbonUtils._
    if (args.length != 4) {
      logger.error("Usage: java CarbonS3Example <access-key> <secret-key>" +
                   "<s3.csv.location> <spark-master>")
      System.exit(0)
    }

    val spark = SparkSession
      .builder()
      .master(args(3))
      .appName("S3CsvExample")
      .config("spark.driver.host", "localhost")
      .config("spark.hadoop." + ACCESS_KEY, args(0))
      .config("spark.hadoop." + SECRET_KEY, args(1))
      .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions")
      .getOrCreate()

    spark.sparkContext.setLogLevel("ERROR")

    spark.sql(
      s"""
         | CREATE TABLE if not exists carbon_table1(
         | shortField SHORT,
         | intField INT,
         | bigintField LONG,
         | doubleField DOUBLE,
         | stringField STRING,
         | timestampField TIMESTAMP,
         | decimalField DECIMAL(18,2),
         | dateField DATE,
         | charField CHAR(5),
         | floatField FLOAT
         | )
         | STORED AS carbondata
         | LOCATION '$rootPath/examples/spark/target/store'
       """.stripMargin)

    spark.sql(
      s"""
         | LOAD DATA LOCAL INPATH '${ args(2) }'
         | INTO TABLE carbon_table1
         | OPTIONS('HEADER'='true')
       """.stripMargin)

    spark.sql(
      s"""
         | LOAD DATA LOCAL INPATH '${ args(2) }'
         | INTO TABLE carbon_table1
         | OPTIONS('HEADER'='true')
       """.stripMargin)

    spark.sql(
      s"""
         | SELECT *
         | FROM carbon_table1
      """.stripMargin).show()

    spark.sql("Drop table if exists carbon_table1")

    spark.stop()
  }
} 
Example 172
Source File: S3UsingSDkExample.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.examples

import org.apache.hadoop.fs.s3a.Constants.{ACCESS_KEY, ENDPOINT, SECRET_KEY}
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

import org.apache.carbondata.core.metadata.datatype.{DataTypes, Field}
import org.apache.carbondata.sdk.file.{CarbonWriter, Schema}
import org.apache.carbondata.spark.util.CarbonSparkUtil

object S3UsingSDkExample {

  def main(args: Array[String]) {
    val logger: Logger = LoggerFactory.getLogger(this.getClass)

    if (args.length < 2 || args.length > 6) {
      logger.error("Usage: java CarbonS3Example <access-key> <secret-key>" +
        "[table-path-on-s3] [s3-endpoint] [number-of-rows] [spark-master]")
      System.exit(0)
    }

    val (accessKey, secretKey, endpoint) = CarbonSparkUtil.getKeyOnPrefix(args(2))
    val spark = SparkSession
      .builder()
      .master(getSparkMaster(args))
      .appName("S3UsingSDKExample")
      .config("spark.driver.host", "localhost")
      .config(accessKey, args(0))
      .config(secretKey, args(1))
      .config(endpoint, CarbonSparkUtil.getS3EndPoint(args))
      .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions")
      .getOrCreate()

    spark.sparkContext.setLogLevel("WARN")
    val path = if (args.length < 3) {
      "s3a://sdk/WriterOutput2 "
    } else {
      args(2)
    }
    val num = if (args.length > 4) {
      Integer.parseInt(args(4))
    } else {
      3
    }
    buildTestData(args, path, num)

    spark.sql("DROP TABLE IF EXISTS s3_sdk_table")
    spark.sql(s"CREATE EXTERNAL TABLE s3_sdk_table STORED AS carbondata" +
      s" LOCATION '$path'")
    spark.sql("SELECT * FROM s3_sdk_table LIMIT 10").show()
    spark.stop()
  }

  def getSparkMaster(args: Array[String]): String = {
    if (args.length == 6) args(5)
    else "local"
  }

} 
Example 173
Source File: ViewTestSupport.scala    From ddd-leaven-akka-v2   with MIT License 5 votes vote down vote up
package ecommerce.sales.view

import com.typesafe.config.Config
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.time.{Millis, Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, Suite}
import org.slf4j.Logger
import org.slf4j.LoggerFactory.getLogger
import pl.newicom.dddd.view.sql.SqlViewStore
import slick.dbio._

import scala.concurrent.ExecutionContext
import slick.jdbc.H2Profile

trait ViewTestSupport extends BeforeAndAfterAll with ScalaFutures {
  this: Suite =>

  def config: Config
  lazy val viewStore = new SqlViewStore(config)
  val log: Logger = getLogger(getClass)

  implicit val profile = H2Profile

  implicit class ViewStoreAction[A](a: DBIO[A])(implicit ex: ExecutionContext) {
    private val future = viewStore.run(a)

    def run(): Unit = future.map(_ => ()).futureValue
    def result: A = future.futureValue
  }

  def ensureSchemaDropped: DBIO[Unit]
  def ensureSchemaCreated: DBIO[Unit]

  implicit override val patienceConfig = PatienceConfig(
    timeout = scaled(Span(10, Seconds)),
    interval = scaled(Span(200, Millis))
  )

  override def beforeAll() {
    viewStore.run {
      ensureSchemaDropped >> ensureSchemaCreated
    }.futureValue

  }

} 
Example 174
Source File: ViewTestSupport.scala    From ddd-leaven-akka-v2   with MIT License 5 votes vote down vote up
package ecommerce.sales.view

import com.typesafe.config.Config
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.time.{Millis, Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, Suite}
import org.slf4j.Logger
import org.slf4j.LoggerFactory.getLogger
import pl.newicom.dddd.view.sql.SqlViewStore
import slick.dbio._
import slick.jdbc.H2Profile

import scala.concurrent.ExecutionContext

trait ViewTestSupport extends BeforeAndAfterAll with ScalaFutures {
  this: Suite =>

  def config: Config
  lazy val viewStore = new SqlViewStore(config)
  val log: Logger = getLogger(getClass)

  implicit val profile = H2Profile

  implicit override val patienceConfig = PatienceConfig(
    timeout = scaled(Span(5, Seconds)),
    interval = scaled(Span(200, Millis))
  )

  implicit class ViewStoreAction[A](a: DBIO[A])(implicit ex: ExecutionContext) {
    private val future = viewStore.run(a)

    def run(): Unit = future.map(_ => ()).futureValue
    def result: A = future.futureValue
  }

  def ensureSchemaDropped: DBIO[Unit]
  def ensureSchemaCreated: DBIO[Unit]

  override def beforeAll() {
    val setup = viewStore.run {
      ensureSchemaDropped >> ensureSchemaCreated
    }
    assert(setup.isReadyWithin(Span(5, Seconds)))

  }

} 
Example 175
Source File: HeadquartersConfiguration.scala    From ddd-leaven-akka-v2   with MIT License 5 votes vote down vote up
package ecommerce.headquarters.app

import java.util.UUID

import akka.actor.{Props, _}
import com.typesafe.config.Config
import ecommerce.headquarters.app.HeadquartersConfiguration._
import ecommerce.headquarters.processes.OrderProcessManager
import ecommerce.shipping.ShipmentId
import org.slf4j.Logger
import pl.newicom.dddd.actor.{ActorFactory, DefaultConfig, PassivationConfig}
import pl.newicom.dddd.aggregate.{AggregateRootActorFactory, AggregateRootLogger}
import pl.newicom.dddd.coordination.ReceptorConfig
import pl.newicom.dddd.office.LocalOfficeId
import pl.newicom.dddd.process._
import pl.newicom.dddd.scheduling.{Scheduler, SchedulerEvent, schedulingOfficeId}
import pl.newicom.eventstore.EventstoreSubscriber

import scala.concurrent.duration._

object HeadquartersConfiguration {
  val HQDepartment: String = "Headquarters"
}

trait HeadquartersConfiguration {

  def log: Logger
  def config: Config
  implicit def system: ActorSystem

  implicit val schedulingOfficeID: LocalOfficeId[Scheduler] = schedulingOfficeId(HQDepartment)
  implicit val commandQueueOfficeID: LocalOfficeId[CommandSink] = commandSinkOfficeId(HQDepartment)

  implicit object SchedulerFactory extends AggregateRootActorFactory[Scheduler] {
    override def props(pc: PassivationConfig) = Props(new Scheduler(DefaultConfig(pc, replyWithEvents = false)) with AggregateRootLogger[SchedulerEvent] {
      // TODO not needed
      override def id = "global"
    })
  }

  implicit object CommandSinkFactory extends AggregateRootActorFactory[CommandSink] {
    override def props(pc: PassivationConfig) = Props(new CommandSink(DefaultConfig(pc, replyWithEvents = false)) with AggregateRootLogger[CommandEnqueued])
  }

  implicit object OrderProcessManagerActorFactory extends SagaActorFactory[OrderProcessManager] {
    def props(pc: PassivationConfig): Props =
      Props(new OrderProcessManager(DefaultConfig(pc, replyWithEvents = false), () => new ShipmentId(UUID.randomUUID().toString)))
  }

  implicit def receptorActorFactory[A : LocalOfficeId : ActorFactory]: ReceptorActorFactory[A] = new ReceptorActorFactory[A] {
    def receptorFactory: ReceptorFactory = (config: ReceptorConfig) => {
      new Receptor(config.copy(capacity = 100)) with EventstoreSubscriber {
        override def redeliverInterval: FiniteDuration = 10.seconds
      }
    }
  }

} 
Example 176
Source File: DeltaLoad.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.algo

import com.adidas.analytics.algo.DeltaLoad._
import com.adidas.analytics.algo.core.Algorithm
import com.adidas.analytics.algo.shared.DateComponentDerivation
import com.adidas.analytics.config.DeltaLoadConfiguration.PartitionedDeltaLoadConfiguration
import com.adidas.analytics.util.DataFrameUtils._
import com.adidas.analytics.util._
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
import org.apache.spark.storage.StorageLevel
import org.slf4j.{Logger, LoggerFactory}


  private def getUpsertRecords(deltaRecords: Dataset[Row], resultColumns: Seq[String]): Dataset[Row] = {
    // Create a partition window, partitioning by the delta records' logical key (i.e. the technical key of active records)
    val partitionWindow = Window
      .partitionBy(businessKey.map(col): _*)
      .orderBy(technicalKey.map(component => col(component).desc): _*)

    // Ranking & projection
    val rankedDeltaRecords = deltaRecords
      .withColumn(rankingColumnName, row_number().over(partitionWindow))
      .filter(upsertRecordsModesFilterFunction)

    rankedDeltaRecords
      .filter(rankedDeltaRecords(rankingColumnName) === 1)
      .selectExpr(resultColumns: _*)
  }

  protected def withDatePartitions(spark: SparkSession, dfs: DFSWrapper, dataFrames: Vector[DataFrame]): Vector[DataFrame] = {
    logger.info("Adding partitioning information if needed")
    try {
      dataFrames.map { df =>
        if (df.columns.toSeq.intersect(targetPartitions) != targetPartitions){
          df.transform(withDateComponents(partitionSourceColumn, partitionSourceColumnFormat, targetPartitions))
        }
        else df
      }
    } catch {
      case e: Throwable =>
        logger.error("Cannot add partitioning information for data frames.", e)
        //TODO: Handle failure case properly
        throw new RuntimeException("Unable to transform data frames.", e)
    }
  }
}


object DeltaLoad {

  private val logger: Logger = LoggerFactory.getLogger(getClass)

  def apply(spark: SparkSession, dfs: DFSWrapper, configLocation: String): DeltaLoad = {
    new DeltaLoad(spark, dfs, configLocation)
  }
} 
Example 177
Source File: GzipDecompressor.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.algo

import java.util.concurrent.{Executors, TimeUnit}

import com.adidas.analytics.algo.GzipDecompressor.{changeFileExtension, compressedExtension, _}
import com.adidas.analytics.algo.core.JobRunner
import com.adidas.analytics.config.GzipDecompressorConfiguration
import com.adidas.analytics.util.DFSWrapper
import com.adidas.analytics.util.DFSWrapper._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.IOUtils
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent._
import scala.concurrent.duration._


final class GzipDecompressor protected(val spark: SparkSession, val dfs: DFSWrapper, val configLocation: String)
  extends JobRunner with GzipDecompressorConfiguration {

  private val hadoopConfiguration: Configuration = spark.sparkContext.hadoopConfiguration
  private val fileSystem: FileSystem = dfs.getFileSystem(inputDirectoryPath)


  override def run(): Unit = {
    // Check whether the input directory exists.
    if (!fileSystem.exists(inputDirectoryPath)){
      logger.error(s"Input directory: $inputDirectoryPath does not exist.")
      throw new RuntimeException(s"Directory $inputDirectoryPath does not exist.")
    }

    val compressedFilePaths = fileSystem.ls(inputDirectoryPath, recursive)
      .filterNot(path => fileSystem.isDirectory(path))
      .filter(_.getName.toLowerCase.endsWith(compressedExtension))

    if (compressedFilePaths.isEmpty) {
      logger.warn(s"Input directory $inputDirectoryPath does not contain compressed files. Skipping...")
    } else {
      implicit val ec: ExecutionContext = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(threadPoolSize))
      Await.result(Future.sequence(
        compressedFilePaths.map { compressedFilePath =>
          Future {
            logger.info(s"Decompressing file: $compressedFilePath")

            val decompressedFileName = changeFileExtension(compressedFilePath.getName, compressedExtension, outputExtension)
            val decompressedFilePath = new Path(compressedFilePath.getParent, decompressedFileName)

            val compressionCodecFactory = new CompressionCodecFactory(hadoopConfiguration)
            val inputCodec = compressionCodecFactory.getCodec(compressedFilePath)

            val inputStream = inputCodec.createInputStream(fileSystem.open(compressedFilePath))
            val output = fileSystem.create(decompressedFilePath)

            IOUtils.copyBytes(inputStream, output, hadoopConfiguration)
            logger.info(s"Finished decompressing file: $compressedFilePath")

            //Delete the compressed file
            fileSystem.delete(compressedFilePath, false)
            logger.info(s"Removed file: $compressedFilePath")
          }
        }
      ), Duration(4, TimeUnit.HOURS))
    }
  }
}


object GzipDecompressor {

  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  private val compressedExtension: String = ".gz"

  def apply(spark: SparkSession, dfs: DFSWrapper, configLocation: String): GzipDecompressor = {
    new GzipDecompressor(spark, dfs, configLocation)
  }

  private def changeFileExtension(fileName: String, currentExt: String, newExt: String): String = {
    val newFileName =  fileName.substring(0, fileName.lastIndexOf(currentExt))
    if (newFileName.endsWith(newExt)) newFileName else newFileName + newExt
  }
} 
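
A short usage sketch for GzipDecompressor, assuming a local SparkSession and an algorithm configuration file at a placeholder location; DFSWrapper is built from the session's Hadoop configuration, mirroring how the project's tests construct it.

import com.adidas.analytics.algo.GzipDecompressor
import com.adidas.analytics.util.DFSWrapper
import org.apache.spark.sql.SparkSession

object GzipDecompressorSketch extends App {
  val spark = SparkSession.builder().appName("gzip-decompressor-sketch").master("local[*]").getOrCreate()
  val dfs = DFSWrapper(spark.sparkContext.hadoopConfiguration)

  // The JSON configuration (placeholder path) supplies the input directory, recursion flag,
  // output extension and thread pool size read through GzipDecompressorConfiguration.
  GzipDecompressor(spark, dfs, "hdfs:///configs/gzip_decompressor.json").run()
}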
Example 178
Source File: DataFrameUtils.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.util

import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row, functions}
import org.slf4j.{Logger, LoggerFactory}


object DataFrameUtils {

  private val logger: Logger = LoggerFactory.getLogger(getClass)

  type FilterFunction = Row => Boolean

  type PartitionCriteria = Seq[(String, String)]

  def mapPartitionsToDirectories(partitionCriteria: PartitionCriteria): Seq[String] = {
    partitionCriteria.map {
      case (columnName, columnValue) => s"$columnName=$columnValue"
    }
  }

  def buildPartitionsCriteriaMatcherFunc(multiplePartitionsCriteria: Seq[PartitionCriteria], schema: StructType): FilterFunction = {
    val targetPartitions = multiplePartitionsCriteria.flatten.map(_._1).toSet
    val fieldNameToMatchFunctionMapping = schema.fields.filter {
      case StructField(name, _, _, _) => targetPartitions.contains(name)
    }.map {
      case StructField(name, _: ByteType, _, _)    => name -> ((r: Row, value: String) => r.getAs[Byte](name)    == value.toByte)
      case StructField(name, _: ShortType, _, _)   => name -> ((r: Row, value: String) => r.getAs[Short](name)   == value.toShort)
      case StructField(name, _: IntegerType, _, _) => name -> ((r: Row, value: String) => r.getAs[Int](name)     == value.toInt)
      case StructField(name, _: LongType, _, _)    => name -> ((r: Row, value: String) => r.getAs[Long](name)    == value.toLong)
      case StructField(name, _: FloatType, _, _)   => name -> ((r: Row, value: String) => r.getAs[Float](name)   == value.toFloat)
      case StructField(name, _: DoubleType, _, _)  => name -> ((r: Row, value: String) => r.getAs[Double](name)  == value.toDouble)
      case StructField(name, _: BooleanType, _, _) => name -> ((r: Row, value: String) => r.getAs[Boolean](name) == value.toBoolean)
      case StructField(name, _: StringType, _, _)  => name -> ((r: Row, value: String) => r.getAs[String](name)  == value)
    }.toMap

    def convertPartitionCriteriaToFilterFunctions(partitionCriteria: PartitionCriteria): Seq[FilterFunction] = partitionCriteria.map {
      case (name, value) => (row: Row) => fieldNameToMatchFunctionMapping(name)(row, value)
    }

    def joinSinglePartitionFilterFunctionsWithAnd(partitionFilterFunctions: Seq[FilterFunction]): FilterFunction =
      partitionFilterFunctions
        .reduceOption((predicate1, predicate2) => (row: Row) => predicate1(row) && predicate2(row))
        .getOrElse((_: Row) => false)

    multiplePartitionsCriteria
      .map(convertPartitionCriteriaToFilterFunctions)
      .map(joinSinglePartitionFilterFunctionsWithAnd)
      .reduceOption((predicate1, predicate2) => (row: Row) => predicate1(row) || predicate2(row))
      .getOrElse((_: Row) => false)
  }


  implicit class DataFrameHelper(df: DataFrame) {

    def collectPartitions(targetPartitions: Seq[String]): Seq[PartitionCriteria] = {
      logger.info(s"Collecting unique partitions for partitions columns (${targetPartitions.mkString(", ")})")
      val partitions = df.selectExpr(targetPartitions: _*).distinct().collect()

      partitions.map { row =>
        targetPartitions.map { columnName =>
          Option(row.getAs[Any](columnName)) match {
            case Some(columnValue) => columnName -> columnValue.toString
            case None => throw new RuntimeException(s"Partition column '$columnName' contains null value")
          }
        }
      }
    }

    def addMissingColumns(targetSchema: StructType): DataFrame = {
      val dataFieldsSet = df.schema.fieldNames.toSet
      val selectColumns = targetSchema.fields.map { field =>
        if (dataFieldsSet.contains(field.name)) {
          functions.col(field.name)
        } else {
          functions.lit(null).cast(field.dataType).as(field.name)
        }
      }
      df.select(selectColumns: _*)
    }

    def isEmpty: Boolean = df.head(1).isEmpty

    def nonEmpty: Boolean = df.head(1).nonEmpty
  }
} 
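
A usage sketch for the partition helpers above, with an invented DataFrame whose year and month columns stand in for partition columns.

import com.adidas.analytics.util.DataFrameUtils._
import org.apache.spark.sql.SparkSession

object DataFrameUtilsSketch extends App {
  val spark = SparkSession.builder().appName("partition-matcher-sketch").master("local[*]").getOrCreate()
  import spark.implicits._

  val df = Seq((2020, 3, "a"), (2020, 4, "b"), (2021, 1, "c")).toDF("year", "month", "value")

  // Distinct partition values present in the frame, e.g. Seq(Seq("year" -> "2020", "month" -> "3"), ...).
  val partitions: Seq[PartitionCriteria] = df.collectPartitions(Seq("year", "month"))
  println(partitions)

  // A Row-level predicate matching only year=2020/month=3, used to filter the frame.
  val matcher = buildPartitionsCriteriaMatcherFunc(Seq(Seq("year" -> "2020", "month" -> "3")), df.schema)
  df.filter(matcher).show()

  // The same criteria rendered as partition directory names: Seq("year=2020", "month=3").
  println(mapPartitionsToDirectories(Seq("year" -> "2020", "month" -> "3")))
}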
Example 179
Source File: DataFormat.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.util

import org.apache.spark.sql._
import org.apache.spark.sql.types.StructType
import org.slf4j.{Logger, LoggerFactory}


sealed trait DataFormat {

  protected val logger: Logger = LoggerFactory.getLogger(getClass)

  def read(reader: DataFrameReader, locations: String*): DataFrame

  def write(writer: DataFrameWriter[Row], location: String): Unit
}


object DataFormat {

  case class ParquetFormat(optionalSchema: Option[StructType] = None) extends DataFormat {

    override def read(reader: DataFrameReader, locations: String*): DataFrame = {
      val filesString = locations.mkString(", ")
      logger.info(s"Reading Parquet data from $filesString")
      optionalSchema.fold(reader)(schema => reader.schema(schema)).parquet(locations: _*)
    }

    override def write(writer: DataFrameWriter[Row], location: String): Unit = {
      logger.info(s"Writing Parquet data to $location")
      writer.parquet(location)
    }
  }

  case class DSVFormat(optionalSchema: Option[StructType] = None) extends DataFormat {

    override def read(reader: DataFrameReader, locations: String*): DataFrame = {
      val filesString = locations.mkString(", ")
      logger.info(s"Reading DSV data from $filesString")
      optionalSchema.fold(reader.option("inferSchema", "true"))(schema => reader.schema(schema)).csv(locations: _*)
    }

    override def write(writer: DataFrameWriter[Row], location: String): Unit = {
      logger.info(s"Writing DSV data to $location")
      writer.csv(location)
    }
  }

  case class JSONFormat(optionalSchema: Option[StructType] = None) extends DataFormat {

    override def read(reader: DataFrameReader, locations: String*): DataFrame = {
      val filesString = locations.mkString(", ")
      logger.info(s"Reading JSON data from $filesString")
      optionalSchema.fold(reader.option("inferSchema", "true"))(schema => reader.schema(schema)).json(locations: _*)
    }

    override def write(writer: DataFrameWriter[Row], location: String): Unit = {
      logger.info(s"Writing JSON data to $location")
      writer.json(location)
    }
  }
} 
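
A usage sketch for the DataFormat wrappers, assuming a local SparkSession; the input and output locations are placeholders.

import com.adidas.analytics.util.DataFormat.{DSVFormat, ParquetFormat}
import org.apache.spark.sql.SparkSession

object DataFormatSketch extends App {
  val spark = SparkSession.builder().appName("data-format-sketch").master("local[*]").getOrCreate()

  // Read delimited data with an inferred schema (placeholder location and options).
  val orders = DSVFormat().read(spark.read.option("header", "true").option("delimiter", "|"), "hdfs:///landing/orders")

  // Write the same data back out as Parquet (placeholder location).
  ParquetFormat().write(orders.write.mode("overwrite"), "hdfs:///lake/orders")
}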
Example 180
Source File: InputReader.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.util

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.slf4j.{Logger, LoggerFactory}


  def newTableLocationReader(table: String, format: DataFormat, options: Map[String, String] = Map.empty): TableLocationReader = {
    TableLocationReader(table, format, options)
  }

  case class TableReader(table: String, options: Map[String, String]) extends InputReader {
    override def read(sparkSession: SparkSession): DataFrame = {
      logger.info(s"Reading data from table $table")
      sparkSession.read.options(options).table(table)
    }
  }

  case class FileSystemReader(location: String, format: DataFormat, options: Map[String, String]) extends InputReader {
    override def read(sparkSession: SparkSession): DataFrame = {
      logger.info(s"Reading data from location $location")
      format.read(sparkSession.read.options(options), location)
    }
  }

  case class TableLocationReader(table: String, format: DataFormat, options: Map[String, String]) extends InputReader {
    override def read(sparkSession: SparkSession): DataFrame = {
      val location = HiveTableAttributeReader(table, sparkSession).getTableLocation
      logger.info(s"Reading data from location $location")
      format.read(sparkSession.read.options(options), location)
    }
  }
} 
Example 181
Source File: ConfigReader.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.analytics.util

import java.text.DecimalFormatSymbols

import org.slf4j.{Logger, LoggerFactory}

import scala.util.parsing.json.{JSON, JSONArray, JSONObject}


class ConfigReader(jsonContent: String) extends Serializable {

  private val logger: Logger = LoggerFactory.getLogger(getClass)

  private val decimalSeparator: Char = new DecimalFormatSymbols().getDecimalSeparator

  JSON.globalNumberParser = (in: String) => if (in.contains(decimalSeparator)) in.toDouble else in.toInt

  private lazy val config = JSON.parseRaw(jsonContent) match {
    case Some(JSONObject(obj)) => obj
    case _ => throw new IllegalArgumentException(s"Wrong format of the configuration file: $jsonContent")
  }

  def getAsSeq[T](propertyName: String): Seq[T] = {
    config.get(propertyName) match {
      case Some(JSONArray(list)) => list.map(_.asInstanceOf[T])
      case _ => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName")
    }
  }

  def getAsMap[K, V](propertyName: String): Map[K,V] = {
    config.get(propertyName) match {
      case Some(JSONObject(obj)) => obj.asInstanceOf[Map[K,V]]
      case _ => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName")
    }
  }

  def getAs[T](propertyName: String): T = {
    config.get(propertyName) match {
      case Some(property) => property.asInstanceOf[T]
      case None => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName")
    }
  }

  def getAsOption[T](propertyName: String): Option[T] = {
    config.get(propertyName).map(property => property.asInstanceOf[T])
  }

  def getAsOptionSeq[T](propertyName: String): Option[Seq[T]] = {
    config.get(propertyName).map(_ => getAsSeq(propertyName))
  }

  def contains(propertyName: String): Boolean = {
    config.contains(propertyName)
  }
}

object ConfigReader {
  def apply(jsonContent: String): ConfigReader = new ConfigReader(jsonContent)
} 
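
A small usage sketch for ConfigReader, with an invented JSON configuration string to show the typed accessors.

import com.adidas.analytics.util.ConfigReader

object ConfigReaderSketch extends App {
  // Invented configuration content for illustration.
  val json =
    """{
      |  "target_table": "lake.orders",
      |  "target_partitions": ["year", "month"],
      |  "options": {"has_header": "true", "delimiter": "|"},
      |  "thread_pool_size": 4
      |}""".stripMargin

  val config = ConfigReader(json)

  val table = config.getAs[String]("target_table")
  val partitions = config.getAsSeq[String]("target_partitions")
  val options = config.getAsMap[String, String]("options")
  val threads = config.getAs[Int]("thread_pool_size")
  val missing = config.getAsOption[String]("not_there")

  println(s"$table partitioned by ${partitions.mkString(", ")} using $threads threads; options=$options; missing=$missing")
}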
Example 182
Source File: BaseAlgorithmTest.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.utils

import java.util.UUID

import com.adidas.analytics.util.{DFSWrapper, LoadMode}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.types.StructType
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.io.Source

trait BaseAlgorithmTest extends Suite with BeforeAndAfterAll with BeforeAndAfterEach with HDFSSupport with SparkSupport {

  override val logger: Logger = LoggerFactory.getLogger(getClass)
  override val testAppId: String = UUID.randomUUID().toString
  override val localTestDir: String = "target"
  override val sparkHadoopConf: Option[Configuration] = Some(fs.getConf)

  val hdfsRootTestPath: Path = new Path("hdfs:///tmp/tests")
  val dfs: DFSWrapper = DFSWrapper(spark.sparkContext.hadoopConfiguration)

  override def afterAll(): Unit = {
    spark.stop()
    cluster.shutdown(true)
  }

  override def beforeEach(): Unit = {
    fs.delete(hdfsRootTestPath, true)
    fs.mkdirs(hdfsRootTestPath)
  }

  override def afterEach(): Unit = {
    spark.sqlContext.clearCache()
    spark.sparkContext.getPersistentRDDs.foreach {
      case (_, rdd) => rdd.unpersist(true)
    }
  }

  def resolveResource(fileName: String, withProtocol: Boolean = false): String = {
    val resource = s"${getClass.getSimpleName}/$fileName"
    logger.info(s"Resolving resource $resource")
    val location = getClass.getClassLoader.getResource(resource).getPath
    if (withProtocol) {
      s"file://$location"
    } else {
      location
    }
  }

  def getResourceAsText(fileName: String): String = {
    val resource = s"${getClass.getSimpleName}/$fileName"
    logger.info(s"Reading resource $resource")
    val stream = getClass.getClassLoader.getResourceAsStream(resource)
    Source.fromInputStream(stream).mkString
  }

  def copyResourceFileToHdfs(resource: String, targetPath: Path): Unit = {
    val localResourceRoot = resolveResource("", withProtocol = true)
    val sourcePath = new Path(localResourceRoot, resource)
    logger.info(s"Copying local resource to HDFS $sourcePath -> $targetPath")
    fs.copyFromLocalFile(sourcePath, targetPath)
  }

  
  def createAndLoadParquetTable(database: String, tableName: String, partitionColumns: Option[Seq[String]] = None, schema: StructType, filePath: String, reader: FileReader): Table = {
    val table = createParquetTable(database, tableName, partitionColumns, schema)
    val inputTableDataURI = resolveResource(filePath, withProtocol = true)
    table.write(Seq(inputTableDataURI), reader, LoadMode.OverwritePartitions)
    table
  }
} 
Example 183
Source File: HDFSSupport.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.utils

import java.io.File

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{CommonConfigurationKeysPublic, FileSystem}
import org.apache.hadoop.hdfs.MiniDFSCluster
import org.slf4j.Logger

trait HDFSSupport {

  private lazy val defaultDataNodesNum: Int = 2
  private lazy val defaultPort: Int = 8201

  lazy val cluster: MiniDFSCluster = startHDFS(clusterHdfsConf)
  lazy val fs: FileSystem = cluster.getFileSystem()

  def logger: Logger
  def testAppId: String
  def localTestDir: String
  def clusterHdfsConf: Option[Configuration] = Option.empty

  def startHDFS(hadoopConf: Option[Configuration]): MiniDFSCluster = {
    val appDir = new File(localTestDir, testAppId)
    val hdfsTestDir = new File(appDir, "hdfs").getAbsoluteFile
    hdfsTestDir.mkdirs()

    val clusterConf = hadoopConf.fold(new Configuration())(c => new Configuration(c))
    clusterConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsTestDir.getAbsolutePath)
    clusterConf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, s"hdfs://localhost:$defaultPort/")

    logger.info(s"Starting test DFS cluster with base directory at ${hdfsTestDir.getAbsolutePath} ...")
    new MiniDFSCluster.Builder(clusterConf)
      .numDataNodes(defaultDataNodesNum)
      .nameNodePort(defaultPort)
      .format(true)
      .build()
  }
} 
Example 184
Source File: SparkSupport.scala    From m3d-engine   with Apache License 2.0 5 votes vote down vote up
package com.adidas.utils

import java.io.File

import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.slf4j.Logger

import scala.collection.JavaConversions._

trait SparkSupport extends SparkSessionWrapper {

  def logger: Logger

  def testAppId: String

  def localTestDir: String

  override def startSpark(hadoopConf: Option[Configuration] = None): SparkSession = {
    // This line makes it possible to use a remote debugger
    System.setSecurityManager(null)

    val appDir = new File(localTestDir, testAppId)
    val sparkTestDir = new File(appDir, "spark").getAbsoluteFile
    sparkTestDir.mkdirs()

    val sparkConf = hadoopConf.foldLeft {
      new SparkConf(false)
        .set("spark.ui.enabled", "false")
        .set("spark.sql.warehouse.dir", new File(sparkTestDir, "warehouse").getAbsolutePath)
    } { (sparkConf, hadoopConf) =>
      hadoopConf.foldLeft(sparkConf)((sc, entry) => sc.set(s"spark.hadoop.${entry.getKey}", entry.getValue))
    }

    System.setProperty("derby.system.home", new File(sparkTestDir, "metastore").getAbsolutePath)

    logger.info(s"Staring Spark Session with warehouse dir at ${sparkTestDir.getAbsolutePath} ...")
    SparkSession.builder()
      .config(sparkConf)
      .appName(s"test-${getClass.getName}")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()
  }

  def addHadoopConfiguration(conf: Configuration): Unit = {
    conf.foreach { property =>
      spark.sparkContext.hadoopConfiguration.set(property.getKey, property.getValue)
    }
  }

  def addHadoopProperty(key: String, value: String): Unit = {
    spark.sparkContext.hadoopConfiguration.set(key, value)
  }

} 
Example 185
Source File: tracerlog.scala    From http4s-tracer   with Apache License 2.0 5 votes vote down vote up
package dev.profunktor.tracer.instances

import cats.effect.Sync
import cats.syntax.flatMap._
import dev.profunktor.tracer.Trace
import dev.profunktor.tracer.Trace._
import dev.profunktor.tracer.TracerLog
import org.slf4j.{Logger, LoggerFactory}

import scala.reflect.ClassTag

object tracerlog {

  implicit def defaultLog[F[_]](implicit F: Sync[F]): TracerLog[Trace[F, ?]] =
    new TracerLog[Trace[F, ?]] {
      def logger[A](implicit ct: ClassTag[A]): F[Logger] =
        F.delay(LoggerFactory.getLogger(ct.runtimeClass))

      override def info[A: ClassTag](value: => String): Trace[F, Unit] = Trace { id =>
        logger[A].flatMap { log =>
          if (log.isInfoEnabled) F.delay(log.info(s"$id - $value"))
          else F.unit
        }
      }

      override def error[A: ClassTag](value: => String): Trace[F, Unit] = Trace { id =>
        logger[A].flatMap { log =>
          if (log.isErrorEnabled) F.delay(log.error(s"$id - $value"))
          else F.unit
        }
      }

      override def warn[A: ClassTag](value: => String): Trace[F, Unit] = Trace { id =>
        logger[A].flatMap { log =>
          if (log.isWarnEnabled) F.delay(log.warn(s"$id - $value"))
          else F.unit
        }
      }
    }

} 
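Stripped of the tracing context, the pattern above boils down to suspending SLF4J calls in Sync so that logging stays referentially transparent. A standalone sketch of that core idea with cats-effect IO (the names here are illustrative and not part of http4s-tracer):

import cats.effect.IO
import org.slf4j.{Logger, LoggerFactory}

import scala.reflect.ClassTag

object IoLogging {
  // Nothing touches SLF4J until the IO is actually run.
  def loggerFor[A](implicit ct: ClassTag[A]): IO[Logger] =
    IO.delay(LoggerFactory.getLogger(ct.runtimeClass))

  def info[A: ClassTag](msg: String): IO[Unit] =
    loggerFor[A].flatMap { log =>
      if (log.isInfoEnabled) IO.delay(log.info(msg)) else IO.unit
    }
}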
Example 186
Source File: ImageNormalization.scala    From flink-tensorflow   with Apache License 2.0 5 votes vote down vote up
package org.apache.flink.contrib.tensorflow.examples.inception

import org.apache.flink.contrib.tensorflow.examples.common.GraphBuilder
import org.apache.flink.contrib.tensorflow.examples.inception.ImageNormalization._
import org.apache.flink.contrib.tensorflow.examples.inception.ImageNormalizationMethod._
import org.apache.flink.contrib.tensorflow.graphs.{GraphDefGraphLoader, GraphLoader, GraphMethod}
import org.apache.flink.contrib.tensorflow.models.generic.GenericModel
import org.apache.flink.contrib.tensorflow.models.ModelFunction
import org.slf4j.{Logger, LoggerFactory}
import org.tensorflow._
import org.tensorflow.contrib.scala._
import org.tensorflow.framework.{SignatureDef, TensorInfo}

sealed trait ImageNormalizationMethod extends GraphMethod {
  val name = NORMALIZE_METHOD_NAME
  override type Input = ImageFileTensor
  override type Output = ImageTensor
}

object ImageNormalizationMethod {
  val NORMALIZE_METHOD_NAME = "inception/normalize"
  val NORMALIZE_INPUTS = "inputs"
  val NORMALIZE_OUTPUTS = "outputs"

  
  // NOTE: `session` and `signatureDef` are not defined in this excerpt; the surrounding model
  // code in the full source provides them.
  def normalize = ModelFunction[ImageNormalizationMethod](session, signatureDef)
}

object ImageNormalization {

  private[inception] val LOG: Logger = LoggerFactory.getLogger(classOf[ImageNormalization])

} 
Example 187
Source File: ImageInputFormat.scala    From flink-tensorflow   with Apache License 2.0 5 votes vote down vote up
package org.apache.flink.contrib.tensorflow.examples.inception

import java.io.IOException
import java.util.Collections

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.io.GlobFilePathFilter
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow._
import org.apache.flink.contrib.tensorflow.common.functions.util.ModelUtils
import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat
import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat._
import org.apache.flink.core.fs.{FSDataInputStream, Path}
import org.slf4j.{Logger, LoggerFactory}
import org.tensorflow.contrib.scala.ByteStrings._
import resource._

import scala.collection.JavaConverters._


  // NOTE: `model` (the image-normalization model used below) is a member of the enclosing
  // input-format class, whose declaration is not part of this excerpt.
  override def readRecord(
       reuse: (String,ImageTensorValue),
       filePath: Path, fileStream: FSDataInputStream,
       fileLength: Long): (String,ImageTensorValue) = {

    if(fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }
    val imageData =
      readFully(fileStream, new Array[Byte](fileLength.toInt), 0, fileLength.toInt).asByteString[ImageFile]

    val imageTensor: ImageTensorValue =
      managed(imageData.as[ImageFileTensor])
      .flatMap(x => model.normalize(x))
      .acquireAndGet(_.toValue)

    (filePath.getName, imageTensor)
  }
}

object ImageInputFormat {
  def apply(): ImageInputFormat = new ImageInputFormat
} 
Example 188
Source File: InceptionModel.scala    From flink-tensorflow   with Apache License 2.0 5 votes vote down vote up
package org.apache.flink.contrib.tensorflow.examples.inception

import java.net.URI
import java.nio.charset.StandardCharsets

import org.apache.flink.contrib.tensorflow.examples.inception.LabelMethod._
import org.apache.flink.contrib.tensorflow.graphs.{DefaultGraphLoader, GraphLoader, GraphMethod}
import org.apache.flink.contrib.tensorflow.models.generic.GenericModel
import org.apache.flink.contrib.tensorflow.models.ModelFunction
import org.apache.flink.contrib.tensorflow.util.GraphUtils
import org.apache.flink.core.fs.Path
import org.slf4j.{Logger, LoggerFactory}
import org.tensorflow.Tensor
import org.tensorflow.contrib.scala._
import org.tensorflow.framework.{SignatureDef, TensorInfo}

import scala.collection.JavaConverters._

sealed trait LabelMethod extends GraphMethod {
  def name = LABEL_METHOD_NAME
  override type Input = ImageTensor
  override type Output = LabelTensor
}

@SerialVersionUID(1L)
object LabelMethod {
  val LABEL_METHOD_NAME = "inception/label"
  val LABEL_INPUTS = "inputs"
  val LABEL_OUTPUTS = "outputs"

  
    // NOTE: `t` (the label tensor being decoded) is not defined in this excerpt; the full source
    // defines this method on a wrapper around that tensor.
    def toTextLabels(take: Int = 3)(implicit model: InceptionModel): Array[LabeledImage] = {
      // the tensor consists of a row per image, with columns representing label probabilities
      require(t.numDimensions() == 2, "expected a [M N] shaped tensor")
      val matrix = Array.ofDim[Float](t.shape()(0).toInt,t.shape()(1).toInt)
      t.copyTo(matrix)
      matrix.map { row =>
        LabeledImage(row.toList.zip(model.labels).sortWith(_._1 > _._1).take(take))
      }
    }
  }

} 
Example 189
Source File: ProgressBar.scala    From scaladex   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package ch.epfl.scala.index
package data

import me.tongfei.progressbar.{ProgressBar => PB, ProgressBarStyle}
import java.io.{PrintStream, ByteArrayOutputStream, OutputStream}
import org.slf4j.Logger

object ProgressBar {
  def apply(title: String, count: Int, logger: Logger): ProgressBar = {
    new ProgressBar(
      new PB(title, count, 1000, System.out, ProgressBarStyle.UNICODE_BLOCK),
      logger,
      count
    )
  }
}

class ProgressBar(inner: PB, logger: Logger, count: Int) {
  var c = 0
  var printed = 0

  def start(): Unit = {
    inner.start()
  }

  def step(): Unit = {
    inner.step()
    c += 1
    print()
  }

  def stepBy(n: Int): Unit = {
    inner.stepBy(n)
    c += n
    print()
  }

  def stop(): Unit = {
    inner.stop()
  }

  private def print(): Unit = {
    val pp = ((c.toDouble / count) * 100).toInt

    if (printed < pp) {
      logger.debug(pp + "%")
      printed = pp
    }
  }
} 
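Illustrative use of the wrapper above: the console bar updates on every step, while the SLF4J logger only sees one debug line per whole-percent increment, so log output stays compact.

import org.slf4j.LoggerFactory

object ProgressBarDemo {
  def main(args: Array[String]): Unit = {
    val logger = LoggerFactory.getLogger("indexing")   // hypothetical logger name
    val items  = 1 to 500
    val bar    = ProgressBar("Indexing releases", items.size, logger)
    bar.start()
    items.foreach { _ =>
      // ... unit of work ...
      bar.step()
    }
    bar.stop()
  }
}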
Example 190
Source File: PerceptronUtils.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.nlp.annotators.pos.perceptron

import org.slf4j.{Logger, LoggerFactory}
import scala.collection.mutable.{Map => MMap}

trait PerceptronUtils  {

  private[perceptron] val START = Array("-START-", "-START2-")
  private[perceptron] val END = Array("-END-", "-END2-")

  private[perceptron] val logger: Logger = LoggerFactory.getLogger("PerceptronTraining")

  
  private[perceptron] def getFeatures(
                                       init: Int,
                                       word: String,
                                       context: Array[String],
                                       prev: String,
                                       prev2: String
                                     ): Map[String, Int] = {
    val features = MMap[String, Int]().withDefaultValue(0)
    def add(name: String, args: Array[String] = Array()): Unit = {
      features((name +: args).mkString(" ")) += 1
    }
    val i = init + START.length
    add("bias")
    add("i suffix", Array(word.takeRight(3)))
    add("i pref1", Array(word.head.toString))
    add("i-1 tag", Array(prev))
    add("i-2 tag", Array(prev2))
    add("i tag+i-2 tag", Array(prev, prev2))
    add("i word", Array(context(i)))
    add("i-1 tag+i word", Array(prev, context(i)))
    add("i-1 word", Array(context(i-1)))
    add("i-1 suffix", Array(context(i-1).takeRight(3)))
    add("i-2 word", Array(context(i-2)))
    add("i+1 word", Array(context(i+1)))
    add("i+1 suffix", Array(context(i+1).takeRight(3)))
    add("i+2 word", Array(context(i+2)))
    features.toMap
  }
} 
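A sketch of calling getFeatures. The members are package-private, so the call site has to live in the same package, and the context array must carry the START/END sentinels so the i-2 .. i+2 lookups stay in bounds; the word, tags and index below are made up:

package com.johnsnowlabs.nlp.annotators.pos.perceptron

object FeatureDemo extends PerceptronUtils {
  def demo(): Map[String, Int] = {
    val words   = Array("the", "quick", "fox")
    val context = START ++ words ++ END   // "-START-", "-START2-", the, quick, fox, "-END-", "-END2-"
    // `init` is the word's index within `words`; getFeatures adds START.length internally.
    getFeatures(init = 1, word = "quick", context = context, prev = "DT", prev2 = "-START2-")
  }
}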
Example 191
Source File: BookParser.scala    From get-programming-with-scala   with MIT License 5 votes vote down vote up
package org.example.books

import org.example.books.entities.Book
import com.github.tototoshi.csv._
import org.slf4j.{Logger, LoggerFactory}

import scala.io.Source
import scala.util.{Failure, Success, Try}

class BookParser(filePath: String) {

  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val books: List[Book] = {
    loadCSVFile(filePath).flatMap { rowData =>
      Book.parse(rowData) match {
        case Success(book) => Some(book)
        case Failure(ex) =>
          logger.warn(s"Skipping book: Unable to parse row because of ${ex.getMessage} - row was $rowData")
          None
      }
    }
  }

  private def loadCSVFile(path: String): List[Map[String, String]] = {
    logger.info(s"Processing file $path...")
    val file = Source.fromResource(path)
    val reader = CSVReader.open(file)
    val data = reader.allWithHeaders()
    logger.info(s"Completed processing of file $path! ${data.size} records loaded")
    data
  }
} 
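Illustrative usage, assuming a hypothetical books.csv on the classpath (Source.fromResource resolves the path against the classpath rather than the filesystem):

object BookParserDemo {
  def main(args: Array[String]): Unit = {
    val parser = new BookParser("books.csv")   // hypothetical resource name
    parser.books.take(5).foreach(println)
  }
}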
Example 192
Source File: BookService.scala    From get-programming-with-scala   with MIT License 5 votes vote down vote up
package org.example.books

import org.example.books.entities._
import org.slf4j.{Logger, LoggerFactory}

class BookService(bookCatalogPath: String) {
  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  private val books: List[Book] = new BookParser(bookCatalogPath).books

  private var bookLoans: Set[BookLoan] = Set.empty

  def search(title: Option[String] = None,
             author: Option[String] = None): List[Book] =
    books.filter { book =>
      title.forall(t => containsCaseInsensitive(book.title, t)) &&
      author.forall(a => book.authors.exists(containsCaseInsensitive(_, a)))
    }

  private def containsCaseInsensitive(text: String,
                                      substring: String): Boolean =
    text.toLowerCase.contains(substring.toLowerCase)

  def reserveBook(bookId: Long, user: User): Either[String, BookLoan] = {
    val res = for {
      _ <- checkReserveLimits(user)
      book <- checkBookExists(bookId)
      _ <- checkBookIsAvailable(book)
    } yield registerBookLoan(book, user)
    logger.info(s"Book $bookId - User ${user.id} - Reserve request: ${outcomeMsg(res)}")
    res
  }

  def returnBook(bookId: Long): Either[String, BookLoan] = {
    val res = for {
      book <- checkBookExists(bookId)
      user <- checkBookIsTaken(book)
    } yield unregisterBookLoan(book, user)
    logger.info(s"Book $bookId - Return request: ${outcomeMsg(res)}")
    res
  }

  private def outcomeMsg[T](res: Either[String, T]): String =
    res.left.getOrElse("OK")

  private val loanLimit = 5
  private def checkReserveLimits(user: User): Either[String, User] =
    if (bookLoans.count(_.user == user) < loanLimit) Right(user)
    else Left(
      s"You cannot loan more than $loanLimit books at the time")

  private def checkBookExists(bookId: Long): Either[String, Book] =
    books.find(_.id == bookId) match {
      case Some(book) => Right(book)
      case None => Left(s"Book with id $bookId not found")
    }

  private def checkBookIsAvailable(book: Book): Either[String, Book] =
    findBookLoan(book) match {
      case Some(_) => Left(s"Another user has book ${book.id}")
      case None => Right(book)
    }

  private def checkBookIsTaken(book: Book): Either[String, User] =
    findBookLoan(book) match {
      case Some(BookLoan(_, user)) => Right(user)
      case None => Left(s"Book ${book.id} does not result out on loan")
    }

  private def findBookLoan(book: Book): Option[BookLoan] =
    bookLoans.find(_.book == book)

  private def registerBookLoan(book: Book, user: User): BookLoan = {
    val bookLoan = BookLoan(book, user)
    updateBookLoans(loans => loans + bookLoan)
    bookLoan
  }

  private def unregisterBookLoan(book: Book, user: User): BookLoan = {
    val bookLoan = BookLoan(book, user)
    updateBookLoans(loans => loans - bookLoan)
    bookLoan
  }

  private def updateBookLoans(f: Set[BookLoan] => Set[BookLoan]): Unit =
    synchronized { bookLoans = f(bookLoans) }

} 
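A sketch of the reserve/return round trip. The User constructor lives in the entities package and is not shown above, so the caller is assumed to already hold a User; the catalog path is hypothetical:

object BookServiceDemo {
  def run(user: User): Unit = {
    val service = new BookService("books.csv")   // hypothetical catalog resource
    service.reserveBook(1L, user) match {
      case Right(loan) => println(s"Reserved '${loan.book.title}' for user ${user.id}")
      case Left(error) => println(s"Reservation failed: $error")
    }
    service.returnBook(1L)   // Either[String, BookLoan]; ignored in this sketch
  }
}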
Example 193
Source File: MoviesDataset.scala    From get-programming-with-scala   with MIT License 5 votes vote down vote up
package org.example.movies

import com.github.tototoshi.csv._
import org.slf4j.{Logger, LoggerFactory}
import org.example.movies.entities.Movie

import scala.io.Source

class MoviesDataset(moviesMetadataFile: String) {

  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val movies: List[Movie] = {
    val rawData = loadCSVFile(moviesMetadataFile)
    rawData.flatMap { raw =>
      // skip rows that are malformed
      Movie.parse(raw)
    }
  }

  private def loadCSVFile(path: String): List[Map[String, String]] = {
    logger.info(s"Processing file $path...")
    val file = Source.fromResource(path)
    val reader = CSVReader.open(file)
    val data = reader.allWithHeaders()
    logger.info(s"Completed processing of file $path! ${data.size} records loaded")
    data
  }

} 
Example 194
Source File: WSClient.scala    From play-auditing   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.audit

import akka.stream.Materializer
import org.slf4j.{Logger, LoggerFactory}
import play.api.libs.ws.WSClientConfig
import play.api.libs.ws.ahc.{AhcConfigBuilder, AhcWSClientConfig, AhcWSClient}

import scala.concurrent.Future
import scala.concurrent.duration.Duration


package object handler {
  type WSClient = play.api.libs.ws.WSClient

  object WSClient {
    private val logger: Logger = LoggerFactory.getLogger(getClass)

    def apply(
      connectTimeout: Duration,
      requestTimeout: Duration,
      userAgent     : String
    )(implicit
      materializer: Materializer
    ): WSClient =
      new AhcWSClient(
        new AhcConfigBuilder(
          ahcConfig = AhcWSClientConfig()
                        .copy(wsClientConfig = WSClientConfig()
                          .copy(
                            connectionTimeout = connectTimeout,
                            requestTimeout    = requestTimeout,
                            userAgent         = Some(userAgent)
                          )
                        )
        ).build()
      )
  }
} 
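Building one of these clients requires an implicit Materializer. A minimal sketch, assuming Akka 2.6+ (where a Materializer can be created directly from an ActorSystem); the system name, timeouts and user agent are illustrative:

import akka.actor.ActorSystem
import akka.stream.Materializer

import scala.concurrent.duration._

object WSClientDemo {
  def main(args: Array[String]): Unit = {
    implicit val system: ActorSystem        = ActorSystem("audit-demo")
    implicit val materializer: Materializer = Materializer(system)

    val client = WSClient(
      connectTimeout = 5.seconds,
      requestTimeout = 10.seconds,
      userAgent      = "my-service"
    )
    // ... client.url(...).post(...) as in the handlers that follow ...
    client.close()
    system.terminate()
  }
}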
Example 195
Source File: DatastreamHandler.scala    From play-auditing   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.audit.handler

import java.net.URL

import akka.stream.Materializer
import org.slf4j.{Logger, LoggerFactory}
import play.api.inject.ApplicationLifecycle
import play.api.libs.json.JsValue
import uk.gov.hmrc.audit.HandlerResult
import uk.gov.hmrc.audit.HandlerResult.{Failure, Rejected, Success}

import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration.Duration

class DatastreamHandler(
  scheme        : String,
  host          : String,
  port          : Integer,
  path          : String,
  connectTimeout: Duration,
  requestTimeout: Duration,
  userAgent     : String,
  materializer  : Materializer,
  lifecycle     : ApplicationLifecycle
) extends HttpHandler(
  endpointUrl    = new URL(s"$scheme://$host:$port$path"),
  userAgent      = userAgent,
  connectTimeout = connectTimeout,
  requestTimeout = requestTimeout,
  materializer   = materializer,
  lifecycle      = lifecycle
) with AuditHandler {

  private val logger: Logger = LoggerFactory.getLogger(getClass)

  override def sendEvent(event: JsValue)(implicit ec: ExecutionContext): Future[HandlerResult] =
    sendEvent(event, retryIfMalformed = true)

  private def sendEvent(event: JsValue, retryIfMalformed: Boolean)(implicit ec: ExecutionContext): Future[HandlerResult] =
    sendHttpRequest(event).flatMap {
      case HttpResult.Response(status) =>
        Future.successful(status match {
          case 204 => Success
          case 400 => logger.warn("Malformed request rejected by Datastream")
                      Rejected
          case 413 => logger.warn("Too large request rejected by Datastream")
                      Rejected
          case _   => logger.error(s"Unknown return value $status")
                      Failure
        })
      case HttpResult.Malformed =>
        if (retryIfMalformed) {
          logger.warn("Malformed response on first request, retrying")
          sendEvent(event, retryIfMalformed = false)
        } else {
          logger.warn("Malformed response on second request, failing")
          Future.successful(Failure)
        }
      case HttpResult.Failure(msg, exceptionOption) =>
        exceptionOption match {
          case None     => logger.error(msg)
          case Some(ex) => logger.error(msg, ex)
        }
        Future.successful(Failure)
    }
} 
Example 196
Source File: HttpHandler.scala    From play-auditing   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.audit.handler

import java.io.IOException
import java.net.URL
import java.util.concurrent.TimeoutException

import akka.stream.Materializer
import org.slf4j.{Logger, LoggerFactory}
import play.api.inject.ApplicationLifecycle
import play.api.libs.json.JsValue

import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration.Duration


sealed trait HttpResult
object HttpResult {
  case class Response(statusCode: Int) extends HttpResult
  case object Malformed extends HttpResult
  case class Failure(msg: String, nested: Option[Throwable] = None) extends Exception(msg, nested.orNull) with HttpResult
}

abstract class HttpHandler(
  endpointUrl      : URL,
  userAgent        : String,
  connectTimeout   : Duration,
  requestTimeout   : Duration,
  materializer     : Materializer,
  lifecycle        : ApplicationLifecycle
) {
  private val logger: Logger = LoggerFactory.getLogger(getClass)

  val HTTP_STATUS_CONTINUE = 100

  val wsClient: WSClient = {
    implicit val m = materializer
    val wsClient = WSClient(connectTimeout, requestTimeout, userAgent)
    lifecycle.addStopHook { () =>
      logger.info("Closing play-auditing http connections...")
      wsClient.close()
      Future.successful(())
    }
    wsClient
  }

  def sendHttpRequest(event: JsValue)(implicit ec: ExecutionContext): Future[HttpResult] =
    try {
      logger.debug(s"Sending audit request to URL ${endpointUrl.toString}")

      wsClient.url(endpointUrl.toString)
        .post(event)
        .map { response =>
          val httpStatusCode = response.status
          logger.debug(s"Got status code : $httpStatusCode")
          response.body
          logger.debug("Response processed and closed")

          if (httpStatusCode >= HTTP_STATUS_CONTINUE) {
            logger.info(s"Got status code $httpStatusCode from HTTP server.")
            HttpResult.Response(httpStatusCode)
          } else {
            logger.warn(s"Malformed response (status $httpStatusCode) returned from server")
            HttpResult.Malformed
          }
        }.recover {
          case e: TimeoutException =>
            HttpResult.Failure("Error opening connection, or request timed out", Some(e))
          case e: IOException =>
            HttpResult.Failure("Error opening connection, or request timed out", Some(e))
        }
    } catch {
      case t: Throwable =>
        Future.successful(HttpResult.Failure("Error sending HTTP request", Some(t)))
    }
} 
Example 197
Source File: LoggingHandlerSpec.scala    From play-auditing   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.audit.handler

import org.mockito.Mockito._
import org.scalatest.wordspec.AnyWordSpecLike
import org.scalatestplus.mockito.MockitoSugar
import org.slf4j.Logger
import play.api.libs.json.JsString

import scala.concurrent.ExecutionContext.Implicits.global

class LoggingHandlerSpec extends AnyWordSpecLike with MockitoSugar {

  val mockLog: Logger = mock[Logger]
  val loggingHandler = new LoggingHandler(mockLog)

  "LoggingHandler" should {
    "log the event" in {
      val expectedLogContent = """DS_EventMissed_AuditRequestFailure : audit item : "FAILED_EVENT""""

      loggingHandler.sendEvent(JsString("FAILED_EVENT"))

      verify(mockLog).warn(expectedLogContent)
    }
  }
} 
Example 198
Source File: PrometheusUtils.scala    From kafka-lag-exporter   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.kafkalagexporter.integration

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.{HttpRequest, HttpResponse, StatusCodes}
import akka.http.scaladsl.unmarshalling.Unmarshal
import akka.stream.Materializer
import com.lightbend.kafkalagexporter.MetricsSink.GaugeDefinition
import org.scalatest.Matchers
import org.scalatest.concurrent.ScalaFutures
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.{ExecutionContext, Future}
import scala.util.matching.Regex


      // NOTE: `name`, `labels` and `assertion` come from the enclosing builder method, which is
      // not included in this excerpt.
      val regex = s"""$name\\{$labels.*\\}\\s+(-?.+)""".r
      log.debug(s"Created regex: {}", regex.pattern.toString)
      Rule(regex, assertion)
    }
  }

  case class Rule(regex: Regex, assertion: String => _)

  case class Result(rule: Rule, groupResults: List[String]) {
    def assertDne(): Unit = {
      log.debug(s"Rule: ${rule.regex.toString}")
      groupResults.length shouldBe 0
    }

    def assert(): Unit = {
      log.debug(s"Rule: ${rule.regex.toString}")
      groupResults.length shouldBe 1
      log.debug(s"Actual value is ${groupResults.head}")
      rule.assertion(groupResults.head)
    }
  }
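The Rule regex above is matched against Prometheus exposition-format text. A self-contained sketch of that matching against a made-up scrape line (the metric name, labels and value are illustrative):

object RegexMatchDemo {
  def main(args: Array[String]): Unit = {
    val name   = "kafka_consumergroup_group_lag"
    val labels = """cluster_name="default",group="my-group""""
    val regex  = s"""$name\\{$labels.*\\}\\s+(-?.+)""".r

    val scrapeLine = """kafka_consumergroup_group_lag{cluster_name="default",group="my-group"} 42.0"""
    scrapeLine match {
      case regex(value) => println(s"lag = $value")   // captures the sample value, here "42.0"
      case _            => println("no match")
    }
  }
}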
} 
Example 199
Source File: SignalLogger.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import org.apache.commons.lang3.SystemUtils
import org.slf4j.Logger
import sun.misc.{Signal, SignalHandler}


  // NOTE: `registered` is a flag on the enclosing object (not shown here) that prevents the
  // handlers from being installed twice.
  def register(log: Logger): Unit = synchronized {
    if (SystemUtils.IS_OS_UNIX) {
      require(!registered, "Can't re-install the signal handlers")
      registered = true

      val signals = Seq("TERM", "HUP", "INT")
      for (signal <- signals) {
        try {
          new SignalLoggerHandler(signal, log)
        } catch {
          case e: Exception => log.warn("Failed to register signal handler " + signal, e)
        }
      }
      log.info("Registered signal handlers for [" + signals.mkString(", ") + "]")
    }
  }
}

private sealed class SignalLoggerHandler(name: String, log: Logger) extends SignalHandler {

  val prevHandler = Signal.handle(new Signal(name), this)

  override def handle(signal: Signal): Unit = {
    log.error("RECEIVED SIGNAL " + signal.getNumber() + ": SIG" + signal.getName())
    prevHandler.handle(signal)
  }
} 
Example 200
Source File: ActorLogReceive.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import akka.actor.Actor
import org.slf4j.Logger


private[spark] trait ActorLogReceive {
  self: Actor =>

  override def receive: Actor.Receive = new Actor.Receive {

    private val _receiveWithLogging = receiveWithLogging

    override def isDefinedAt(o: Any): Boolean = {
      val handled = _receiveWithLogging.isDefinedAt(o)
      if (!handled) {
        log.debug(s"Received unexpected actor system event: $o")
      }
      handled
    }

    override def apply(o: Any): Unit = {
      if (log.isDebugEnabled) {
        log.debug(s"[actor] received message $o from ${self.sender}")
      }
      val start = System.nanoTime
      _receiveWithLogging.apply(o)
      val timeTaken = (System.nanoTime - start).toDouble / 1000000
      if (log.isDebugEnabled) {
        log.debug(s"[actor] handled message ($timeTaken ms) $o from ${self.sender}")
      }
    }
  }

  def receiveWithLogging: Actor.Receive

  protected def log: Logger
}
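An illustrative actor mixing the trait in: the only obligations are a receiveWithLogging block and an SLF4J log instance. (ActorLogReceive is private[spark], so outside Spark this sketch assumes the trait is otherwise accessible, e.g. as a local copy.)

import akka.actor.{Actor, ActorSystem, Props}
import org.slf4j.{Logger, LoggerFactory}

class EchoActor extends Actor with ActorLogReceive {
  override protected val log: Logger = LoggerFactory.getLogger(getClass)

  override def receiveWithLogging: Actor.Receive = {
    case msg: String => sender() ! msg   // the trait logs the message at debug level (when enabled) around this handler
  }
}

// Usage sketch:
// val system = ActorSystem("demo")
// val echo   = system.actorOf(Props[EchoActor], "echo")
// echo ! "hello"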