org.apache.spark.scheduler.SparkListenerJobStart Scala Examples

The following examples show how to use org.apache.spark.scheduler.SparkListenerJobStart. Follow the link above each example to view the original project or source file.
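All three examples share the same shape: register a SparkListener on the SparkContext and override onJobStart to react to the SparkListenerJobStart event. A minimal sketch of that pattern, assuming an existing SparkContext (the JobStartLogger object and its log output are illustrative, not taken from the projects below):

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}

object JobStartLogger {
  def register(sc: SparkContext): Unit = {
    sc.addSparkListener(new SparkListener {
      override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
        // The event carries the job id, submission time, and the stages planned for the job.
        println(s"Job ${jobStart.jobId} started at ${jobStart.time} " +
          s"with ${jobStart.stageInfos.size} stages")
      }
    })
  }
}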
Example 1
Source File: CleanupUtil.scala    From hazelcast-spark    with Apache License 2.0
package com.hazelcast.spark.connector.util

import com.hazelcast.spark.connector.util.ConnectionUtil.closeAll
import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart}

object CleanupUtil {

  val jobIds: collection.mutable.Map[Int, Seq[Int]] = collection.mutable.Map[Int, Seq[Int]]()
  val cleanupJobRddName: String = "HazelcastResourceCleanupJob"

  def addCleanupListener(sc: SparkContext): Unit = {
    sc.addSparkListener(new SparkListener {
      override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
        this.synchronized {
          jobStart.stageInfos.foreach { info =>
            // Skip the cleanup job's own stages; record each stage's RDD ids once
            // (the original per-RDD loop appended the same ids repeatedly).
            if (info.rddInfos.exists(rdd => !cleanupJobRddName.equals(rdd.name))) {
              val ids: Seq[Int] = info.rddInfos.map(_.id)
              jobIds.put(jobStart.jobId, ids ++ jobIds.getOrElse(jobStart.jobId, Seq.empty))
            }
          }
        }
      }

      override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
        this.synchronized {
          if (jobIds.contains(jobEnd.jobId)) {
            try {
              val workers = sc.getConf.getInt("spark.executor.instances", sc.getExecutorStorageStatus.length)
              val rddIds: Option[Seq[Int]] = jobIds.get(jobEnd.jobId)
              if (rddIds.isDefined) {
                sc.parallelize(1 to workers, workers).setName(cleanupJobRddName).foreachPartition(_ => closeAll(rddIds.get))
              }
              }
              jobIds -= jobEnd.jobId
            } catch {
              case e: Exception =>
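                // deliberately swallowed: cleanup is best-effort and must not fail the application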
            }
          }
        }
      }
    })
  }


} 
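A minimal sketch of how this utility could be wired into a driver program (the SparkConf setup is illustrative; only addCleanupListener comes from the connector):

import org.apache.spark.{SparkConf, SparkContext}
import com.hazelcast.spark.connector.util.CleanupUtil

object CleanupDemo {
  def main(args: Array[String]): Unit = {
    // Illustrative driver setup: register the listener once, before any jobs run.
    val conf = new SparkConf().setAppName("hazelcast-cleanup-demo").setMaster("local[*]")
    val sc = new SparkContext(conf)
    CleanupUtil.addCleanupListener(sc)
    // From here on, every tracked job end triggers a best-effort closeAll(...)
    // on each worker via the "HazelcastResourceCleanupJob" RDD.
  }
}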
Example 2
Source File: ApplicationIdleMonitor.scala    From XSQL    with Apache License 2.0
package org.apache.spark.monitor.job

import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit}
import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.JobExecutionStatus
import org.apache.spark.alarm.{AlertMessage, HtmlMessage}
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.{SparkListenerEvent, SparkListenerJobEnd, SparkListenerJobStart}
import org.apache.spark.status.JobDataWrapper

class ApplicationIdleMonitor extends JobMonitor {

  override val item: MonitorItem = MonitorItem.APP_IDLE_WARNER
  val delayThread = Executors.newScheduledThreadPool(1)
  lazy val endureLimit =
    conf.getTimeAsMs(s"${Monitor.PREFIX}.${item.toString.toLowerCase}.timeout", "1h")
  private val idleTimeout: AtomicReference[ScheduledFuture[_]] = new AtomicReference()

  private def getActiveJobNum(): Int = {
//    appStore.count(classOf[JobDataWrapper], "completionTime", -1L)
    kvStore
      .view(classOf[JobDataWrapper])
      .reverse()
      .asScala
      .map(_.info)
      .filter(_.status == JobExecutionStatus.RUNNING)
      .size
  }

  private def stopIdleTimeout(): Unit = {
    val idleTimeout = this.idleTimeout.getAndSet(null)
    if (idleTimeout != null) {
      idleTimeout.cancel(false)
    }
  }

  private def setupIdleTimeout(): Unit = {
    if (getActiveJobNum() > 0) return
    val timeoutTask = new Runnable() {
      override def run(): Unit = {
        // scalastyle:off
        val driverUrl = conf
          .get(
            "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES")
          .split(",")
          .head
        val html = <h2>Your Spark application</h2>
            <a href={driverUrl}>{driverUrl}</a>
            <h2>has been idle for more than {conf.get(
              s"${Monitor.PREFIX}.${item}.timeout", "1h")}</h2>
            <h2>Please shut it down promptly</h2>
        val message = new HtmlMessage(title = item, content = html.mkString)
        alarms.foreach(_.alarm(message))
        // scalastyle:on
      }
    }

    val timeout = delayThread
      .scheduleWithFixedDelay(timeoutTask, endureLimit, endureLimit, TimeUnit.MILLISECONDS)
    // If there's already an idle task registered, then cancel the new one.
    if (!this.idleTimeout.compareAndSet(null, timeout)) {
      timeout.cancel(false)
    }
    // If a new client connected while the idle task was being set up, then stop the task.
    if (getActiveJobNum() > 0) stopIdleTimeout()
  }

  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = {
    event match {
      case _: SparkListenerJobStart =>
        stopIdleTimeout()
        Option.empty
      case _: SparkListenerJobEnd =>
        setupIdleTimeout()
        Option.empty
      case _ =>
        Option.empty
    }
  }
} 
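The arm/disarm logic around AtomicReference[ScheduledFuture[_]] is the core of this monitor: a job start cancels any pending idle alarm, and a job end re-arms it if no jobs remain active. A self-contained sketch of that pattern outside the XSQL monitor framework (all names here are illustrative):

import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit}
import java.util.concurrent.atomic.AtomicReference

object IdleTimeoutPattern {
  private val scheduler = Executors.newScheduledThreadPool(1)
  private val pending = new AtomicReference[ScheduledFuture[_]]()

  // Arm the alarm: run onIdle every timeoutMs until disarmed.
  def arm(timeoutMs: Long)(onIdle: => Unit): Unit = {
    val task = scheduler.scheduleWithFixedDelay(
      new Runnable { override def run(): Unit = onIdle },
      timeoutMs, timeoutMs, TimeUnit.MILLISECONDS)
    // Keep whichever task registered first; cancel ours if we lost the race.
    if (!pending.compareAndSet(null, task)) task.cancel(false)
  }

  // Disarm: cancel the pending alarm, if any.
  def disarm(): Unit = {
    val task = pending.getAndSet(null)
    if (task != null) task.cancel(false)
  }
}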
Example 3
Source File: QueryGuardListener.scala    From gimel    with Apache License 2.0
package com.paypal.gimel.common.query.guard

import java.util.concurrent.atomic.AtomicBoolean

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted}
import org.apache.spark.sql.SparkSession
import org.joda.time.{DateTime, Instant}

import com.paypal.gimel.common.conf.{QueryGuardConfigs, QueryGuardConstants}
import com.paypal.gimel.common.utilities.GenericUtils
import com.paypal.gimel.logger.Logger

class QueryGuardListener[E >: QueryGuardDelayedEvent](spark: SparkSession,
                                                      discoveryType: String =
                                                        "job")
    extends SparkListener
    with Producer[E] {
  private val logger = new Logger(this.getClass.getName)
  private val stopped = new AtomicBoolean(true)
  private val HEADER: String = "[DISCOVERY] "
  private var _consumers: Seq[Consumer[E]] = Seq.empty

  override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
    logger.info(
      s"${HEADER}Job[${jobStart.jobId}] started with ${jobStart.stageInfos.size} stages @ ${Instant.now()}"
    )
    if (!stopped.get) {
      val job = JobSubmitted(
        jobStart.jobId,
        discoveryType,
        System.currentTimeMillis(),
        jobTtl = GenericUtils.getValue(
          spark.conf,
          QueryGuardConfigs.JOB_TTL,
          QueryGuardConstants.DEFAULT_JOB_TTL
        ),
        delayTtl = GenericUtils.getValue(
          spark.conf,
          QueryGuardConfigs.DELAY_TTL,
          QueryGuardConstants.DEFAULT_DELAY_TTL
        )
      )
      logger.info(
        s"${HEADER}Proceeding to queue in Job[${jobStart.jobId}] onto QueryGuard"
      )
      publish(job)
    } else {
      logger.info(
        s"${HEADER}QueryGuardListener is stopped (stopped=${stopped.get()});" +
          s" unable to queue in Job[${jobStart.jobId}]"
      )
    }
  }

  override def publish(queryGuardEvent: E): Unit = {
    for (consumer <- _consumers) {
      consumer.consume(queryGuardEvent)
    }
  }

  override def onStageCompleted(
    stageCompleted: SparkListenerStageCompleted
  ): Unit = {
    logger.info(
      s"Stage ${stageCompleted.stageInfo.stageId} completed with ${stageCompleted.stageInfo.numTasks} tasks."
    )
  }

  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    logger.info(
      s"Job[${jobEnd.jobId}] completed at ${new DateTime(jobEnd.time)}" +
        s" with result -> ${jobEnd.jobResult}"
    )
    super.onJobEnd(jobEnd)
  }

  override def registerConsumers(consumers: Seq[Consumer[E]]): Unit = {
    _consumers = consumers
  }

  def start(): Unit = {
    // toggle stopped to false so incoming jobs are queued
    stopped.set(false)
    logger.info(s"${HEADER}Started QueryGuardListener: $stopped")
  }

  def stop(): Unit = {
    // toggle stopped to true
    stopped.compareAndSet(false, true)
    logger.info(s"${HEADER}Stopped QueryGuardListener: $stopped")
  }
}
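Wiring the listener up could look like the sketch below. Based on the publish loop above, it assumes gimel's Consumer[E] exposes a single consume(event: E) method and lives in the same package as the listener; the logging consumer is a hypothetical stand-in for the real Query Guard consumers:

import org.apache.spark.sql.SparkSession
import com.paypal.gimel.common.query.guard.{Consumer, QueryGuardDelayedEvent, QueryGuardListener}

object QueryGuardDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("query-guard-demo").getOrCreate()
    val listener = new QueryGuardListener[QueryGuardDelayedEvent](spark)
    // Hypothetical consumer that just logs each event it receives.
    listener.registerConsumers(Seq(new Consumer[QueryGuardDelayedEvent] {
      override def consume(event: QueryGuardDelayedEvent): Unit =
        println(s"Query Guard event: $event")
    }))
    listener.start() // flips `stopped` to false so incoming jobs are queued
    spark.sparkContext.addSparkListener(listener)
  }
}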