org.apache.spark.util.JsonProtocol Scala Examples

The following examples show how to use org.apache.spark.util.JsonProtocol. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
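Most of the examples below call one of two entry points: JsonProtocol.sparkEventToJson to serialize a SparkListenerEvent, and JsonProtocol.sparkEventFromJson to reconstruct one. A minimal round-trip sketch (assuming a Spark build, as in the examples below, where these methods exchange json4s JValue values; recent Spark releases moved to string-based variants):

import org.json4s.jackson.JsonMethods.{compact, parse}

import org.apache.spark.scheduler.{SparkListenerApplicationEnd, SparkListenerEvent}
import org.apache.spark.util.JsonProtocol

object JsonProtocolRoundTrip {
  def main(args: Array[String]): Unit = {
    val event: SparkListenerEvent = SparkListenerApplicationEnd(time = 42L)

    // Serialize the event to a json4s JValue and render it as a compact JSON string.
    val json = JsonProtocol.sparkEventToJson(event)
    val jsonString = compact(json)
    println(jsonString)

    // Parse the string back and reconstruct an equal event.
    val restored = JsonProtocol.sparkEventFromJson(parse(jsonString))
    assert(restored == event)
  }
}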
Example 1
Source File: SourceStatus.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.streaming

import java.{util => ju}

import scala.collection.JavaConverters._

import org.json4s._
import org.json4s.JsonAST.JValue
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.streaming.StreamingQueryStatus.indent
import org.apache.spark.util.JsonProtocol


private[sql] object SourceStatus {
  def apply(
      desc: String,
      offsetDesc: String,
      inputRate: Double,
      processingRate: Double,
      triggerDetails: Map[String, String]): SourceStatus = {
    new SourceStatus(desc, offsetDesc, inputRate, processingRate, triggerDetails.asJava)
  }
} 
Example 2
Source File: ReplayListenerBus.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.ReplayListenerBus._
import org.apache.spark.util.JsonProtocol

// A SparkListenerBus that replays events from serialized event data.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {


  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false,
      eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = {

    var currentLine: String = null
    var lineNumber: Int = 0

    try {
      val lineEntries = Source.fromInputStream(logData)
        .getLines()
        .zipWithIndex
        .filter { case (line, _) => eventsFilter(line) }

      while (lineEntries.hasNext) {
        try {
          val entry = lineEntries.next()

          currentLine = entry._1
          lineNumber = entry._2 + 1

          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated
            // the last entry may not be the very last line in the event log, but we treat it
            // as such in a best effort to replay the given input
            if (!maybeTruncated || lineEntries.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

}


private[spark] object ReplayListenerBus {

  type ReplayEventsFilter = (String) => Boolean

  // utility filter that selects all event logs during replay
  val SELECT_ALL_FILTER: ReplayEventsFilter = { (eventString: String) => true }
} 
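ReplayListenerBus is private[spark], so code that drives it (such as the history server's FsHistoryProvider) lives inside the org.apache.spark.scheduler package. A hedged usage sketch, not part of drizzle-spark, that attaches a listener and replays an event-log stream:

package org.apache.spark.scheduler

import java.io.{BufferedInputStream, FileInputStream}

object ReplayExample {
  def main(args: Array[String]): Unit = {
    val bus = new ReplayListenerBus()

    // Print every replayed job start; any SparkListener can be attached.
    bus.addListener(new SparkListener {
      override def onJobStart(jobStart: SparkListenerJobStart): Unit =
        println(s"Replayed job ${jobStart.jobId} with ${jobStart.stageInfos.size} stages")
    })

    val in = new BufferedInputStream(new FileInputStream(args(0)))
    try {
      // maybeTruncated = true tolerates a half-written final line (e.g. a live application).
      bus.replay(in, sourceName = args(0), maybeTruncated = true)
    } finally {
      in.close()
    }
  }
}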
Example 3
Source File: SQLJsonProtocolSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution

import org.json4s.jackson.JsonMethods.parse

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart
import org.apache.spark.util.JsonProtocol

class SQLJsonProtocolSuite extends SparkFunSuite {

  test("SparkPlanGraph backward compatibility: metadata") {
    val SQLExecutionStartJsonString =
      """
        |{
        |  "Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart",
        |  "executionId":0,
        |  "description":"test desc",
        |  "details":"test detail",
        |  "physicalPlanDescription":"test plan",
        |  "sparkPlanInfo": {
        |    "nodeName":"TestNode",
        |    "simpleString":"test string",
        |    "children":[],
        |    "metadata":{},
        |    "metrics":[]
        |  },
        |  "time":0
        |}
      """.stripMargin
    val reconstructedEvent = JsonProtocol.sparkEventFromJson(parse(SQLExecutionStartJsonString))
    val expectedEvent = SparkListenerSQLExecutionStart(0, "test desc", "test detail", "test plan",
      new SparkPlanInfo("TestNode", "test string", Nil, Map(), Nil), 0)
    assert(reconstructedEvent == expectedEvent)
  }
} 
Example 4
Source File: UnifiedSparkListener.scala    From spark-monitoring   with MIT License
package org.apache.spark.listeners

import java.time.Instant

import org.apache.spark.{SparkConf, SparkException, SparkInformation}
import org.apache.spark.internal.Logging
import org.apache.spark.listeners.sink.SparkListenerSink
import org.apache.spark.scheduler._
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.JsonProtocol
import org.json4s.JsonAST.JValue
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, render}

import scala.util.control.NonFatal


class UnifiedSparkListener(override val conf: SparkConf)
  extends UnifiedSparkListenerHandler
    with Logging
    with SparkListenerHandlers
    with StreamingListenerHandlers
    with StreamingQueryListenerHandlers {

  private val listenerSink = this.createSink(this.conf)

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    // All events in Spark that are not specific to SparkListener go through
    // this method.  The typed ListenerBus implementations intercept and forward to
    // their "local" listeners.
    // We will just handle everything here so we only have to have one listener.
    // The advantage is that this can be registered in extraListeners, so no
    // code change is required to add listener support.
    event match {
      // We will use the ClassTag for the private wrapper class to match
      case this.streamingListenerEventClassTag(e) =>
        this.onStreamingListenerEvent(e)
      case streamingQueryListenerEvent: StreamingQueryListener.Event =>
        this.onStreamingQueryListenerEvent(streamingQueryListenerEvent)
      case sparkListenerEvent: SparkListenerEvent => if (sparkListenerEvent.logEvent) {
        logSparkListenerEvent(sparkListenerEvent)
      }
    }
  }

  private def createSink(conf: SparkConf): SparkListenerSink = {
    val sink = conf.getOption("spark.unifiedListener.sink") match {
      case Some(listenerSinkClassName) => listenerSinkClassName
      case None => throw new SparkException("spark.unifiedListener.sink setting is required")
    }
    logInfo(s"Creating listener sink: ${sink}")
    org.apache.spark.util.Utils.loadExtensions(
      classOf[SparkListenerSink],
      Seq(sink),
      conf).head
  }

  protected def logSparkListenerEvent(
      event: SparkListenerEvent,
      getTimestamp: () => Instant = () => Instant.now()): Unit = {
    val json = try {
      // Add a well-known time field.
      Some(
        JsonProtocol.sparkEventToJson(event)
          .merge(render(
            SparkInformation.get() + ("SparkEventTime" -> getTimestamp().toString)
          ))
      )
    } catch {
      case NonFatal(e) =>
        logError(s"Error serializing SparkListenerEvent to JSON: $event", e)
        None
    }

    sendToSink(json)
  }

  private[spark] def sendToSink(json: Option[JValue]): Unit = {
    try {
      json match {
        case Some(j) => {
          logDebug(s"Sending event to listener sink: ${compact(j)}")
          this.listenerSink.logEvent(json)
        }
        case None => {
          logWarning("json value was None")
        }
      }
    } catch {
      case NonFatal(e) =>
        logError(s"Error sending to listener sink: $e")
    }
  }
} 
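As the comments in onOtherEvent suggest, this listener is designed to be enabled purely through configuration. A sketch of the wiring (the sink class name below is a placeholder; spark.unifiedListener.sink must name a SparkListenerSink implementation that is actually on the classpath):

import org.apache.spark.sql.SparkSession

object UnifiedListenerDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("unified-listener-demo")
      // Register the listener without any application code changes.
      .config("spark.extraListeners", "org.apache.spark.listeners.UnifiedSparkListener")
      // Placeholder sink class name; spark.unifiedListener.sink is required,
      // otherwise createSink throws a SparkException.
      .config("spark.unifiedListener.sink", "com.example.sink.MyListenerSink")
      .getOrCreate()

    spark.range(10).count()  // any work; its events flow through the listener to the sink
    spark.stop()
  }
}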
Example 5
Source File: ReplayListenerBus.scala    From SparkCore   with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

// A SparkListenerBus that replays events from serialized event data.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {


  def replay(logData: InputStream, sourceName: String): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      lines.foreach { line =>
        currentLine = line
        postToAll(JsonProtocol.sparkEventFromJson(parse(line)))
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

} 
Example 6
Source File: MergeIntoAccumulatorSuite.scala    From delta   with Apache License 2.0
package org.apache.spark.sql.delta

import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.sql.delta.commands.MergeIntoCommand
import org.apache.spark.sql.delta.test.DeltaSQLCommandTest

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.status.TaskDataWrapper
import org.apache.spark.util.JsonProtocol


class MergeIntoAccumulatorSuite extends QueryTest with SharedSparkSession with DeltaSQLCommandTest {

  import testImplicits._

  private def runTestMergeCommand(): Unit = {
    // Run a simple merge command
    withTempView("source") {
      withTempDir { tempDir =>
        val tempPath = tempDir.getCanonicalPath
        Seq((1, 1), (0, 3)).toDF("key", "value").createOrReplaceTempView("source")
        Seq((2, 2), (1, 4)).toDF("key", "value").write.format("delta").save(tempPath)
        spark.sql(s"""
          |MERGE INTO delta.`$tempPath` target
          |USING source src
          |ON src.key = target.key
          |WHEN MATCHED THEN UPDATE SET *
          |WHEN NOT MATCHED THEN INSERT *
          |""".stripMargin)
      }
    }
  }

  test("accumulators used by MERGE should not be tracked by Spark UI") {
    runTestMergeCommand()

    // Make sure all Spark events generated by the above command have been processed
    spark.sparkContext.listenerBus.waitUntilEmpty(30000)

    val store = spark.sparkContext.statusStore.store
    val iter = store.view(classOf[TaskDataWrapper]).closeableIterator()
    try {
      // Collect all accumulator names tracked by Spark UI.
      val accumNames = iter.asScala.toVector.flatMap { task =>
        task.accumulatorUpdates.map(_.name)
      }.toSet
      // Verify accumulators used by MergeIntoCommand are not tracked.
      assert(!accumNames.contains(MergeIntoCommand.TOUCHED_FILES_ACCUM_NAME))
    } finally {
      iter.close()
    }
  }

} 
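The JsonProtocol import is presumably used in parts of the suite not shown in this excerpt. As a related, hypothetical sketch (not part of the Delta test): a listener that prints each finished task's accumulators via JsonProtocol.accumulableInfoToJson, making it easy to see which accumulator names would land in the event log or UI.

import org.json4s.jackson.JsonMethods.compact

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
import org.apache.spark.util.JsonProtocol

// Hypothetical helper: dump every finished task's accumulators as JSON lines.
class AccumulatorDumpListener extends SparkListener {
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    taskEnd.taskInfo.accumulables.foreach { acc =>
      println(compact(JsonProtocol.accumulableInfoToJson(acc)))
    }
  }
}

// Usage: sparkContext.addSparkListener(new AccumulatorDumpListener())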
Example 7
Source File: ReplayListenerBus.scala    From iolap   with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

// A SparkListenerBus that replays events from serialized event data.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {


  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      while (lines.hasNext) {
        currentLine = lines.next()
        try {
          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated
            if (!maybeTruncated || lines.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

} 
Example 8
Source File: ReplayListenerBus.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

// A SparkListenerBus that replays events from serialized event data.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {


  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      while (lines.hasNext) {
        currentLine = lines.next()
        try {
          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore an exception from the last line of a file that might be truncated
            if (!maybeTruncated || lines.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

} 
Example 9
Source File: SQLJsonProtocolSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.execution

import org.json4s.jackson.JsonMethods.parse

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart
import org.apache.spark.util.JsonProtocol

class SQLJsonProtocolSuite extends SparkFunSuite {

  test("SparkPlanGraph backward compatibility: metadata") {
    val SQLExecutionStartJsonString =
      """
        |{
        |  "Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart",
        |  "executionId":0,
        |  "description":"test desc",
        |  "details":"test detail",
        |  "physicalPlanDescription":"test plan",
        |  "sparkPlanInfo": {
        |    "nodeName":"TestNode",
        |    "simpleString":"test string",
        |    "children":[],
        |    "metadata":{},
        |    "metrics":[]
        |  },
        |  "time":0
        |}
      """.stripMargin
    val reconstructedEvent = JsonProtocol.sparkEventFromJson(parse(SQLExecutionStartJsonString))
    val expectedEvent = SparkListenerSQLExecutionStart(0, "test desc", "test detail", "test plan",
      new SparkPlanInfo("TestNode", "test string", Nil, Nil), 0)
    assert(reconstructedEvent == expectedEvent)
  }
} 
Example 10
Source File: ReplayListenerBus.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

// A SparkListenerBus that replays events from serialized event data.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {


  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      while (lines.hasNext) {
        currentLine = lines.next()
        try {
          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated
            if (!maybeTruncated || lines.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

}