package com.sev7e0.wow.structured_streaming

import com.sev7e0.wow.spark_streaming.StreamingLogger
import org.apache.spark.sql.SparkSession

object A_5_StreamingWordWcount {

  val MASTER = "local"
  val HOST = "localhost"
  val PORT = 9999

  /**
    * 测试:
    * os:macOS
    * command:netcat -lp 9999
    *
    * 随意输入
    */
  def main(args: Array[String]): Unit = {

    //创建SparkSession对象
    val spark = SparkSession.builder()
      .appName(A_5_StreamingWordWcount.getClass.getName)
      .master(MASTER)
      .getOrCreate()

    StreamingLogger.setLoggerLevel()

    //输入表
    val line = spark
      .readStream
      .format("socket")
      .option("host", HOST)
      .option("port", PORT)
      .load()

    //打印结构
    line.printSchema()

    //DataFrame隐式转换为DataSet
    import spark.implicits._
    val word = line.as[String].flatMap(_.split(" "))

    //对流进行操作
    val count = word.groupBy("value").count()

    val query = count.writeStream
      .outputMode(outputMode = "complete")
      .format("console")
      .start()

    query.awaitTermination()
  }

}