import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.streaming.http.HttpStreamServer
import org.apache.spark.sql.execution.streaming.http.StreamPrinter
import org.apache.spark.sql.execution.streaming.http.HttpStreamSourceProvider
import org.apache.spark.sql.execution.streaming.http.HttpStreamSinkProvider

/**
 * This demo tests HttpStreamSource and HttpStreamSink:
 * 1. on machine A, run 'HttpStreamDemo start-server-on 8080 /xxxx'; this starts an HTTP server which receives data sent from machine B
 * 2. on machine B, run 'nc -lk 9999'
 * 3. on machine B, run 'HttpStreamDemo write-into http://machine-a-host:8080/xxxx' (it reads from tcp://localhost:9999, so it must run beside nc)
 * 4. on machine C, run 'HttpStreamDemo read-from http://machine-a-host:8080/xxxx'
 * 5. type some text into nc; the data is received by HttpStreamSink, buffered on the HTTP server, consumed by HttpStreamSource, and finally displayed on the console
 */
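
// A sketch of how each role might be launched with spark-submit (the jar name and the
// presence of the spark-http-stream dependency on the classpath are assumptions, not
// part of this file):
//
//   spark-submit --class HttpStreamDemo http-stream-demo.jar start-server-on 8080 /xxxx
//   spark-submit --class HttpStreamDemo http-stream-demo.jar write-into http://machine-a-host:8080/xxxx
//   spark-submit --class HttpStreamDemo http-stream-demo.jar read-from http://machine-a-host:8080/xxxx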

object HttpStreamDemo {

	def printUsage(): Unit = {
		println("USAGE:")
		// strip the trailing '$' that getSimpleName yields for a Scala object
		val name = this.getClass.getSimpleName.stripSuffix("$")
		println(s"\t$name start-server-on 8080 /xxxx")
		println(s"\t$name write-into http://localhost:8080/xxxx")
		println(s"\t$name read-from http://localhost:8080/xxxx")
	}

	def main(args: Array[String]): Unit = {
		// dispatch on the first argument; fall back to the usage text on missing or unknown arguments
		args.headOption match {
			case Some("write-into") if args.length >= 2      => runAsSink(args(1))
			case Some("start-server-on") if args.length >= 3 => runAsReceiver(args(2), args(1).toInt)
			case Some("read-from") if args.length >= 2       => runAsSource(args(1))
			case _                                           => printUsage()
		}
	}

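	/**
	 * Producer side: reads text lines from the socket source at tcp://localhost:9999
	 * (fed by 'nc -lk 9999') and forwards each micro-batch to the HTTP server at
	 * httpServletURL via HttpStreamSink, under topic "topic-1".
	 */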
	def runAsSink(httpServletURL: String): Unit = {
		val spark = SparkSession.builder.appName("HttpStreamDemo").master("local[4]")
			.getOrCreate()

		println("reading from tcp://localhost:9999")
		println(s"writing into $httpServletURL")

		// socket -> HttpStreamSink: each line typed into nc becomes a row of this streaming DataFrame
		val lines = spark.readStream
			.format("socket")
			.option("host", "localhost")
			.option("port", 9999)
			.load()

		// base checkpoint directory; each streaming query checkpoints into its own subdirectory
		spark.conf.set("spark.sql.streaming.checkpointLocation", "/tmp/")

		// forward every micro-batch to the HTTP server; the topic name must match the
		// one created by the receiver
		val query = lines.writeStream
			.format(classOf[HttpStreamSinkProvider].getName)
			.option("httpServletUrl", httpServletURL)
			.option("topic", "topic-1")
			.start()

		query.awaitTermination()
	}

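	/**
	 * Server side: starts an HttpStreamServer listening on httpPort at servletPath,
	 * backed by an in-memory buffer. The registered StreamPrinter prints incoming
	 * batches, and topic "topic-1" is the channel that the sink writes into and the
	 * source later consumes. The method returns immediately; presumably the embedded
	 * HTTP server's non-daemon threads keep the JVM alive.
	 */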
	def runAsReceiver(servletPath: String, httpPort: Int): Unit = {
		val spark = SparkSession.builder.appName("HttpStreamDemo").master("local[4]")
			.getOrCreate()

		import spark.implicits._

		// start an HTTP server with an in-memory buffer; StreamPrinter logs each
		// incoming batch, and "topic-1" is the topic shared with the sink and the source
		HttpStreamServer.start(servletPath, httpPort)
			.withBuffer()
			.addListener(new StreamPrinter())
			.createTopic[String]("topic-1")
	}

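	/**
	 * Consumer side: subscribes to topic "topic-1" on the HTTP server via
	 * HttpStreamSource, runs a streaming word count over the received lines, and
	 * prints the updated counts to the console in complete output mode.
	 */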
	def runAsSource(httpServletURL: String): Unit = {
		val spark = SparkSession.builder.appName("HttpStreamDemo").master("local[4]")
			.getOrCreate()

		// base checkpoint directory; each streaming query checkpoints into its own subdirectory
		spark.conf.set("spark.sql.streaming.checkpointLocation", "/tmp/")

		// HttpStreamSource -> word count -> console:
		// subscribe to topic "topic-1" on the HTTP server as a streaming source
		val lines = spark.readStream.format(classOf[HttpStreamSourceProvider].getName)
			.option("httpServletUrl", httpServletURL)
			.option("topic", "topic-1").load()

		import spark.implicits._
		val words = lines.as[String].flatMap(_.split(" "))
		val wordCounts = words.groupBy("value").count()

		// complete mode re-emits the full counts table on every trigger
		val query = wordCounts.writeStream
			.outputMode("complete")
			.format("console")
			.start()

		query.awaitTermination()
	}
}