package net.jgp.labs.spark.l020_dstream.l020_filesystem_text_dataframe_class;

import java.io.Serializable;

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import net.jgp.labs.spark.x.utils.streaming.StreamingUtils;

/**
 * Streaming ingestion example: reads text files from a directory as a
 * discretized stream (DStream) and hands every micro-batch RDD to a
 * separate {@code RowProcessor} class.
 *
 * <p>
 * The input directory is supplied by
 * {@code StreamingUtils.getInputDirectory()}. {@code RowProcessor} is
 * defined outside this file; judging by this lab's name it presumably turns
 * each batch into a dataframe (to be confirmed against that class).
 */
public class StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp
    implements Serializable {
  private static final long serialVersionUID = 6795623748995704732L;

  /**
   * Application entry point: creates the application and starts the
   * streaming job.
   *
   * @param args
   *          command-line arguments, unused
   */
  public static void main(String[] args) {
    StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp app =
        new StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp();
    app.start();
  }

  /**
   * Builds the streaming context, wires the text-file stream to its
   * outputs, starts the computation and blocks until the streaming job
   * terminates or the calling thread is interrupted.
   */
  private void start() {
    // Create a local StreamingContext with two worker threads and a batch
    // interval of 5 seconds.
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
        "Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations
        .seconds(5));

    // Stream of text lines built from the files of the input directory.
    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils
        .getInputDirectory());

    // Print the content of each batch, which is handy while debugging.
    msgDataStream.print();
    // Delegate the processing of each batch RDD to the RowProcessor class.
    msgDataStream.foreachRDD(new RowProcessor());

    jssc.start();
    try {
      jssc.awaitTermination();
    } catch (InterruptedException e) {
      // Restore the interrupt status so that code further up the call stack
      // can still observe that this thread was asked to stop.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    }
  }
}