package com.memsql.streamliner.starter

import org.apache.spark.sql.{DataFrame, Row, SQLContext}
import org.apache.spark.sql.types._
import org.apache.spark.streaming.StreamingContext
import com.memsql.spark.etl.api.{Extractor, PhaseConfig}
import com.memsql.spark.etl.utils.PhaseLogger

/** Example extractor that emits the same five-row DataFrame every batch.
  *
  * Each batch interval it produces a single-column (`number`, non-nullable
  * IntegerType) DataFrame containing the integers 1 through 5 — useful as a
  * minimal Streamliner pipeline source for testing downstream phases.
  */
class BasicExtractor extends Extractor {

  /** Builds and returns the constant DataFrame for this batch.
    *
    * @param ssc           streaming context for the pipeline (unused here)
    * @param time          batch timestamp (unused here)
    * @param sqlContext    used to parallelize the data and create the DataFrame
    * @param config        phase configuration (unused here)
    * @param batchInterval batch interval in milliseconds (unused here)
    * @param logger        phase logger for diagnostics
    * @return always `Some` DataFrame with rows 1..5 in column "number"
    */
  override def next(ssc: StreamingContext,
                    time: Long,
                    sqlContext: SQLContext,
                    config: PhaseConfig,
                    batchInterval: Long,
                    logger: PhaseLogger): Option[DataFrame] = {
    logger.info("extracting a constant sequence DataFrame")

    // One non-nullable integer column named "number".
    val schema = StructType(StructField("number", IntegerType, nullable = false) :: Nil)

    // Distribute the constant values 1..5 and wrap each in a Row.
    val values = (1 to 5).toList
    val rows = sqlContext.sparkContext.parallelize(values).map(n => Row(n))

    val frame = sqlContext.createDataFrame(rows, schema)
    Some(frame)
  }
}