package com.memsql.streamliner.starter

import org.apache.spark.sql.{Row, DataFrame, SQLContext}
import org.apache.spark.sql.types._
import com.memsql.spark.etl.api.{Transformer, PhaseConfig}
import com.memsql.spark.etl.utils.PhaseLogger

/**
 * Extractor that exposes the first column of a schema.
 *
 * Matches any `StructType` that has at least one field and yields that field's
 * name, data type, nullability and metadata. A schema without fields does not match.
 */
object ExtractFirstStructField {
  def unapply(schema: StructType): Option[(String, DataType, Boolean, Metadata)] =
    // headOption yields None for an empty schema, so a caller's pattern match falls
    // through to its next case instead of failing with a MatchError inside the extractor.
    schema.fields.headOption.map { first =>
      (first.name, first.dataType, first.nullable, first.metadata)
    }
}

/**
 * Transformer that keeps only the rows whose first column holds an even number.
 *
 * The first column of the input DataFrame must be of type IntegerType.
 */
class BasicTransformer extends Transformer {
  /**
   * Filters `df` down to the rows where the first column is even.
   *
   * @param sqlContext the SQL context supplied by the pipeline (not used here)
   * @param df the input DataFrame; its first column must be IntegerType
   * @param config the phase configuration (not used here)
   * @param logger logger used to report progress
   * @return a DataFrame containing only the rows whose first column is divisible by 2
   * @throws IllegalArgumentException if the first column of `df` is not IntegerType
   */
  def transform(sqlContext: SQLContext, df: DataFrame, config: PhaseConfig, logger: PhaseLogger): DataFrame = {
    logger.info("transforming the DataFrame")

    // check that the first column is of type IntegerType and return its name
    val column = df.schema match {
      case ExtractFirstStructField(name, _: IntegerType, _, _) => name
      case _ => throw new IllegalArgumentException("The first column of the input DataFrame should be IntegerType")
    }

    // Build the predicate with the Column API rather than interpolating the column name
    // into a SQL expression string: names containing spaces, operators or reserved words
    // would otherwise be mis-parsed.
    df.filter(df(column) % 2 === 0)
  }
}