/*
 * Copyright 2017 Azavea
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package geotrellis.pointcloud.spark.store.hadoop

import geotrellis.pointcloud.spark.store.hadoop.formats._
import geotrellis.store.hadoop._
import geotrellis.vector.Extent

import io.circe.Json
import io.pdal._
import io.pdal.pipeline._
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

/**
  * Allows for reading point data files with PDAL, through the Hadoop FileSystem API, as an RDD[(HadoopPointCloudHeader, List[PointCloud])].
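  *
  * A minimal usage sketch, assuming an in-scope implicit SparkContext and a
  * hypothetical HDFS directory of point data files:
  *
  * {{{
  * import geotrellis.pointcloud.spark.store.hadoop._
  * import org.apache.hadoop.fs.Path
  * import org.apache.spark.SparkContext
  *
  * implicit val sc: SparkContext = ??? // provided by the application
  *
  * // Read every supported file under the directory with the default options
  * // (local PDAL read pipeline, no extent filter).
  * val rdd = HadoopPointCloudRDD(new Path("hdfs:///data/pointclouds"))
  * }}}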
  */
object HadoopPointCloudRDD {

  /**
    * Parameters one can set when reading point cloud RDDs from Hadoop using Spark.
    *
    * @param filesExtensions file extensions to read; files with other extensions under the path are skipped
    * @param pipeline        PDAL pipeline, as JSON, used to read each file
    * @param tmpDir          local directory for temporary files created during the read
    * @param filterExtent    if set, keep only point clouds whose header extent intersects it
    * @param dimTypes        if set, read only these PDAL dimension types
    */
  case class Options(
    filesExtensions: Seq[String] = PointCloudInputFormat.filesExtensions,
    pipeline: Json = Read("local") ~ ENil,
    tmpDir: Option[String] = None,
    filterExtent: Option[Extent] = None,
    dimTypes: Option[Iterable[String]] = None
  )

  object Options {
    def DEFAULT = Options()
  }

  /**
    * Creates an RDD[(HadoopPointCloudHeader, List[PointCloud])]; the header contents depend on the type of point data file that is read.
    *
    * @param path     HDFS path to the point data files.
    * @param options  An instance of [[Options]] containing any user-defined or default settings.
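    *
    * A sketch of overriding the defaults, assuming an in-scope implicit
    * SparkContext; the extensions, pipeline, and extent below are assumptions
    * for illustration:
    *
    * {{{
    * import geotrellis.vector.Extent
    * import io.pdal.pipeline._
    * import org.apache.hadoop.fs.Path
    *
    * val options = HadoopPointCloudRDD.Options(
    *   filesExtensions = Seq(".las", ".laz"),
    *   pipeline        = Read("local") ~ ENil,
    *   filterExtent    = Some(Extent(0.0, 0.0, 1000.0, 1000.0))
    * )
    * val rdd = HadoopPointCloudRDD(new Path("hdfs:///data/pointclouds"), options)
    * }}}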
    */
  def apply(path: Path, options: Options = Options.DEFAULT)(implicit sc: SparkContext): RDD[(HadoopPointCloudHeader, List[PointCloud])] = {
    val conf = sc.hadoopConfiguration.withInputDirectory(path, options.filesExtensions)

    // Propagate the user-supplied options into the Hadoop configuration so
    // that they reach PointCloudInputFormat on the executors.
    options.tmpDir.foreach(PointCloudInputFormat.setTmpDir(conf, _))
    options.dimTypes.foreach(PointCloudInputFormat.setDimTypes(conf, _))
    PointCloudInputFormat.setPipeline(conf, options.pipeline)

    // The filter extent must be set on the configuration before the RDD is
    // created, since the configuration is captured at RDD creation time.
    options.filterExtent.foreach(PointCloudInputFormat.setFilterExtent(conf, _))

    val rdd =
      sc.newAPIHadoopRDD(
        conf,
        classOf[PointCloudInputFormat],
        classOf[HadoopPointCloudHeader],
        classOf[List[PointCloud]]
      )

    options.filterExtent.fold(rdd) { filterExtent =>
      // Keep only the point clouds whose header reports a 3D extent that
      // intersects the filter extent; headers without an extent are dropped.
      rdd.filter { case (header, _) =>
        header.extent3D.exists(_.toExtent.intersects(filterExtent))
      }
    }
  }
}