scala source code of AzureStreamingExample

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.spark.cloud.examples

import com.cloudera.spark.cloud.ObjectStoreExample
import org.apache.hadoop.fs.Path

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Simple example of streaming on Azure.
 */
class AzureStreamingExample extends ObjectStoreExample {

  /**
   * List of the command args for the current example.
   * @return a string
   */
  override protected def usageArgs(): String = {
    "<dest> <execute-seconds> <interval-seconds>"
  }

  /**
   * Action to execute.
   *
   * @param sparkConf configuration to use
   * @param args argument array
   * @return an exit code
   */
  override def action(
      sparkConf: SparkConf,
      args: Array[String]): Int = {
    if (args.length !=  3) {
      return usage()
    }
    sparkConf.setAppName("CloudStreaming")
    applyObjectStoreConfigurationOptions(sparkConf, false)
    val dest = args(0)
    val delay = Integer.valueOf(args(1))
    val interval = Integer.valueOf(args(2))

    // Create the context
    val streaming = new StreamingContext(sparkConf, Seconds(10))

    try {
      // Create the FileInputDStream on the directory regexp and use the
      // stream to look for a new file renamed into it
      val destPath = new Path(dest)
      val sc = streaming.sparkContext
      val hc = sc.hadoopConfiguration

      val fs = destPath.getFileSystem(hc)
      rm(fs, destPath)
      fs.mkdirs(destPath)

      val sightings = sc.longAccumulator("sightings")

      print("===================================")
      print(s"Looking for text files under ${destPath}")
      print("===================================")

      val lines = streaming.textFileStream(dest)

      val matches = lines.map(line => {
        sightings.add(1)
        print(s"[${sightings.value}]: $line")
        line
      })

      // materialize the operation
      matches.print()

      // start the streaming
      streaming.start()

      // sleep a bit to get streaming up and running
      Thread.sleep(delay * 1000)
      print("===================================")
      print(s"Seen ${sightings.value} lines")
      0
    } finally {
      streaming.stop(true)
    }
  }

}

 object AzureStreamingExample {

  def main(args: Array[String]) {
    new AzureStreamingExample().run(args)
  }
}