org.apache.flink.util.Collector Scala Examples

The following examples show how to use org.apache.flink.util.Collector in Scala. They are taken from open-source projects; the originating project and source file are listed above each example.
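Before the project examples, here is a minimal sketch of the pattern they all share: a user function receives an input element plus a Collector and emits zero or more output records by calling collect(). The class and token-splitting logic below are illustrative only and not taken from any of the projects listed here.

import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

// Splits each input line into whitespace-separated tokens and emits every
// non-empty token as its own record: one input may yield many outputs.
class TokenizeFlatMap extends FlatMapFunction[String, String] {
  override def flatMap(line: String, out: Collector[String]): Unit = {
    line.split("\\s+").filter(_.nonEmpty).foreach(token => out.collect(token))
  }
}

In a job this would be attached with stream.flatMap(new TokenizeFlatMap). The examples below follow the same idea with richer functions (RichFlatMapFunction, CoFlatMapFunction, ProcessFunction, window functions), all of which receive a Collector to emit their results.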
Example 1
Source File: RegressITCase.scala    From flink-tensorflow   with Apache License 2.0
package org.apache.flink.contrib.tensorflow.ml

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow.ml.signatures.RegressionMethod._
import org.apache.flink.contrib.tensorflow.types.TensorInjections.{message2Tensor, messages2Tensor}
import org.apache.flink.contrib.tensorflow.util.TestData._
import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils}
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.util.Collector
import org.apache.flink.util.Preconditions.checkState
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Matchers, WordSpecLike}
import org.tensorflow.Tensor
import org.tensorflow.contrib.scala.Arrays._
import org.tensorflow.contrib.scala.Rank._
import org.tensorflow.contrib.scala._
import org.tensorflow.example.Example
import resource._

@RunWith(classOf[JUnitRunner])
class RegressITCase extends WordSpecLike
  with Matchers
  with FlinkTestBase {

  override val parallelism = 1

  type LabeledExample = (Example, Float)

  def examples(): Seq[LabeledExample] = {
    for (v <- Seq(0.0f -> 2.0f, 1.0f -> 2.5f, 2.0f -> 3.0f, 3.0f -> 3.5f))
      yield (example("x" -> feature(v._1)), v._2)
  }

  "A RegressFunction" should {
    "process elements" in {
      val env = StreamExecutionEnvironment.getExecutionEnvironment
      RegistrationUtils.registerTypes(env.getConfig)

      val model = new HalfPlusTwo(new Path("../models/half_plus_two"))

      val outputs = env
        .fromCollection(examples())
        .flatMap(new RichFlatMapFunction[LabeledExample, Float] {
          override def open(parameters: Configuration): Unit = model.open()
          override def close(): Unit = model.close()

          override def flatMap(value: (Example, Float), out: Collector[Float]): Unit = {
            for {
              x <- managed(Seq(value._1).toList.as[Tensor].taggedAs[ExampleTensor])
              y <- model.regress_x_to_y(x)
            } {
              // cast as a 1D tensor to use the available conversion
              val o = y.taggedAs[TypedTensor[`1D`,Float]].as[Array[Float]]
              val actual = o(0)
              checkState(actual == value._2)
              out.collect(actual)
            }
          }
        })
        .print()

      env.execute()
    }
  }
} 
Example 2
Source File: TweetReader.scala    From flink-parameter-server   with Apache License 2.0
package hu.sztaki.ilab.ps.sketch.utils

import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.util.Collector

class TweetReader(delimiter: String, searchWords: List[String]) extends RichFlatMapFunction[String, (String, Array[String])]{
  override def flatMap(value: String, out: Collector[(String, Array[String])]): Unit = {
    val id = value.split(delimiter)(0)
    val tweet = value
      .split(delimiter)(5)
      .split(" ")
      .map(_.toLowerCase)
      .filter(searchWords.contains(_))

    if(tweet.nonEmpty){
      out.collect((id, tweet))
    }
  }
} 
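A short wiring sketch for the reader above (not part of the original project; the input path, delimiter, and search words are illustrative, and the input is assumed to be one delimiter-separated tweet record per line):

import org.apache.flink.streaming.api.scala._
import hu.sztaki.ilab.ps.sketch.utils.TweetReader

object TweetReaderDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    env
      .readTextFile("/path/to/tweets.txt")                        // hypothetical input file
      .flatMap(new TweetReader("|", List("flink", "streaming")))  // emits (tweetId, matchedWords)
      .print()

    env.execute("TweetReaderDemo")
  }
}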
Example 3
Source File: TaxiRideProcessor.scala    From pipelines-examples   with Apache License 2.0
package pipelines.examples
package processor

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.functions.co._
import org.apache.flink.api.common.state.{ ValueState, ValueStateDescriptor }
import org.apache.flink.util.Collector

import pipelines.streamlets.StreamletShape
import pipelines.streamlets.avro._
import pipelines.flink.avro._
import pipelines.flink._

class TaxiRideProcessor extends FlinkStreamlet {

  // Step 1: Define inlets and outlets. Note for the outlet you need to specify
  //         the partitioner function explicitly: here we are using the
  //         rideId as the partitioner
  @transient val inTaxiRide = AvroInlet[TaxiRide]("in-taxiride")
  @transient val inTaxiFare = AvroInlet[TaxiFare]("in-taxifare")
  @transient val out = AvroOutlet[TaxiRideFare]("out", _.rideId.toString)

  // Step 2: Define the shape of the streamlet. In this example the streamlet
  //         has 2 inlets and 1 outlet
  @transient val shape = StreamletShape.withInlets(inTaxiRide, inTaxiFare).withOutlets(out)

  // Step 3: Provide custom implementation of `FlinkStreamletLogic` that defines
  //         the behavior of the streamlet
  override def createLogic() = new FlinkStreamletLogic {
    override def buildExecutionGraph = {
      val rides: DataStream[TaxiRide] =
        readStream(inTaxiRide)
          .filter { ride ⇒ ride.isStart.booleanValue }
          .keyBy("rideId")

      val fares: DataStream[TaxiFare] =
        readStream(inTaxiFare)
          .keyBy("rideId")

      val processed: DataStream[TaxiRideFare] =
        rides
          .connect(fares)
          .flatMap(new EnrichmentFunction)

      writeStream(out, processed)
    }
  }

  import org.apache.flink.configuration.Configuration
  class EnrichmentFunction extends RichCoFlatMapFunction[TaxiRide, TaxiFare, TaxiRideFare] {

    @transient var rideState: ValueState[TaxiRide] = null
    @transient var fareState: ValueState[TaxiFare] = null

    override def open(params: Configuration): Unit = {
      super.open(params)
      rideState = getRuntimeContext.getState(
        new ValueStateDescriptor[TaxiRide]("saved ride", classOf[TaxiRide]))
      fareState = getRuntimeContext.getState(
        new ValueStateDescriptor[TaxiFare]("saved fare", classOf[TaxiFare]))
    }

    override def flatMap1(ride: TaxiRide, out: Collector[TaxiRideFare]): Unit = {
      val fare = fareState.value
      if (fare != null) {
        fareState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        rideState.update(ride)
      }
    }

    override def flatMap2(fare: TaxiFare, out: Collector[TaxiRideFare]): Unit = {
      val ride = rideState.value
      if (ride != null) {
        rideState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        fareState.update(fare)
      }
    }
  }
} 
Example 4
Source File: ContinueRising.scala    From flink-rookie   with Apache License 2.0
package com.venn.cep

import java.util

import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory


    val pattern = Pattern.begin[CepDemoEvent]("first")
      .next("second").where(new IterativeCondition[CepDemoEvent] {
      override def filter(currentEvent: CepDemoEvent, context: IterativeCondition.Context[CepDemoEvent]): Boolean = {
        // get last event
        val firstList = context.getEventsForPattern("first").iterator()
        var lastStart: CepDemoEvent = null
        // iterate to the last event matched for the "first" pattern
        while (firstList.hasNext) {
          lastStart = firstList.next()
        }
        // match if the current event's volume keeps rising
        currentEvent.volume > lastStart.volume
      }
    })
      // always remember to add within(); it bounds how long partial matches are kept and reduces state usage
      .within(Time.minutes(5))

    val patternStream = CEP.pattern(input, pattern)

    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
                                   events: util.Map[String, util.List[CepDemoEvent]],
                                   ctx: PatternProcessFunction.Context,
                                   out: Collector[String]): Unit = {
          // get the change
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val change = second.volume - first.volume
          out.collect("from : " + first.id + ", to " + second.id + ", change : " + change)
        }

      })

    // for convenience, just print
    result.print()
    env.execute(this.getClass.getName)
  }


} 
Example 5
Source File: AfterMatchStrategyDemo.scala    From flink-rookie   with Apache License 2.0
package com.venn.cep

import java.util

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.nfa.aftermatch.AfterMatchSkipStrategy
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory


    // after-match skip strategy; to take effect it must be passed to the pattern, e.g. Pattern.begin("first", noSkip)
    val noSkip = AfterMatchSkipStrategy.noSkip()
    val pattern = Pattern.begin[CepDemoEvent]("first").where(event => {
      event.name.equals("a")
    })
      //      .timesOrMore(1)
      .next("second").where(event => {
      event.name.equals("a")
    })
      .next("third").where(event => {
      event.name.equals("b")
    })
//      .notNext()

    // always remember to add within(); it bounds how long partial matches are kept and reduces state usage
    //      .within(Time.minutes(5 * 60 * 1000))

    val patternStream = CEP.pattern(input, pattern)

    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
                                   events: util.Map[String, util.List[CepDemoEvent]],
                                   ctx: PatternProcessFunction.Context,
                                   out: Collector[String]): Unit = {
          // get the change
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val third = events.get("third").get(0)
          out.collect("first : " + first + ", first " + second + ", third : " + third)
        }

      })

    // for convenience, just print
    result.print()
    env.execute(this.getClass.getName)
  }


} 
Example 6
Source File: ProcessWindowForTrigger.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.trigger

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory


object ProcessWindowDemoForTrigger {
  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    if ("\\".equals(File.pathSeparator)) {
      val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR)
      env.setStateBackend(rock)
      // checkpoint interval
      env.enableCheckpointing(10000)
    }

    val topic = "current_day"
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

    val kafkaSource = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp)
    val stream = env.addSource(kafkaSource)
      .map(s => {
        s
      })
      .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(60)))
      .trigger(CountAndTimeTrigger.of(10, Time.seconds(10)))
      .process(new ProcessAllWindowFunction[String, String, TimeWindow] {
        override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {

          // count the elements in this window pane; nothing is emitted downstream
          val count = elements.size
          logger.info("this trigger fired with {} items", count)
        }
      })

    // execute job
    env.execute(this.getClass.getName)
  }

} 
Example 7
Source File: MysqlOutputDemo.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.jdbcOutput

import java.io.File

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{OutputTag, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector


object MysqlOutputDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val source = new FlinkKafkaConsumer[String]("mysql_output", new SimpleStringSchema, Common.getProp)
    source.setStartFromLatest()
    env.addSource(source)
        .map(li => {
          val tmp = li.split(",")
          new User(tmp(0), tmp(1), tmp(2).toInt, tmp(3))
        })
//        .addSink(new MysqlSink1)
      .writeUsingOutputFormat(new MysqlSink1)

    env.execute("msqlOutput")
  }

} 
Example 8
Source File: BroadCastDemo.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.broadcast

import java.io.File

import com.venn.common.Common
import com.venn.util.StringUtil
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector


object BroadCastDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }
    // the configuration update stream (consumed from Kafka)
    val configSource = new FlinkKafkaConsumer[String]("broad_cast_demo", new SimpleStringSchema, Common.getProp)
    // initialization of the config stream; here it is implemented by reading a config file
    var initFilePath = ""
    if ("/".equals(File.separator)){
      initFilePath = "hdfs:///venn/init_file.txt"
    }else{
      initFilePath = "D:\\idea_out\\broad_cast.txt"
    }
    val init = env.readTextFile(initFilePath)
    val descriptor = new MapStateDescriptor[String,  String]("dynamicConfig", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)
    val configStream = env.addSource(configSource).union(init).broadcast(descriptor)


    val input = env.addSource(new RadomFunction)
      .connect(configStream)
      .process(new BroadcastProcessFunction[String, String, String] {
        override def processBroadcastElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#Context, out: Collector[String]): Unit = {

          println("new config : " + value)
          val configMap = ctx.getBroadcastState(descriptor)
          // process the config update: parse the entry and write it into the broadcast state
          val line = value.split(",")
          configMap.put(line(0), line(1))
        }
        override def processElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext, out: Collector[String]): Unit = {
          // look up the broadcast value for the given key
          val configMap = ctx.getBroadcastState(descriptor)
          // parse the three-digit city code and translate it to the city name using the map in broadcast state
//          println(value)
          val line = value.split(",")
          val code = line(0)
          var va = configMap.get(code)
          // codes that cannot be translated default to "中国(code=xxx)"
          if ( va == null){
            va = "中国(code="+code+")";
          }else{
            va = va + "(code="+code+")"
          }
          out.collect(va + "," + line(1))
        }
      })
    input.print()

    env.execute("BroadCastDemo")
  }
}

class RadomFunction extends SourceFunction[String]{
  var flag = true
  override def cancel(): Unit = {
    flag = false
  }

  override def run(ctx: SourceFunction.SourceContext[String]): Unit = {
    while (flag){
      for (i <- 0 to 300) {
        var nu = i.toString
        while (nu.length < 3) {
          nu = "0" + nu
        }
        ctx.collect(nu + "," + StringUtil.getRandomString(5))
        Thread.sleep(2000)
      }
    }
  }
} 
Example 9
Source File: BadDataHandler.scala    From model-serving-tutorial   with Apache License 2.0
package com.lightbend.modelserving.flink.wine

import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

import scala.util.{Failure, Success, Try}

object BadDataHandler {
  def apply[T] = new BadDataHandler[T]
}


class BadDataHandler[T] extends FlatMapFunction[Try[T], T] {
  override def flatMap(t: Try[T], out: Collector[T]): Unit = {
    t match {
      case Success(t) => out.collect(t)
      case Failure(e) => println(s"BAD DATA: ${e.getMessage}")
    }
  }
} 
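A hedged usage sketch (the input source and parse function are illustrative, not from the tutorial): parsing is wrapped in Try upstream, then BadDataHandler forwards the successes and logs and drops the failures.

import org.apache.flink.streaming.api.scala._
import com.lightbend.modelserving.flink.wine.BadDataHandler

import scala.util.Try

object BadDataHandlerDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // hypothetical raw input: comma-separated feature vectors, one per line
    val parsed: DataStream[Try[Array[Double]]] = env
      .socketTextStream("localhost", 9999)
      .map(line => Try(line.split(",").map(_.toDouble)))

    // Success values pass through; Failure values are printed and dropped
    val clean: DataStream[Array[Double]] = parsed.flatMap(BadDataHandler[Array[Double]])

    clean.print()
    env.execute("BadDataHandlerDemo")
  }
}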
Example 10
Source File: PSOnlineMatrixFactorizationImplicitTest.scala    From flink-parameter-server   with Apache License 2.0
package hu.sztaki.ilab.ps.matrix.factorization

import hu.sztaki.ilab.ps.matrix.factorization.utils.InputTypes
import hu.sztaki.ilab.ps.matrix.factorization.utils.InputTypes.Rating
import hu.sztaki.ilab.ps.matrix.factorization.utils.Utils.{ItemId, UserId}
import hu.sztaki.ilab.ps.matrix.factorization.utils.Vector._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

import scala.collection.mutable


class PSOnlineMatrixFactorizationImplicitTest {

}

object PSOnlineMatrixFactorizationImplicitTest{

  val numFactors = 10
  val rangeMin = -0.1
  val rangeMax = 0.1
  val learningRate = 0.01
  val userMemory = 128
  val negativeSampleRate = 9
  val pullLimit = 1500
  val workerParallelism = 4
  val psParallelism = 4
  val iterationWaitTime = 10000

  def main(args: Array[String]): Unit = {

    val input_file_name = args(0)
    val userVector_output_name = args(1)
    val itemVector_output_name = args(2)

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val data = env.readTextFile(input_file_name)

    val lastFM = data.flatMap(new RichFlatMapFunction[String, Rating] {

      override def flatMap(value: String, out: Collector[Rating]): Unit = {
        val fieldsArray = value.split(" ")
        val r = InputTypes.ratingFromTuple(fieldsArray(1).toInt, fieldsArray(2).toInt, 1.0)
        out.collect(r)
      }
    })

    PSOnlineMatrixFactorization.psOnlineMF(
      lastFM,
      numFactors,
      rangeMin,
      rangeMax,
      learningRate,
      negativeSampleRate,
      userMemory,
      pullLimit,
      workerParallelism,
      psParallelism,
      iterationWaitTime)
        .addSink(new RichSinkFunction[Either[(UserId, Vector), (ItemId, Vector)]] {

          val userVectors = new mutable.HashMap[UserId, Vector]
          val itemVectors = new mutable.HashMap[ItemId, Vector]

          override def invoke(value: Either[(UserId, Vector), (ItemId, Vector)]): Unit = {

            value match {
              case Left((userId, vec)) =>
                userVectors.update(userId, vec)
              case Right((itemId, vec)) =>
                itemVectors.update(itemId, vec)
            }
          }

          override def close(): Unit = {
            val userVectorFile = new java.io.PrintWriter(new java.io.File(userVector_output_name))
            for((k,v) <- userVectors){
              for(value <- v){
                userVectorFile.write(k + ";" + value + '\n')
              }
            }
            userVectorFile.close()

            val itemVectorFile = new java.io.PrintWriter(new java.io.File(itemVector_output_name))
            for((k,v) <- itemVectors){
              for(value <- v){
                itemVectorFile.write(k + ";" + value + '\n')
              }
            }
            itemVectorFile.close()
          }

        }).setParallelism(1)

    env.execute()
  }
} 
Example 11
Source File: TimeAwareTugOfWar.scala    From flink-parameter-server   with Apache License 2.0
package hu.sztaki.ilab.ps.sketch.tug.of.war

import hu.sztaki.ilab.ps.sketch.tug.of.war.pslogic.TimeAwareToWPSLogic
import hu.sztaki.ilab.ps.{FlinkParameterServer, ParameterServerClient, WorkerLogic}
import hu.sztaki.ilab.ps.sketch.utils.Utils.Vector
import net.openhft.hashing.LongHashFunction
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

class TimeAwareTugOfWar {

}

object TimeAwareTugOfWar {

  def tugOfWar(src: DataStream[(String, Array[String], Int)],
               numHashes: Int,
               workerParallelism: Int,
               psParallelism: Int,
               iterationWaitTime: Long) : DataStream[((Int, Int), Vector)] = {

    val workerLogic = new WorkerLogic[(String, Array[String], Int), Int, (Int, Array[Long]), Any] {


      override def onRecv(data: (String, Array[String], Int), ps: ParameterServerClient[Int, (Int, Array[Long]), Any]): Unit = {
        val id = data._1.toLong
        val tweet = data._2

        val hashArray = (for (i <- 0 to math.ceil(numHashes / 64).toInt) yield LongHashFunction.xx(i).hashLong(id)).toArray


        for(word <- tweet) {
          ps.push(word.hashCode, (data._3, hashArray))
        }
      }

      override def onPullRecv(paramId: Int, paramValue: (Int, Array[Long]), ps: ParameterServerClient[Int, (Int, Array[Long]), Any]): Unit = ???
    }

    val serverLogic = new TimeAwareToWPSLogic(numHashes)

    val modelUpdates = FlinkParameterServer.transform(
      src,
      workerLogic,
      serverLogic,
      workerParallelism,
      psParallelism,
      iterationWaitTime)

    modelUpdates
      .flatMap(new RichFlatMapFunction[Either[Any, ((Int, Int), Vector)], ((Int, Int), Vector)] {
        override def flatMap(value: Either[Any,  ((Int, Int), Vector)], out: Collector[ ((Int, Int), Vector)]): Unit = {
          value match {
            case Left(_) =>
            case Right(param) => out.collect(param)
          }
        }
      })
  }
} 
Example 12
Source File: TugOfWar.scala    From flink-parameter-server   with Apache License 2.0
package hu.sztaki.ilab.ps.sketch.tug.of.war

import hu.sztaki.ilab.ps.sketch.utils.Utils._
import hu.sztaki.ilab.ps.sketch.tug.of.war.pslogic.BitSetBasedPSLogic
import hu.sztaki.ilab.ps.{FlinkParameterServer, ParameterServerClient, WorkerLogic}
import net.openhft.hashing.LongHashFunction
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector


      override def onPullRecv(paramId: Int,
                              paramValue: Array[Long],
                              ps: ParameterServerClient[Int, Array[Long], Array[String]]): Unit = ???
    }

    val serverLogic = new BitSetBasedPSLogic(numHashes)


    val modelUpdates = FlinkParameterServer.transform(
      src,
      workerLogic,
      serverLogic,
      workerParallelism,
      psParallelism,
      iterationWaitTime)

    modelUpdates
      .flatMap(new RichFlatMapFunction[Either[Array[String], (Int, Vector)], (Int, Vector)] {
        override def flatMap(value: Either[Array[String], (Int, Vector)], out: Collector[(Int, Vector)]): Unit = {
          value match {
            case Left(_) =>
            case Right(param) => out.collect(param)
          }
        }
      })
  }
} 
Example 13
Source File: TimeAwareBloomFilter.scala    From flink-parameter-server   with Apache License 2.0
package hu.sztaki.ilab.ps.sketch.bloom.filter

import java.lang.Math.floorMod

import hu.sztaki.ilab.ps.sketch.bloom.filter.pslogic.TimeAwareBloomPSLogic
import hu.sztaki.ilab.ps.{FlinkParameterServer, ParameterServerClient, WorkerLogic}
import hu.sztaki.ilab.ps.sketch.utils.Utils._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

import scala.collection.mutable



      override def onPullRecv(paramId: Int, paramValue: (Int, Vector), ps: ParameterServerClient[Int, (Int, Vector), Any]): Unit = ???
    }


    val serverLogic = new TimeAwareBloomPSLogic

    val modelUpdates = FlinkParameterServer.transform(
      src,
      workerLogic,
      serverLogic,
      workerParallelism,
      psParallelism,
      iterationWaitTime)

    modelUpdates
      .flatMap(
        new RichFlatMapFunction[Either[Any, ((Int, Int), mutable.BitSet)], ((Int, Int), mutable.BitSet)] {
        override def flatMap(value: Either[Any, ((Int, Int), mutable.BitSet)], out: Collector[( (Int, Int), mutable.BitSet)]): Unit = {
          value match {
            case Left(_) =>
            case Right(param) => out.collect(param)
          }
        }
      })
  }
} 
Example 14
Source File: BloomFilter.scala    From flink-parameter-server   with Apache License 2.0
package hu.sztaki.ilab.ps.sketch.bloom.filter

import java.lang.Math.floorMod

import hu.sztaki.ilab.ps.sketch.bloom.filter.pslogic.BloomPSLogic
import hu.sztaki.ilab.ps.{FlinkParameterServer, ParameterServerClient, WorkerLogic}
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
import hu.sztaki.ilab.ps.sketch.utils.Utils._

import scala.collection.mutable



      override def onPullRecv(paramId: Int, paramValue: Vector, ps: ParameterServerClient[Int, Vector, Array[String]]): Unit = ???
    }

    val serverLogic = new BloomPSLogic

    val modelUpdates = FlinkParameterServer.transform(
      src,
      workerLogic,
      serverLogic,
      workerParallelism,
      psParallelism,
      iterationWaitTime)


    modelUpdates
      .flatMap(new RichFlatMapFunction[Either[Array[String], (Int, mutable.BitSet)], (Int,  mutable.BitSet)] {
        override def flatMap(value: Either[Array[String], (Int,  mutable.BitSet)], out: Collector[(Int,  mutable.BitSet)]): Unit = {
          value match {
            case Left(_) =>
            case Right(param) => out.collect(param)
          }
        }
      })
  }
} 
Example 15
Source File: TimeAwareTweetReader.scala    From flink-parameter-server   with Apache License 2.0
package hu.sztaki.ilab.ps.sketch.utils

import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.util.Collector

class TimeAwareTweetReader(delimiter: String, searchWords: List[String], timeStamp: Long, windowSize: Int)
  extends RichFlatMapFunction[String, (String, Array[String], Int)]{

  override def flatMap(value: String, out: Collector[(String, Array[String], Int)]): Unit = {
    val id =  value.split(delimiter)(0)
    val tweet = value.split(delimiter)(5).split(" ").map(_.toLowerCase).filter(searchWords.contains(_))
    val timeSlot = ((value.split(delimiter)(1).toLong - timeStamp) / (windowSize * 60 * 60)).toInt
    if(tweet.nonEmpty){
      out.collect((id, tweet, timeSlot))
    }
  }
} 
Example 16
Source File: AverageSensorReadings.scala    From examples-scala   with Apache License 2.0
package io.github.streamingwithflink.chapter1

import io.github.streamingwithflink.util.{SensorReading, SensorSource, SensorTimeAssigner}

import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.WindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector


  override def apply(
    sensorId: String,
    window: TimeWindow,
    vals: Iterable[SensorReading],
    out: Collector[SensorReading]): Unit = {

    // compute the average temperature
    val (cnt, sum) = vals.foldLeft((0, 0.0))((c, r) => (c._1 + 1, c._2 + r.temperature))
    val avgTemp = sum / cnt

    // emit a SensorReading with the average temperature
    out.collect(SensorReading(sensorId, window.getEnd, avgTemp))
  }
} 
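The excerpt above shows only the apply method of a window function. The sketch below shows one plausible way such a function is declared and attached to a keyed, windowed sensor stream; the class name, window size, and job wiring are assumptions, reusing the SensorReading, SensorSource, and SensorTimeAssigner utilities from the imports.

// Assumed enclosing class for the apply method shown above
class TemperatureAverager extends WindowFunction[SensorReading, SensorReading, String, TimeWindow] {
  override def apply(sensorId: String, window: TimeWindow, vals: Iterable[SensorReading],
                     out: Collector[SensorReading]): Unit = {
    val (cnt, sum) = vals.foldLeft((0, 0.0))((c, r) => (c._1 + 1, c._2 + r.temperature))
    out.collect(SensorReading(sensorId, window.getEnd, sum / cnt))
  }
}

object AverageSensorReadingsDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val avgTemp: DataStream[SensorReading] = env
      .addSource(new SensorSource)                            // assumed simulated sensor source
      .assignTimestampsAndWatermarks(new SensorTimeAssigner)  // assumed event-time assigner
      .keyBy(_.id)                                            // assumes SensorReading has an id field
      .timeWindow(Time.seconds(5))                            // tumbling event-time windows
      .apply(new TemperatureAverager)

    avgTemp.print()
    env.execute("Average sensor temperature")
  }
}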
Example 17
Source File: DataStreamMatcher.scala    From piglet   with Apache License 2.0
package dbis.piglet.cep.flink

import scala.reflect.ClassTag
import dbis.piglet.cep.nfa.NFAController
import dbis.piglet.cep.engines._
import dbis.piglet.cep.ops.SelectionStrategy._
import dbis.piglet.cep.ops.OutputStrategy._
import dbis.piglet.backends.{SchemaClass => Event}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow
import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows
import dbis.piglet.cep.ops.MatchCollector
import dbis.piglet.cep.ops.SelectionStrategy
//import org.apache.flink.api.java.operators.CustomUnaryOperation
import scala.collection.mutable.ListBuffer
//import org.apache.flink.api.java.DataSet
//import org.apache.flink.api.java.ExecutionEnvironment
import scala.collection.JavaConversions._
import org.apache.flink.streaming.api.scala._
import dbis.piglet.cep.ops.EngineConf
import org.apache.flink.util.Collector


class DataStreamMatcher[T <: Event: ClassTag: TypeInformation](@transient val input: DataStream[T], nfa: NFAController[T], flinkEnv: StreamExecutionEnvironment, sstr: SelectionStrategy = SelectionStrategy.FirstMatch, out: OutputStrategy = Combined) extends EngineConf[T](nfa, sstr) with java.io.Serializable {
  object DataStreamProcess {
    def customRun(gw: GlobalWindow, ts: Iterable[T], out: Collector[T]) = {
      ts.foreach { event => engine.runEngine(event)}
      val result = collector.convertEventsToArray()
      result.foreach { res => out.collect(res) }
    }
  }
  def compute(): DataStream[T] = {
    input.windowAll(GlobalWindows.create()).apply(DataStreamProcess.customRun _)   
  }

} 
Example 18
Source File: FlinkStreamlets.scala    From cloudflow   with Apache License 2.0
package cloudflow.flink

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.functions.co._
import org.apache.flink.api.common.state.{ ValueState, ValueStateDescriptor }
import org.apache.flink.util.Collector

import cloudflow.streamlets.StreamletShape
import cloudflow.streamlets.avro._
import cloudflow.flink.avro._

object FlinkConnectedProcessor extends FlinkStreamlet {

  // Step 1: Define inlets and outlets. Note for the outlet you need to specify
  //         the partitioner function explicitly
  val inTaxiRide = AvroInlet[TaxiRide]("in-taxiride")
  val inTaxiFare = AvroInlet[TaxiFare]("in-taxifare")
  val out        = AvroOutlet[TaxiRideFare]("out", _.rideId.toString)

  // Step 2: Define the shape of the streamlet. In this example the streamlet
  //         has 2 inlets and 1 outlet
  val shape = StreamletShape.withInlets(inTaxiRide, inTaxiFare).withOutlets(out)

  // Step 3: Provide custom implementation of `FlinkStreamletLogic` that defines
  //         the behavior of the streamlet
  override def createLogic() = new FlinkStreamletLogic {
    override def buildExecutionGraph = {
      val rides: DataStream[TaxiRide] =
        readStream(inTaxiRide)
          .filter { ride ⇒
            ride.isStart.booleanValue
          }
          .keyBy("rideId")

      // rides.print()

      val fares: DataStream[TaxiFare] =
        readStream(inTaxiFare)
          .keyBy("rideId")

      // fares.print()

      val processed: DataStream[TaxiRideFare] =
        rides
          .connect(fares)
          .flatMap(new EnrichmentFunction)

      // processed.print()

      writeStream(out, processed)
    }
  }

  class EnrichmentFunction extends RichCoFlatMapFunction[TaxiRide, TaxiFare, TaxiRideFare] {

    // keyed, managed state
    lazy val rideState: ValueState[TaxiRide] =
      getRuntimeContext.getState(new ValueStateDescriptor[TaxiRide]("saved ride", classOf[TaxiRide]))
    lazy val fareState: ValueState[TaxiFare] =
      getRuntimeContext.getState(new ValueStateDescriptor[TaxiFare]("saved fare", classOf[TaxiFare]))

    override def flatMap1(ride: TaxiRide, out: Collector[TaxiRideFare]): Unit = {
      val fare = fareState.value
      if (fare != null) {
        fareState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        rideState.update(ride)
      }
    }

    override def flatMap2(fare: TaxiFare, out: Collector[TaxiRideFare]): Unit = {
      val ride = rideState.value
      if (ride != null) {
        rideState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        fareState.update(fare)
      }
    }
  }
} 
Example 19
Source File: SocketGenerator.scala    From flink-demos   with Apache License 2.0
package com.dataartisans.flink.example.eventpattern.Socket

import java.net.{InetAddress, InetSocketAddress}
import java.nio.{ByteOrder, ByteBuffer}
import java.nio.channels.SocketChannel

import com.dataartisans.flink.example.eventpattern.{StandaloneGeneratorBase, Event}
import org.apache.flink.util.Collector

object SocketGenerator extends StandaloneGeneratorBase {

  val BASE_PORT = 51762

  def main(args: Array[String]): Unit = {

    val numPartitions = 4 //args(0).toInt
    val collectors = new Array[SocketCollector](numPartitions)

    // create the generator threads
    for (i <- 0 until collectors.length) {
      collectors(i) = new SocketCollector(BASE_PORT + i)
    }

    runGenerator(collectors)
  }
}

class SocketCollector(val port: Int) extends Collector[Event] {

  val channel = SocketChannel.open(new InetSocketAddress(InetAddress.getByName("localhost"), port))
  channel.configureBlocking(true)
  channel.finishConnect()

  val buffer = ByteBuffer.allocateDirect(4096).order(ByteOrder.LITTLE_ENDIAN)

  override def collect(t: Event): Unit = {
    if (buffer.remaining() < 8) {
      buffer.flip()
      channel.write(buffer)
      buffer.clear()
    }

    buffer.putInt(t.sourceAddress)
    buffer.putInt(t.event)
  }

  override def close(): Unit = {
    if (buffer.position() > 0) {
      buffer.flip()
      channel.write(buffer)
    }
    channel.close()
  }
} 
Example 20
Source File: StreamingDemo.scala    From flink-demos   with Apache License 2.0
package com.dataartisans.flink.example.eventpattern

import java.text.SimpleDateFormat
import java.util
import java.util.{Calendar, Properties, UUID}

import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer

import org.apache.flink.api.common.functions.{RuntimeContext, RichFlatMapFunction}
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{IndexRequestBuilder, ElasticsearchSink}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.util.Collector

import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.Requests


class StateMachineMapper extends RichFlatMapFunction[Event, Alert] {
  
  private[this] var currentState: ValueState[State] = _
    
  override def open(config: Configuration): Unit = {
    currentState = getRuntimeContext.getState(
      new ValueStateDescriptor("state", classOf[State], InitialState))
  }
  
  override def flatMap(t: Event, out: Collector[Alert]): Unit = {
    val state = currentState.value()
    val nextState = state.transition(t.event)
    
    nextState match {
      case InvalidTransition =>
        out.collect(Alert(t.sourceAddress, state, t.event))
      case x if x.terminal =>
        currentState.clear()
      case x =>
        currentState.update(nextState)
    }
  }
} 
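The excerpt shows only the stateful mapper; a hedged wiring sketch follows, reusing the imports already in the excerpt. The Kafka topic, properties, and the no-argument EventDeSerializer constructor are assumptions based on those imports. The mapper keeps one ValueState per key, so the stream must be keyed by source address before the flatMap.

val env = StreamExecutionEnvironment.getExecutionEnvironment

val kafkaProps = new Properties()
kafkaProps.setProperty("bootstrap.servers", "localhost:9092")   // illustrative connection settings
kafkaProps.setProperty("group.id", "state-machine-demo")

// hypothetical source of Event records; the original demo reads them from Kafka
val events: DataStream[Event] = env.addSource(
  new FlinkKafkaConsumer08[Event]("events", new EventDeSerializer(), kafkaProps))

val alerts: DataStream[Alert] = events
  .keyBy(_.sourceAddress)          // keyed state: one state machine per source address
  .flatMap(new StateMachineMapper)

alerts.print()
env.execute("State machine demo")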
Example 21
Source File: StreamingSessionExample.scala    From flink_training   with Apache License 2.0
package com.tmalaska.flinktraining.example.session

import java.util.Properties

import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._

object StreamingSessionExample {
  def main(args:Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val sessionTimeOut = args(4).toInt

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    //val socketStream = env.socketTextStream("localhost",9999, '\n')

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val messageStream:DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    val heartBeatStream = messageStream
      .map(str => {
        implicit val formats = DefaultFormats
        println("str:" + str)
        val hb = read[HeartBeat](str)
        (hb.entityId, hb.eventTime)
      }).keyBy(0).process(new MyProcessFunction(sessionTimeOut))

    heartBeatStream.map(session => {
      println("session:" + session)
      session
    })

    heartBeatStream.print()

    env.execute()
  }
}

class MyProcessFunction(sessionTimeOut:Int) extends ProcessFunction[(String,Long), SessionObj] {


  private var state:ValueState[SessionObj] = null


  override def open(parameters: Configuration): Unit = {
    state = getRuntimeContext.getState(new ValueStateDescriptor[SessionObj]("myState", classOf[SessionObj]))
  }

  override def processElement(value: (String, Long),
                              ctx: ProcessFunction[(String, Long), SessionObj]#Context,
                              out: Collector[SessionObj]): Unit = {
    val currentSession = state.value()
    var outBoundSessionRecord:SessionObj = null
    if (currentSession == null) {
      outBoundSessionRecord = SessionObj(value._2, value._2, 1)
    } else {
      outBoundSessionRecord = SessionObj(currentSession.startTime, value._2, currentSession.heartbeatCount + 1)

    }
    state.update(outBoundSessionRecord)
    out.collect(outBoundSessionRecord)
    ctx.timerService.registerEventTimeTimer(System.currentTimeMillis() + sessionTimeOut)
  }

  override def onTimer(timestamp: Long,
                       ctx: ProcessFunction[(String, Long), SessionObj]#OnTimerContext,
                       out: Collector[SessionObj]): Unit = {
    val result = state.value
    if (result != null && result.latestEndTime + sessionTimeOut < System.currentTimeMillis()) { // the session has expired: drop its stored state
      state.clear()
    }
  }
}

case class SessionObj(startTime:Long, latestEndTime:Long, heartbeatCount:Int) 
Example 22
Source File: LeftJoinKeyedCoProcessFunction.scala    From milan   with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import com.amazon.milan.compiler.flink.internal.JoinLineageRecordFactory
import com.amazon.milan.compiler.flink.metrics.MetricFactory
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import com.amazon.milan.types.LineageRecord
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction
import org.apache.flink.util.{Collector, OutputTag}


object LeftJoinCoProcessFunction {
  val LeftInputRecordsCounterMetricName = "left_input_record_count"
  val RightInputRecordsCounterMetricName = "right_input_record_count"
  val OutputRecordsCounterMetricName = "output_record_count"
}

import com.amazon.milan.compiler.flink.runtime.LeftJoinCoProcessFunction._


abstract class LeftJoinKeyedCoProcessFunction[TLeft >: Null, TRight >: Null, TKey >: Null <: Product, TOut >: Null](rightTypeInformation: TypeInformation[TRight],
                                                                                                                    keyTypeInformation: TypeInformation[TKey],
                                                                                                                    outputTypeInformation: TypeInformation[TOut],
                                                                                                                    leftRecordIdExtractor: RecordIdExtractor[TLeft],
                                                                                                                    rightRecordIdExtractor: RecordIdExtractor[TRight],
                                                                                                                    outputRecordIdExtractor: RecordIdExtractor[TOut],
                                                                                                                    lineageRecordFactory: JoinLineageRecordFactory,
                                                                                                                    lineageOutputTag: OutputTag[LineageRecord],
                                                                                                                    metricFactory: MetricFactory)
  extends KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]
    with ResultTypeQueryable[RecordWrapper[TOut, TKey]] {

  @transient private lazy val canProduceLineage = leftRecordIdExtractor.canExtractRecordId && rightRecordIdExtractor.canExtractRecordId && outputRecordIdExtractor.canExtractRecordId
  @transient private lazy val leftInputRecordsCounter = this.metricFactory.createCounter(this.getRuntimeContext, LeftInputRecordsCounterMetricName)
  @transient private lazy val rightInputRecordsCounter = this.metricFactory.createCounter(this.getRuntimeContext, RightInputRecordsCounterMetricName)
  @transient private lazy val outputRecordsCounter = this.metricFactory.createCounter(this.getRuntimeContext, OutputRecordsCounterMetricName)

  @transient private var lastRightValue: ValueState[TRight] = _

  protected def map(left: TLeft, right: TRight): TOut

  protected def postCondition(left: TLeft, right: TRight): Boolean

  override def processElement1(leftRecord: RecordWrapper[TLeft, TKey],
                               context: KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]#Context,
                               collector: Collector[RecordWrapper[TOut, TKey]]): Unit = {
    this.leftInputRecordsCounter.increment()

    val leftValue = leftRecord.value
    val rightValue = this.lastRightValue.value()

    if (this.postCondition(leftValue, rightValue)) {
      val output = this.map(leftValue, rightValue)

      if (output != null) {
        if (this.canProduceLineage) {
          val lineageRecord = this.createLineageRecord(this.outputRecordIdExtractor(output), leftValue, rightValue)
          context.output(this.lineageOutputTag, lineageRecord)
        }

        collector.collect(RecordWrapper.wrap[TOut, TKey](output, leftRecord.key, 0))
        this.outputRecordsCounter.increment()
      }
    }
  }

  override def processElement2(rightRecord: RecordWrapper[TRight, TKey],
                               context: KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]#Context,
                               collector: Collector[RecordWrapper[TOut, TKey]]): Unit = {
    this.rightInputRecordsCounter.increment()
    this.lastRightValue.update(rightRecord.value)
  }

  override def open(parameters: Configuration): Unit = {
    val rightValueDescriptor = new ValueStateDescriptor[TRight]("lastRightValue", this.rightTypeInformation)
    this.lastRightValue = this.getRuntimeContext.getState(rightValueDescriptor)
  }

  override def getProducedType: TypeInformation[RecordWrapper[TOut, TKey]] =
    RecordWrapperTypeInformation.wrap(this.outputTypeInformation, this.keyTypeInformation)

  private def createLineageRecord(outputRecordId: String, leftRecord: TLeft, rightRecord: TRight): LineageRecord = {
    val sourceRecords =
      Option(leftRecord).toSeq.map(r => this.lineageRecordFactory.createLeftRecordPointer(this.leftRecordIdExtractor(r))) ++
        Option(rightRecord).toSeq.map(r => this.lineageRecordFactory.createRightRecordPointer(this.rightRecordIdExtractor(r)))

    this.lineageRecordFactory.createLineageRecord(outputRecordId, sourceRecords)
  }
} 
Example 23
Source File: TimeWindowFlatMapProcessWindowFunction.scala    From milan   with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import java.lang
import java.time.Instant

import com.amazon.milan.compiler.flink.TypeUtil
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector


abstract class TimeWindowFlatMapProcessWindowFunction[T >: Null, TInKey >: Null <: Product, TOutKey >: Null <: Product](recordTypeInfo: TypeInformation[T],
                                                                                                                        outKeyTypeInfo: TypeInformation[TOutKey])
  extends ProcessWindowFunction[RecordWrapper[Option[T], TInKey], RecordWrapper[Option[T], TOutKey], TInKey, TimeWindow]
    with ResultTypeQueryable[RecordWrapper[Option[T], TOutKey]] {

  @transient private var sequenceNumberHelper: SequenceNumberHelper = _

  protected def addWindowStartTimeToKey(key: TInKey, windowStart: Instant): TOutKey

  override def getProducedType: TypeInformation[RecordWrapper[Option[T], TOutKey]] =
    RecordWrapperTypeInformation.wrap(TypeUtil.createOptionTypeInfo(this.recordTypeInfo), this.outKeyTypeInfo)

  override def process(key: TInKey,
                       context: ProcessWindowFunction[RecordWrapper[Option[T], TInKey], RecordWrapper[Option[T], TOutKey], TInKey, TimeWindow]#Context,
                       items: lang.Iterable[RecordWrapper[Option[T], TInKey]],
                       collector: Collector[RecordWrapper[Option[T], TOutKey]]): Unit = {
    val windowStartTime = Instant.ofEpochMilli(context.window().getStart)

    val record = items.iterator().next()
    val outKey = this.addWindowStartTimeToKey(record.key, windowStartTime)
    val outRecord = RecordWrapper.wrap(record.value, outKey, sequenceNumberHelper.increment())
    collector.collect(outRecord)
  }

  override def open(parameters: Configuration): Unit = {
    this.sequenceNumberHelper = new SequenceNumberHelper(this.getRuntimeContext)
  }
} 
Example 24
Source File: UnpackOptionProcessFunction.scala    From milan   with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import com.typesafe.scalalogging.Logger
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory


class UnpackOptionProcessFunction[T >: Null, TKey >: Null <: Product](recordType: TypeInformation[T],
                                                                      keyType: TypeInformation[TKey])
  extends ProcessFunction[RecordWrapper[Option[T], TKey], RecordWrapper[T, TKey]]
    with ResultTypeQueryable[RecordWrapper[T, TKey]] {

  @transient private lazy val logger = Logger(LoggerFactory.getLogger(getClass))

  override def processElement(record: RecordWrapper[Option[T], TKey],
                              context: ProcessFunction[RecordWrapper[Option[T], TKey], RecordWrapper[T, TKey]]#Context,
                              collector: Collector[RecordWrapper[T, TKey]]): Unit = {
    if (record.value.isDefined) {
      collector.collect(RecordWrapper.wrap(record.value.get, record.key, record.sequenceNumber))
    }
  }

  override def getProducedType: TypeInformation[RecordWrapper[T, TKey]] =
    RecordWrapperTypeInformation.wrap(this.recordType, this.keyType)
} 
Example 25
Source File: IdentityFlatMapFunction.scala    From milan   with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.util.Collector



class IdentityFlatMapFunction[T >: Null, TKey >: Null <: Product](recordTypeInformation: TypeInformation[T],
                                                                  keyTypeInformation: TypeInformation[TKey])
  extends FlatMapFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey]]
    with ResultTypeQueryable[RecordWrapper[T, TKey]] {

  override def flatMap(record: RecordWrapper[T, TKey], collector: Collector[RecordWrapper[T, TKey]]): Unit = {
    collector.collect(record)
  }

  override def getProducedType: TypeInformation[RecordWrapper[T, TKey]] =
    RecordWrapperTypeInformation.wrap(this.recordTypeInformation, this.keyTypeInformation)
} 
Example 26
Source File: AssignSequenceNumberProcessWindowFunctions.scala    From milan   with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import java.lang

import com.amazon.milan.compiler.flink.types.RecordWrapper
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.windows.Window
import org.apache.flink.util.Collector



class AssignSequenceNumberProcessAllWindowFunction[T >: Null, TKey >: Null <: Product, TWindow <: Window]
  extends ProcessAllWindowFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey], TWindow] {

  @transient private var sequenceNumberHelper: SequenceNumberHelper = _

  override def process(context: ProcessAllWindowFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey], TWindow]#Context,
                       items: lang.Iterable[RecordWrapper[T, TKey]],
                       collector: Collector[RecordWrapper[T, TKey]]): Unit = {
    val item = items.iterator().next()
    val outputRecord = item.withSequenceNumber(this.sequenceNumberHelper.increment())
    collector.collect(outputRecord)
  }

  override def open(parameters: Configuration): Unit = {
    this.sequenceNumberHelper = new SequenceNumberHelper(this.getRuntimeContext)
  }
} 
Example 27
Source File: EvaluationFunctionSpec.scala    From flink-jpmml   with GNU Affero General Public License v3.0
package io.radicalbit.flink.pmml.scala.api.functions

import io.radicalbit.flink.pmml.scala.api.PmmlModel
import io.radicalbit.flink.pmml.scala.api.reader.ModelReader
import io.radicalbit.flink.pmml.scala.models.prediction.{Prediction, Score}
import io.radicalbit.flink.pmml.scala.utils.models.Input
import io.radicalbit.flink.pmml.scala.utils.PmmlLoaderKit
import io.radicalbit.flink.streaming.spec.core.{FlinkPipelineTestKit, FlinkTestKitCompanion}
import org.apache.flink.api.scala.ClosureCleaner
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
import org.scalatest.{Matchers, WordSpecLike}

object EvaluationFunctionSpec extends FlinkTestKitCompanion[Prediction]

class EvaluationFunctionSpec
    extends FlinkPipelineTestKit[Input, Prediction]
    with WordSpecLike
    with Matchers
    with PmmlLoaderKit {

  private implicit val companion = EvaluationFunctionSpec

  private val reader = ModelReader(getPMMLSource(Source.KmeansPmml))

  private def evaluationOperator[T](source: ModelReader)(f: (T, PmmlModel) => Prediction) =
    new EvaluationFunction[T, Prediction](source) {
      override def flatMap(value: T, out: Collector[Prediction]): Unit = out.collect(f(value, evaluator))
    }

  private val operator = evaluationOperator(reader) { (in: Input, model: PmmlModel) =>
    Prediction(Score(1.0))
  }

  private def pipeline(source: DataStream[Input]): DataStream[Prediction] = source.flatMap(operator)

  "EvaluationFunction" should {

    "be Serializable" in {
      noException should be thrownBy ClosureCleaner.clean(operator, checkSerializable = true)
    }

    "return expected behavior on given function" in {
      executePipeline(Seq(Input(1.0, 2.0)))(pipeline) shouldBe Seq(Prediction(Score(1.0)))
    }

  }

} 
Example 28
Source File: MultiStreamTransformations.scala    From examples-scala   with Apache License 2.0
package io.github.streamingwithflink.chapter5

import io.github.streamingwithflink.chapter5.util.{Alert, SmokeLevel, SmokeLevelSource}
import io.github.streamingwithflink.chapter5.util.SmokeLevel.SmokeLevel
import io.github.streamingwithflink.util.{SensorReading, SensorSource, SensorTimeAssigner}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.co.CoFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector


  class RaiseAlertFlatMap extends CoFlatMapFunction[SensorReading, SmokeLevel, Alert] {

    var smokeLevel = SmokeLevel.Low

    override def flatMap1(in1: SensorReading, collector: Collector[Alert]): Unit = {
      // high chance of fire => true
      if (smokeLevel.equals(SmokeLevel.High) && in1.temperature > 100) {
        collector.collect(Alert("Risk of fire!", in1.timestamp))
      }
    }

    override def flatMap2(in2: SmokeLevel, collector: Collector[Alert]): Unit = {
      smokeLevel = in2
    }
  }
}