org.apache.flink.streaming.api.datastream.KeyedStream Java Examples

The following examples show how to use org.apache.flink.streaming.api.datastream.KeyedStream. Each example is drawn from an open-source project; the source file and project are noted above each listing.
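Before the project examples, here is a minimal, self-contained sketch of the basic pattern: keyBy partitions a DataStream into a KeyedStream, on which per-key operations such as sum can run. The words/counts data below is illustrative only, and the usual org.apache.flink imports (StreamExecutionEnvironment, DataStream, KeyedStream, Tuple2) are assumed.

// Minimal sketch: key a stream of (word, count) pairs by word and sum the counts per key.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Tuple2<String, Integer>> words = env.fromElements(
		Tuple2.of("a", 1), Tuple2.of("b", 1), Tuple2.of("a", 1));
KeyedStream<Tuple2<String, Integer>, String> keyed = words.keyBy(t -> t.f0);
keyed.sum(1).print(); // rolling per-key sums: (a,1), (b,1), (a,2)
env.execute("KeyedStream sketch");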
Example #1
Source File: FlinkGroupByKeyWrapper.java    From flink-dataflow with Apache License 2.0
public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(DataStream<WindowedValue<KV<K, V>>> inputDataStream, KvCoder<K, V> inputKvCoder) {
	final Coder<K> keyCoder = inputKvCoder.getKeyCoder();
	final TypeInformation<K> keyTypeInfo = new CoderTypeInformation<>(keyCoder);
	final boolean isKeyVoid = keyCoder instanceof VoidCoder;

	return inputDataStream.keyBy(
			new KeySelectorWithQueryableResultType<K, V>() {

				@Override
				public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
					return isKeyVoid ? (K) VoidCoderTypeSerializer.VoidValue.INSTANCE :
							value.getValue().getKey();
				}

				@Override
				public TypeInformation<K> getProducedType() {
					return keyTypeInfo;
				}
			});
}
 
Example #2
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0
static WindowedStream<Event, Integer, TimeWindow> applyTumblingWindows(
		KeyedStream<Event, Integer> keyedStream, ParameterTool pt) {

	long eventTimeProgressPerEvent = pt.getLong(
		SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.key(),
		SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.defaultValue());

	return keyedStream.timeWindow(
		Time.milliseconds(
			pt.getLong(
				TUMBLING_WINDOW_OPERATOR_NUM_EVENTS.key(),
				TUMBLING_WINDOW_OPERATOR_NUM_EVENTS.defaultValue()
			) * eventTimeProgressPerEvent
		)
	);
}
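Note: KeyedStream#timeWindow, used above, is deprecated since Flink 1.12 in favor of explicit window assigners. A sketch of the same tumbling event-time window under the newer API follows; windowSizeMs is a stand-in for the numEvents * eventTimeProgressPerEvent product computed above, not a variable in the original code.

// Sketch of the equivalent window under the explicit window-assigner API (Flink 1.12+).
return keyedStream.window(
	TumblingEventTimeWindows.of(
		Time.milliseconds(windowSizeMs)));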
 
Example #3
Source File: SerialStreamingLedgerRuntimeProvider.java    From da-streamingledger with Apache License 2.0
@Override
public ResultStreams translate(String name, List<InputAndSpec<?, ?>> streamLedgerSpecs) {
    List<OutputTag<?>> sideOutputTags = createSideOutputTags(streamLedgerSpecs);

    // the input stream is a union of different streams.
    KeyedStream<TaggedElement, Boolean> input = union(streamLedgerSpecs)
            .keyBy(unused -> true);

    // main pipeline
    String serialTransactorName = "SerialTransactor(" + name + ")";
    SingleOutputStreamOperator<Void> resultStream = input
            .process(new SerialTransactor(specs(streamLedgerSpecs), sideOutputTags))
            .name(serialTransactorName)
            .uid(serialTransactorName + "___SERIAL_TX")
            .forceNonParallel()
            .returns(Void.class);

    // gather the sideOutputs.
    Map<String, DataStream<?>> output = new HashMap<>();
    for (OutputTag<?> outputTag : sideOutputTags) {
        DataStream<?> rs = resultStream.getSideOutput(outputTag);
        output.put(outputTag.getId(), rs);
    }
    return new ResultStreams(output);
}
 
Example #4
Source File: StatefulStreamJobUpgradeTestProgram.java    From flink with Apache License 2.0
private static void executeOriginalVariant(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {
	KeyedStream<Event, Integer> source = env.addSource(createEventSource(pt))
		.name("EventSource")
		.uid("EventSource")
		.assignTimestampsAndWatermarks(createTimestampExtractor(pt))
		.keyBy(Event::getKey);

	List<TypeSerializer<ComplexPayload>> stateSer =
		Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

	KeyedStream<Event, Integer> afterStatefulOperations =
		applyOriginalStatefulOperations(source, stateSer, Collections.emptyList());

	afterStatefulOperations
		.flatMap(createSemanticsCheckMapper(pt))
		.name("SemanticsCheckMapper")
		.addSink(new PrintSinkFunction<>());

	env.execute("General purpose test job");
}
 
Example #5
Source File: StatefulStreamJobUpgradeTestProgram.java    From flink with Apache License 2.0
private static void executeUpgradedVariant(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {
	KeyedStream<UpgradedEvent, Integer> source = env.addSource(createEventSource(pt))
		.name("EventSource")
		.uid("EventSource")
		.assignTimestampsAndWatermarks(createTimestampExtractor(pt))
		.map(new UpgradeEvent())
		.keyBy(UpgradedEvent::getKey);

	List<TypeSerializer<ComplexPayload>> stateSer =
		Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

	KeyedStream<UpgradedEvent, Integer> afterStatefulOperations =
		applyUpgradedStatefulOperations(source, stateSer, Collections.emptyList());

	afterStatefulOperations
		.map(new DowngradeEvent())
		.keyBy(Event::getKey)
		.flatMap(createSemanticsCheckMapper(pt))
		.name("SemanticsCheckMapper")
		.addSink(new PrintSinkFunction<>());

	env.execute("General purpose test job");
}
 
Example #6
Source File: KafkaShuffleTestBase.java    From flink with Apache License 2.0
static KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> createKafkaShuffle(
		StreamExecutionEnvironment env,
		String topic,
		int numElementsPerProducer,
		int producerParallelism,
		TimeCharacteristic timeCharacteristic,
		int numberOfPartitions,
		boolean randomness) {
	DataStream<Tuple3<Integer, Long, Integer>> source =
		env.addSource(new KafkaSourceFunction(numElementsPerProducer)).setParallelism(producerParallelism);
	DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime) ?
		source.assignTimestampsAndWatermarks(new PunctuatedExtractor(randomness)).setParallelism(producerParallelism) : source;

	return FlinkKafkaShuffle.persistentKeyBy(
		input,
		topic,
		producerParallelism,
		numberOfPartitions,
		kafkaServer.getStandardProperties(),
		0);
}
 
Example #7
Source File: DataStreamAllroundTestJobFactory.java    From Flink-CEPplus with Apache License 2.0
static WindowedStream<Event, Integer, TimeWindow> applyTumblingWindows(
		KeyedStream<Event, Integer> keyedStream, ParameterTool pt) {

	long eventTimeProgressPerEvent = pt.getLong(
		SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.key(),
		SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.defaultValue());

	return keyedStream.timeWindow(
		Time.milliseconds(
			pt.getLong(
				TUMBLING_WINDOW_OPERATOR_NUM_EVENTS.key(),
				TUMBLING_WINDOW_OPERATOR_NUM_EVENTS.defaultValue()
			) * eventTimeProgressPerEvent
		)
	);
}
 
Example #8
Source File: KafkaShuffleTestBase.java    From flink with Apache License 2.0
static KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> createKafkaShuffle(
		StreamExecutionEnvironment env,
		String topic,
		int numElementsPerProducer,
		int producerParallelism,
		TimeCharacteristic timeCharacteristic,
		int numberOfPartitions) {
	return createKafkaShuffle(
		env,
		topic,
		numElementsPerProducer,
		producerParallelism,
		timeCharacteristic,
		numberOfPartitions,
		false);
}
 
Example #9
Source File: UdfStreamOperatorCheckpointingITCase.java    From flink with Apache License 2.0
/**
 * Assembles a stream of a grouping field and some long data. Applies reduce functions
 * to this stream.
 */
@Override
public void testProgram(StreamExecutionEnvironment env) {

	// base stream
	KeyedStream<Tuple2<Integer, Long>, Tuple> stream = env.addSource(new StatefulMultipleSequence())
			.keyBy(0);

	stream
			// testing built-in aggregate
			.min(1)
			// failure generation
			.map(new OnceFailingIdentityMapFunction(NUM_INPUT))
			.keyBy(0)
			.addSink(new MinEvictingQueueSink());

	stream
			// testing UDF reducer
			.reduce(new ReduceFunction<Tuple2<Integer, Long>>() {
				@Override
				public Tuple2<Integer, Long> reduce(
						Tuple2<Integer, Long> value1, Tuple2<Integer, Long> value2) throws Exception {
					return Tuple2.of(value1.f0, value1.f1 + value2.f1);
				}
			})
			.keyBy(0)
			.addSink(new SumEvictingQueueSink());

	stream
			// testing UDF folder
			.fold(Tuple2.of(0, 0L), new FoldFunction<Tuple2<Integer, Long>, Tuple2<Integer, Long>>() {
				@Override
				public Tuple2<Integer, Long> fold(
						Tuple2<Integer, Long> accumulator, Tuple2<Integer, Long> value) throws Exception {
					return Tuple2.of(value.f0, accumulator.f1 + value.f1);
				}
			})
			.keyBy(0)
			.addSink(new FoldEvictingQueueSink());
}
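FoldFunction and the fold transformation used in the last branch are deprecated in newer Flink releases. The same per-key running sum can be expressed with the non-deprecated reduce; a sketch, reusing the sink from the test above:

// Sketch: the deprecated fold rewritten as a reduce; the emitted per-key running sums are identical.
stream
		.reduce((value1, value2) -> Tuple2.of(value2.f0, value1.f1 + value2.f1))
		.keyBy(0)
		.addSink(new FoldEvictingQueueSink());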
 
Example #10
Source File: RideCount.java    From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		final String input = params.get("input", ExerciseBase.pathToRideData);

		final int maxEventDelay = 60;       // events are out of order by max 60 seconds
		final int servingSpeedFactor = 600; // events of 10 minutes are served every second

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// start the data generator
		DataStream<TaxiRide> rides = env.addSource(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor));

		// map each ride to a tuple of (driverId, 1)
		DataStream<Tuple2<Long, Long>> tuples = rides.map(new MapFunction<TaxiRide, Tuple2<Long, Long>>() {
					@Override
					public Tuple2<Long, Long> map(TaxiRide ride) throws Exception {
return new Tuple2<Long, Long>(ride.driverId, 1L);
					}
		});

		// partition the stream by the driverId
		KeyedStream<Tuple2<Long, Long>, Tuple> keyedByDriverId = tuples.keyBy(0);

		// count the rides for each driver
		DataStream<Tuple2<Long, Long>> rideCounts = keyedByDriverId.sum(1);

		// we could, in fact, print out any or all of these streams
		rideCounts.print();

		// run the cleansing pipeline
		env.execute("Ride Count");
	}
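Field-index keying such as keyBy(0) is deprecated in recent Flink versions in favor of KeySelector functions, which also give the key a concrete type. A sketch of the same partitioning:

// Sketch: the same partitioning with a typed KeySelector instead of a tuple-field index.
KeyedStream<Tuple2<Long, Long>, Long> keyedByDriverId = tuples.keyBy(t -> t.f0);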
 
Example #11
Source File: FlinkGroupAlsoByWindowWrapper.java    From flink-dataflow with Apache License 2.0
/**
 * Creates a DataStream where elements are grouped in windows based on the specified windowing strategy.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #create(PipelineOptions, PCollection, KeyedStream, Combine.KeyedCombineFn, KvCoder)}
 * is that this method assumes no combiner function
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 *
 * @param options            the general job configuration options.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream; it is assumed to already be grouped by key.
 */
public static <K, VIN> DataStream<WindowedValue<KV<K, Iterable<VIN>>>> createForIterable(
		PipelineOptions options,
		PCollection input,
		KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey) {
	Preconditions.checkNotNull(options);

	KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
	Coder<K> keyCoder = inputKvCoder.getKeyCoder();
	Coder<VIN> inputValueCoder = inputKvCoder.getValueCoder();

	FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper(options,
			input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, null);

	Coder<Iterable<VIN>> valueIterCoder = IterableCoder.of(inputValueCoder);
	KvCoder<K, Iterable<VIN>> outputElemCoder = KvCoder.of(keyCoder, valueIterCoder);

	Coder<WindowedValue<KV<K, Iterable<VIN>>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
			outputElemCoder,
			input.getWindowingStrategy().getWindowFn().windowCoder());

	CoderTypeInformation<WindowedValue<KV<K, Iterable<VIN>>>> outputTypeInfo =
			new CoderTypeInformation<>(windowedOutputElemCoder);

	DataStream<WindowedValue<KV<K, Iterable<VIN>>>> groupedByKeyAndWindow = groupedStreamByKey
			.transform("GroupByWindow",
					new CoderTypeInformation<>(windowedOutputElemCoder),
					windower)
			.returns(outputTypeInfo);

	return groupedByKeyAndWindow;
}
 
Example #12
Source File: FlinkGroupAlsoByWindowWrapper.java    From flink-dataflow with Apache License 2.0
/**
 * Creates a DataStream where elements are grouped in windows based on the specified windowing strategy.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #createForIterable(PipelineOptions, PCollection, KeyedStream)}
 * is that this method assumes that a combiner function is provided
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 * A combiner helps increase speed and, in most cases, reduces the per-window state.
 *
 * @param options            the general job configuration options.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream; it is assumed to already be grouped by key.
 * @param combiner           the combiner to be used.
 * @param outputKvCoder      the coder for the output key/value pairs.
 */
public static <K, VIN, VACC, VOUT> DataStream<WindowedValue<KV<K, VOUT>>> create(
		PipelineOptions options,
		PCollection input,
		KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey,
		Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner,
		KvCoder<K, VOUT> outputKvCoder) {
	Preconditions.checkNotNull(options);

	KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
	FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper<>(options,
			input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, combiner);

	Coder<WindowedValue<KV<K, VOUT>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
			outputKvCoder,
			input.getWindowingStrategy().getWindowFn().windowCoder());

	CoderTypeInformation<WindowedValue<KV<K, VOUT>>> outputTypeInfo =
			new CoderTypeInformation<>(windowedOutputElemCoder);

	DataStream<WindowedValue<KV<K, VOUT>>> groupedByKeyAndWindow = groupedStreamByKey
			.transform("GroupByWindowWithCombiner",
					new CoderTypeInformation<>(outputKvCoder),
					windower)
			.returns(outputTypeInfo);

	return groupedByKeyAndWindow;
}
 
Example #13
Source File: StatefulStreamJobUpgradeTestProgram.java    From flink with Apache License 2.0
private static KeyedStream<Event, Integer> applyTestStatefulOperator(
	String name,
	JoinFunction<Event, ComplexPayload, ComplexPayload> stateFunc,
	KeyedStream<Event, Integer> source,
	List<TypeSerializer<ComplexPayload>> stateSer,
	List<Class<ComplexPayload>> stateClass) {
	return source
		.map(createArtificialKeyedStateMapper(e -> e, stateFunc, stateSer, stateClass))
		.name(name)
		.uid(name)
		.returns(Event.class)
		.keyBy(Event::getKey);
}
 
Example #14
Source File: SummaryTreeReduce.java    From gelly-streaming with Apache License 2.0
private DataStream<Tuple2<Integer, S>> enhance(DataStream<Tuple2<Integer, S>> input, TypeInformation<Tuple2<Integer, S>> aggType) {

		if (input.getParallelism() <= 2) {
			return input;
		}

		int nextParal = input.getParallelism() / 2;
		DataStream<Tuple2<Integer, S>> unpartitionedStream =
				input.keyBy(new KeySelector<Tuple2<Integer, S>, Integer>() {
					//collapse two partitions into one
					@Override
					public Integer getKey(Tuple2<Integer, S> record) throws Exception {
						return record.f0 / 2;
					}
				});

		//repartition stream to p / 2 aggregators
		KeyedStream<Tuple2<Integer, S>, Integer> repartitionedStream =
				unpartitionedStream.map(new PartitionReMapper()).returns(aggType)
						.setParallelism(nextParal)
						.keyBy(0);

		//window again on event time and aggregate
		DataStream<Tuple2<Integer, S>> aggregatedStream =
				repartitionedStream.timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS))
						.reduce(new AggregationWrapper<>(getCombineFun()))       
						.setParallelism(nextParal);
		return enhance(aggregatedStream, aggType);
	}
 
Example #15
Source File: UdfStreamOperatorCheckpointingITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Assembles a stream of a grouping field and some long data. Applies reduce functions
 * to this stream.
 */
@Override
public void testProgram(StreamExecutionEnvironment env) {

	// base stream
	KeyedStream<Tuple2<Integer, Long>, Tuple> stream = env.addSource(new StatefulMultipleSequence())
			.keyBy(0);

	stream
			// testing built-in aggregate
			.min(1)
			// failure generation
			.map(new OnceFailingIdentityMapFunction(NUM_INPUT))
			.keyBy(0)
			.addSink(new MinEvictingQueueSink());

	stream
			// testing UDF reducer
			.reduce(new ReduceFunction<Tuple2<Integer, Long>>() {
				@Override
				public Tuple2<Integer, Long> reduce(
						Tuple2<Integer, Long> value1, Tuple2<Integer, Long> value2) throws Exception {
					return Tuple2.of(value1.f0, value1.f1 + value2.f1);
				}
			})
			.keyBy(0)
			.addSink(new SumEvictingQueueSink());

	stream
			// testing UDF folder
			.fold(Tuple2.of(0, 0L), new FoldFunction<Tuple2<Integer, Long>, Tuple2<Integer, Long>>() {
				@Override
				public Tuple2<Integer, Long> fold(
						Tuple2<Integer, Long> accumulator, Tuple2<Integer, Long> value) throws Exception {
					return Tuple2.of(value.f0, accumulator.f1 + value.f1);
				}
			})
			.keyBy(0)
			.addSink(new FoldEvictingQueueSink());
}
 
Example #16
Source File: KafkaShuffleExactlyOnceITCase.java    From flink with Apache License 2.0
/**
 * Tests failure recovery with partition assignment after processing 2/3 of the data.
 *
 * <p>Schema: (key, timestamp, source instance Id).
 * Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3
 */
private void testAssignedToPartitionFailureRecovery(
		int numElementsPerProducer,
		TimeCharacteristic timeCharacteristic) throws Exception {
	String topic = topic("partition_failure_recovery", timeCharacteristic);
	final int numberOfPartitions = 3;
	final int producerParallelism = 2;
	final int failAfterElements = numElementsPerProducer * producerParallelism * 2 / 3;

	createTestTopic(topic, numberOfPartitions, 1);

	final StreamExecutionEnvironment env = createEnvironment(producerParallelism, timeCharacteristic);

	KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> keyedStream = createKafkaShuffle(
		env,
		topic,
		numElementsPerProducer,
		producerParallelism,
		timeCharacteristic,
		numberOfPartitions);
	keyedStream
		.process(new PartitionValidator(keyedStream.getKeySelector(), numberOfPartitions, topic))
		.setParallelism(numberOfPartitions)
		.map(new ToInteger(producerParallelism)).setParallelism(numberOfPartitions)
		.map(new FailingIdentityMapper<>(failAfterElements)).setParallelism(1)
		.addSink(new ValidatingExactlyOnceSink(numElementsPerProducer * producerParallelism)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;

	tryExecute(env, topic);

	deleteTestTopic(topic);
}
 
Example #17
Source File: FlinkStreamingTransformTranslators.java    From beam with Apache License 2.0
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KeyedWorkItem<K, InputT>>> transform,
    FlinkStreamingTranslationContext context) {

  PCollection<KV<K, InputT>> input = context.getInput(transform);

  KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();

  SingletonKeyedWorkItemCoder<K, InputT> workItemCoder =
      SingletonKeyedWorkItemCoder.of(
          inputKvCoder.getKeyCoder(),
          inputKvCoder.getValueCoder(),
          input.getWindowingStrategy().getWindowFn().windowCoder());

  WindowedValue.ValueOnlyWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>>
      windowedWorkItemCoder = WindowedValue.getValueOnlyCoder(workItemCoder);

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
      new CoderTypeInformation<>(windowedWorkItemCoder);

  DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);

  DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
      inputDataStream
          .flatMap(new ToKeyedWorkItemInGlobalWindow<>(context.getPipelineOptions()))
          .returns(workItemTypeInfo)
          .name("ToKeyedWorkItem");

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer>
      keyedWorkItemStream =
          workItemStream.keyBy(new WorkItemKeySelector<>(inputKvCoder.getKeyCoder()));

  context.setOutputDataStream(context.getOutput(transform), keyedWorkItemStream);
}
 
Example #18
Source File: KeyedTransformations.java    From examples-java with Apache License 2.0
public static void main(String[] args) throws Exception {

        // set up the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // use event time for the application
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // configure watermark interval
        env.getConfig().setAutoWatermarkInterval(1000L);

        // ingest sensor stream
        DataStream<SensorReading> readings = env
            // SensorSource generates random temperature readings
            .addSource(new SensorSource())
            // assign timestamps and watermarks which are required for event time
            .assignTimestampsAndWatermarks(new SensorTimeAssigner());

        // group sensor readings by sensor id
        KeyedStream<SensorReading, String> keyed = readings
            .keyBy(r -> r.id);

        // a rolling reduce that computes the highest temperature of each sensor and
        // the corresponding timestamp
        DataStream<SensorReading> maxTempPerSensor = keyed
            .reduce((r1, r2) -> {
                if (r1.temperature > r2.temperature) {
                    return r1;
                } else {
                    return r2;
                }
            });

        maxTempPerSensor.print();

        // execute application
        env.execute("Keyed Transformations Example");
    }
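Because the reduce above simply keeps the hotter of two readings, the built-in maxBy aggregation produces the same stream up to tie-breaking behavior. A sketch, relying on temperature being a public field of SensorReading:

// Sketch: built-in alternative to the hand-written rolling reduce.
DataStream<SensorReading> maxTempPerSensor = keyed.maxBy("temperature");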
 
Example #19
Source File: MultiStreamTransformations.java    From examples-java with Apache License 2.0
public static void main(String[] args) throws Exception {

        // set up the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // use event time for the application
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // configure watermark interval
        env.getConfig().setAutoWatermarkInterval(1000L);

        // ingest sensor stream
        DataStream<SensorReading> tempReadings = env
            // SensorSource generates random temperature readings
            .addSource(new SensorSource())
            // assign timestamps and watermarks which are required for event time
            .assignTimestampsAndWatermarks(new SensorTimeAssigner());

        // ingest smoke level stream
        DataStream<SmokeLevel> smokeReadings = env
            .addSource(new SmokeLevelSource())
            .setParallelism(1);

        // group sensor readings by sensor id
        KeyedStream<SensorReading, String> keyedTempReadings = tempReadings
            .keyBy(r -> r.id);

        // connect the two streams and raise an alert if the temperature and
        // smoke levels are high
        DataStream<Alert> alerts = keyedTempReadings
            .connect(smokeReadings.broadcast())
            .flatMap(new RaiseAlertFlatMap());

        alerts.print();

        // execute the application
        env.execute("Multi-Stream Transformations Example");
    }
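RaiseAlertFlatMap is defined elsewhere in the project. A plausible skeleton is a CoFlatMapFunction that remembers the latest broadcast smoke level and emits an alert when a hot reading arrives; the sketch below is a hypothetical reconstruction (the temperature threshold, field names, and Alert constructor are assumptions, not the project's actual code).

// Hypothetical sketch of a two-input function combining temperature and smoke-level streams.
public static class RaiseAlertFlatMap implements CoFlatMapFunction<SensorReading, SmokeLevel, Alert> {

    private SmokeLevel smokeLevel = SmokeLevel.LOW; // latest smoke level seen by this task

    @Override
    public void flatMap1(SensorReading reading, Collector<Alert> out) {
        // raise an alert if smoke is high and the current reading is hot (threshold assumed)
        if (smokeLevel == SmokeLevel.HIGH && reading.temperature > 100) {
            out.collect(new Alert("Risk of fire!", reading.timestamp));
        }
    }

    @Override
    public void flatMap2(SmokeLevel level, Collector<Alert> out) {
        this.smokeLevel = level; // remember the most recent smoke level
    }
}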
 
Example #20
Source File: KafkaShuffleITCase.java    From flink with Apache License 2.0
/**
 * Tests that data is routed to the correct partition.
 *
 * <p>Schema: (key, timestamp, source instance Id).
 * Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3
 */
private void testAssignedToPartition(
		int numElementsPerProducer,
		TimeCharacteristic timeCharacteristic) throws Exception {
	String topic = topic("test_assigned_to_partition", timeCharacteristic);
	final int numberOfPartitions = 3;
	final int producerParallelism = 2;

	createTestTopic(topic, numberOfPartitions, 1);

	final StreamExecutionEnvironment env = createEnvironment(producerParallelism, timeCharacteristic);

	KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> keyedStream = createKafkaShuffle(
		env,
		topic,
		numElementsPerProducer,
		producerParallelism,
		timeCharacteristic,
		numberOfPartitions);
	keyedStream
		.process(new PartitionValidator(keyedStream.getKeySelector(), numberOfPartitions, topic))
		.setParallelism(numberOfPartitions)
		.map(new ElementCountNoMoreThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1)
		.map(new ElementCountNoLessThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1);

	tryExecute(env, topic);

	deleteTestTopic(topic);
}
 
Example #21
Source File: KafkaShuffleITCase.java    From flink with Apache License 2.0
/**
 * Tests that watermarks on the consumer side always increase.
 *
 * <p>Schema: (key, timestamp, source instance Id).
 * Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3
 */
private void testWatermarkIncremental(int numElementsPerProducer) throws Exception {
	TimeCharacteristic timeCharacteristic = EventTime;
	String topic = topic("test_watermark_incremental", timeCharacteristic);
	final int numberOfPartitions = 3;
	final int producerParallelism = 2;

	createTestTopic(topic, numberOfPartitions, 1);

	final StreamExecutionEnvironment env = createEnvironment(producerParallelism, timeCharacteristic);

	KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> keyedStream = createKafkaShuffle(
		env,
		topic,
		numElementsPerProducer,
		producerParallelism,
		timeCharacteristic,
		numberOfPartitions,
		true);
	keyedStream
		.process(new WatermarkValidator())
		.setParallelism(numberOfPartitions)
		.map(new ElementCountNoMoreThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1)
		.map(new ElementCountNoLessThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1);

	tryExecute(env, topic);

	deleteTestTopic(topic);
}
 
Example #22
Source File: StatefulStreamJobUpgradeTestProgram.java    From flink with Apache License 2.0
private static KeyedStream<Event, Integer> applyUpgradedStatefulOperations(
	KeyedStream<Event, Integer> source,
	List<TypeSerializer<ComplexPayload>> stateSer,
	List<Class<ComplexPayload>> stateClass) {
	source = applyTestStatefulOperator("stateMap2", simpleStateUpdate("stateMap2"), source, stateSer, stateClass);
	source = applyTestStatefulOperator("stateMap1", lastStateUpdate("stateMap1"), source, stateSer, stateClass);
	return applyTestStatefulOperator("stateMap3", simpleStateUpdate("stateMap3"), source, stateSer, stateClass);
}
 
Example #23
Source File: StatefulStreamJobUpgradeTestProgram.java    From flink with Apache License 2.0
private static KeyedStream<Event, Integer> applyOriginalStatefulOperations(
	KeyedStream<Event, Integer> source,
	List<TypeSerializer<ComplexPayload>> stateSer,
	List<Class<ComplexPayload>> stateClass) {
	source = applyTestStatefulOperator("stateMap1", simpleStateUpdate("stateMap1"), source, stateSer, stateClass);
	return applyTestStatefulOperator("stateMap2", lastStateUpdate("stateMap2"), source, stateSer, stateClass);
}
 
Example #24
Source File: StatefulStreamJobUpgradeTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool pt = ParameterTool.fromArgs(args);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	setupEnvironment(env, pt);

	KeyedStream<Event, Integer> source = env.addSource(createEventSource(pt))
		.name("EventSource")
		.uid("EventSource")
		.assignTimestampsAndWatermarks(createTimestampExtractor(pt))
		.keyBy(Event::getKey);

	List<TypeSerializer<ComplexPayload>> stateSer =
		Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

	KeyedStream<Event, Integer> afterStatefulOperations = isOriginalJobVariant(pt) ?
		applyOriginalStatefulOperations(source, stateSer, Collections.emptyList()) :
		applyUpgradedStatefulOperations(source, stateSer, Collections.emptyList());

	afterStatefulOperations
		.flatMap(createSemanticsCheckMapper(pt))
		.name("SemanticsCheckMapper")
		.addSink(new PrintSinkFunction<>());

	env.execute("General purpose test job");
}
 
Example #25
Source File: StatefulStreamJobUpgradeTestProgram.java    From flink with Apache License 2.0
private static KeyedStream<UpgradedEvent, Integer> applyUpgradedStatefulOperations(
		KeyedStream<UpgradedEvent, Integer> source,
		List<TypeSerializer<ComplexPayload>> stateSer,
		List<Class<ComplexPayload>> stateClass) {
	source = applyUpgradedTestStatefulOperator("stateMap2", simpleUpgradedStateUpdate("stateMap2"), source, stateSer, stateClass);
	source = applyUpgradedTestStatefulOperator("stateMap1", lastUpgradedStateUpdate("stateMap1"), source, stateSer, stateClass);
	return applyUpgradedTestStatefulOperator("stateMap3", simpleUpgradedStateUpdate("stateMap3"), source, stateSer, stateClass);
}
 
Example #26
Source File: StatefulStreamJobUpgradeTestProgram.java    From Flink-CEPplus with Apache License 2.0
private static KeyedStream<Event, Integer> applyTestStatefulOperator(
	String name,
	JoinFunction<Event, ComplexPayload, ComplexPayload> stateFunc,
	KeyedStream<Event, Integer> source,
	List<TypeSerializer<ComplexPayload>> stateSer,
	List<Class<ComplexPayload>> stateClass) {
	return source
		.map(createArtificialKeyedStateMapper(e -> e, stateFunc, stateSer, stateClass))
		.name(name)
		.uid(name)
		.returns(Event.class)
		.keyBy(Event::getKey);
}