Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#keyBy()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#keyBy(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: DataStreamTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that keying a stream of {@code POJOWithHashCode} by its {@code "id"} field
 * fails with an {@link InvalidProgramException} whose message starts with the
 * "cannot be used as key" text for the expected key type.
 */
@Test
public void testPOJOWithNestedArrayNoHashCodeKeyRejection() {
	// Key type that the rejection message is expected to name.
	final TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple1<int[]>>(
			PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO);
	final String expectedMessagePrefix = "Type " + expectedTypeInfo + " cannot be used as key.";

	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<POJOWithHashCode> pojoStream = environment.fromElements(
			new POJOWithHashCode(new int[] {1, 2}));

	// The expectations must be registered before the call that is supposed to throw.
	expectedException.expect(InvalidProgramException.class);
	expectedException.expectMessage(new StringStartsWith(expectedMessagePrefix));

	pojoStream.keyBy("id");
}
 
Example 2
Source File: FlinkGroupByKeyWrapper.java    From flink-dataflow with Apache License 2.0 6 votes vote down vote up
/**
 * Keys a stream of windowed key/value pairs by the key of each pair.
 *
 * <p>If the key coder is a {@code VoidCoder}, every element is keyed by
 * {@code VoidCoderTypeSerializer.VoidValue.INSTANCE} rather than by its own key.
 * The selector reports a {@code CoderTypeInformation} built from the key coder
 * as its produced type.
 *
 * @param inputDataStream the stream to key
 * @param inputKvCoder coder of the key/value pairs; its key coder drives the key type
 * @return the stream keyed by {@code K}
 */
public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(DataStream<WindowedValue<KV<K, V>>> inputDataStream, KvCoder<K, V> inputKvCoder) {
	final Coder<K> coderForKey = inputKvCoder.getKeyCoder();
	final boolean voidKey = coderForKey instanceof VoidCoder;
	final TypeInformation<K> producedKeyType = new CoderTypeInformation<>(coderForKey);

	final KeySelectorWithQueryableResultType<K, V> selector =
			new KeySelectorWithQueryableResultType<K, V>() {

				@Override
				public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
					// Void-keyed input shares one placeholder key.
					if (voidKey) {
						return (K) VoidCoderTypeSerializer.VoidValue.INSTANCE;
					}
					return value.getValue().getKey();
				}

				@Override
				public TypeInformation<K> getProducedType() {
					return producedKeyType;
				}
			};

	return inputDataStream.keyBy(selector);
}
 
Example 3
Source File: Utils.java    From incubator-samoa with Apache License 2.0 6 votes vote down vote up
/**
 * Applies the requested partitioning scheme to the given stream.
 *
 * <p>{@code BROADCAST} broadcasts the stream, {@code GROUP_BY_KEY} keys it by the
 * {@code f0} field of each element, and {@code SHUFFLE} or any other value shuffles it.
 *
 * @param stream the stream to partition
 * @param partitioning the scheme to apply
 * @return the partitioned stream
 */
public static DataStream subscribe(DataStream<SamoaType> stream, PartitioningScheme partitioning) {
	switch (partitioning) {
		case GROUP_BY_KEY: {
			KeySelector<SamoaType, String> byFirstField = new KeySelector<SamoaType, String>() {
				@Override
				public String getKey(SamoaType element) throws Exception {
					return element.f0;
				}
			};
			return stream.keyBy(byFirstField);
		}
		case BROADCAST:
			return stream.broadcast();
		case SHUFFLE:
		default:
			return stream.shuffle();
	}
}
 
Example 4
Source File: DataStreamTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Keying {@code POJOWithHashCode} elements by {@code "id"} must raise an
 * {@link InvalidProgramException} naming the expected key type.
 */
@Test
public void testPOJOWithNestedArrayNoHashCodeKeyRejection() {
	final StreamExecutionEnvironment executionEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<POJOWithHashCode> source = executionEnv.fromElements(
			new POJOWithHashCode(new int[] {1, 2}));

	// Type information that should appear at the start of the error message.
	final TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple1<int[]>>(
			PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO);
	final StringStartsWith messageMatcher =
			new StringStartsWith("Type " + expectedTypeInfo + " cannot be used as key.");

	// Register the expectations, then trigger the failure.
	expectedException.expect(InvalidProgramException.class);
	expectedException.expectMessage(messageMatcher);

	source.keyBy("id");
}
 
Example 5
Source File: DataStreamTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that using a whole {@code Tuple2<Integer[], String>} element as the key
 * is rejected with an {@link InvalidProgramException} naming that tuple type.
 */
@Test
public void testTupleNestedArrayKeyRejection() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<Tuple2<Integer[], String>> tupleStream = environment.fromElements(
			new Tuple2<>(new Integer[] {1, 2}, "test-test"));

	final TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple2<Integer[], String>>(
			BasicArrayTypeInfo.INT_ARRAY_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
	final String expectedMessagePrefix = "Type " + expectedTypeInfo + " cannot be used as key.";

	// Identity selector: the element itself is the key.
	final KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>> identitySelector =
			new KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>>() {
				@Override
				public Tuple2<Integer[], String> getKey(Tuple2<Integer[], String> value) throws Exception {
					return value;
				}
			};

	// Expectations have to be in place before keyBy is invoked.
	expectedException.expect(InvalidProgramException.class);
	expectedException.expectMessage(new StringStartsWith(expectedMessagePrefix));

	tupleStream.keyBy(identitySelector);
}
 
Example 6
Source File: DataStreamTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * A selector that returns the full {@code Tuple2<Integer[], String>} element must be
 * refused by {@code keyBy} with an {@link InvalidProgramException}.
 */
@Test
public void testTupleNestedArrayKeyRejection() {
	// Type the error message is expected to start with.
	final TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple2<Integer[], String>>(
			BasicArrayTypeInfo.INT_ARRAY_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
	final StringStartsWith messageMatcher =
			new StringStartsWith("Type " + expectedTypeInfo + " cannot be used as key.");

	final StreamExecutionEnvironment executionEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<Tuple2<Integer[], String>> source = executionEnv.fromElements(
			new Tuple2<>(new Integer[] {1, 2}, "test-test"));

	final KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>> wholeElementAsKey =
			new KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>>() {
				@Override
				public Tuple2<Integer[], String> getKey(Tuple2<Integer[], String> element) throws Exception {
					return element;
				}
			};

	// Register the expected failure, then provoke it.
	expectedException.expect(InvalidProgramException.class);
	expectedException.expectMessage(messageMatcher);

	source.keyBy(wholeElementAsKey);
}
 
Example 7
Source File: DataStreamTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that keying a stream by the {@code POJOWithoutHashCode} element itself
 * is rejected with an {@link InvalidProgramException}.
 */
@Test
public void testPOJOnoHashCodeKeyRejection() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<POJOWithoutHashCode> pojoStream = environment.fromElements(
			new POJOWithoutHashCode(new int[] {1, 2}));

	// Identity selector: the POJO is used as its own key.
	final KeySelector<POJOWithoutHashCode, POJOWithoutHashCode> identitySelector =
			new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() {
				@Override
				public POJOWithoutHashCode getKey(POJOWithoutHashCode pojo) throws Exception {
					return pojo;
				}
			};

	// Only the exception type is asserted here, not its message.
	expectedException.expect(InvalidProgramException.class);

	pojoStream.keyBy(identitySelector);
}
 
Example 8
Source File: DataStreamTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * {@code keyBy("id")} on a {@code POJOWithHashCode} stream is expected to throw an
 * {@link InvalidProgramException} whose message begins with the rejected key type.
 */
@Test
public void testPOJOWithNestedArrayNoHashCodeKeyRejection() {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final POJOWithHashCode sample = new POJOWithHashCode(new int[] {1, 2});
	final DataStream<POJOWithHashCode> samples = env.fromElements(sample);

	// Build the message prefix from the type information the error should report.
	final TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple1<int[]>>(
			PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO);
	final String prefix = "Type " + expectedTypeInfo + " cannot be used as key.";

	// Configure the expected failure before triggering it.
	expectedException.expect(InvalidProgramException.class);
	expectedException.expectMessage(new StringStartsWith(prefix));

	samples.keyBy("id");
}
 
Example 9
Source File: DataStreamTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Using a {@code POJOWithoutHashCode} instance as the key must make {@code keyBy}
 * throw an {@link InvalidProgramException}.
 */
@Test
public void testPOJOnoHashCodeKeyRejection() {
	final StreamExecutionEnvironment executionEnv = StreamExecutionEnvironment.getExecutionEnvironment();

	final KeySelector<POJOWithoutHashCode, POJOWithoutHashCode> selfAsKey =
			new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() {
				@Override
				public POJOWithoutHashCode getKey(POJOWithoutHashCode element) throws Exception {
					return element;
				}
			};

	final DataStream<POJOWithoutHashCode> source = executionEnv.fromElements(
			new POJOWithoutHashCode(new int[] {1, 2}));

	// Register the expected exception type ahead of the failing call.
	expectedException.expect(InvalidProgramException.class);

	source.keyBy(selfAsKey);
}
 
Example 10
Source File: DataStreamTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Ensures an {@link InvalidProgramException} is raised when the key selector returns
 * the {@code POJOWithoutHashCode} element unchanged.
 */
@Test
public void testPOJOnoHashCodeKeyRejection() {
	// Selector that hands back the record itself as the key.
	final KeySelector<POJOWithoutHashCode, POJOWithoutHashCode> recordAsKey =
			new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() {
				@Override
				public POJOWithoutHashCode getKey(POJOWithoutHashCode record) throws Exception {
					return record;
				}
			};

	final StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	final POJOWithoutHashCode sample = new POJOWithoutHashCode(new int[] {1, 2});
	final DataStream<POJOWithoutHashCode> records = streamEnv.fromElements(sample);

	// The failure is expected from the keyBy call below.
	expectedException.expect(InvalidProgramException.class);

	records.keyBy(recordAsKey);
}
 
Example 11
Source File: DataStreamTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that keying by the {@code TestEnum} field of a tuple is rejected with an
 * {@link InvalidProgramException} whose message starts with the enum's type information.
 */
@Test
public void testEnumKeyRejection() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<Tuple2<TestEnum, String>> enumTagged = environment.fromElements(
			Tuple2.of(TestEnum.FOO, "Foo"),
			Tuple2.of(TestEnum.BAR, "Bar"));

	// Key each tuple by its enum component.
	final KeySelector<Tuple2<TestEnum, String>, TestEnum> byEnumField = value -> value.f0;

	final String expectedMessagePrefix =
			"Type " + EnumTypeInfo.of(TestEnum.class) + " cannot be used as key.";

	expectedException.expect(InvalidProgramException.class);
	expectedException.expectMessage(new StringStartsWith(expectedMessagePrefix));

	enumTagged.keyBy(byEnumField);
}
 
Example 12
Source File: MultiStreamTransformations.java    From examples-java with Apache License 2.0 5 votes vote down vote up
/**
 * Entry point: connects a keyed sensor-reading stream with a broadcast smoke-level
 * stream, runs {@code RaiseAlertFlatMap} over the connected streams and prints
 * the resulting alerts.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

        // Streaming environment, configured for event time with a watermark
        // interval of 1000 (passed to setAutoWatermarkInterval).
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.getConfig().setAutoWatermarkInterval(1000L);

        // Sensor readings, with timestamps and watermarks assigned for event time.
        DataStream<SensorReading> sensorStream = env
            .addSource(new SensorSource())
            .assignTimestampsAndWatermarks(new SensorTimeAssigner());

        // Smoke levels, read by a source with parallelism 1.
        DataStream<SmokeLevel> smokeStream = env
            .addSource(new SmokeLevelSource())
            .setParallelism(1);

        // Sensor readings keyed by sensor id.
        KeyedStream<SensorReading, String> readingsBySensor = sensorStream
            .keyBy(r -> r.id);

        // Every keyed partition sees the broadcast smoke stream; the flat map
        // emits the alerts.
        DataStream<Alert> raisedAlerts = readingsBySensor
            .connect(smokeStream.broadcast())
            .flatMap(new RaiseAlertFlatMap());

        raisedAlerts.print();

        // Submit the job.
        env.execute("Multi-Stream Transformations Example");
    }
 
Example 13
Source File: DataStreamTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Shared helper: asserts that the selector's extracted key type equals
 * {@code expectedKeyType}, then expects {@code keyBy} with that selector to fail
 * with an {@link InvalidProgramException} naming the type.
 *
 * @param keySelector selector under test
 * @param expectedKeyType key type the extractor should derive and the error should mention
 */
private <K> void testKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<Tuple2<Integer[], String>> tupleStream = environment.fromElements(
			new Tuple2<>(new Integer[] {1, 2}, "barfoo")
	);

	// First confirm that type extraction yields the key type the caller expects.
	final TypeInformation<?> extractedKeyType =
			TypeExtractor.getKeySelectorTypes(keySelector, tupleStream.getType());
	Assert.assertEquals(expectedKeyType, extractedKeyType);

	// Then expect keyBy to refuse that type.
	final String expectedMessagePrefix = "Type " + expectedKeyType + " cannot be used as key.";
	expectedException.expect(InvalidProgramException.class);
	expectedException.expectMessage(new StringStartsWith(expectedMessagePrefix));

	tupleStream.keyBy(keySelector);
}
 
Example 14
Source File: FlinkStreamingTransformTranslators.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates the transform into a keyed Flink stream of work items: the input stream
 * is flat-mapped with {@code ToKeyedWorkItemInGlobalWindow}, keyed with a
 * {@code WorkItemKeySelector} built from the input's key coder, and registered as
 * the output data stream of the transform.
 *
 * @param transform the transform being translated
 * @param context translation context supplying the input and receiving the output stream
 */
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KeyedWorkItem<K, InputT>>> transform,
    FlinkStreamingTranslationContext context) {

  PCollection<KV<K, InputT>> inputCollection = context.getInput(transform);
  KvCoder<K, InputT> kvCoder = (KvCoder<K, InputT>) inputCollection.getCoder();

  // Coder for single work items: key coder, value coder and window coder.
  SingletonKeyedWorkItemCoder<K, InputT> itemCoder =
      SingletonKeyedWorkItemCoder.of(
          kvCoder.getKeyCoder(),
          kvCoder.getValueCoder(),
          inputCollection.getWindowingStrategy().getWindowFn().windowCoder());

  WindowedValue.ValueOnlyWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>>
      windowedItemCoder = WindowedValue.getValueOnlyCoder(itemCoder);

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> itemTypeInfo =
      new CoderTypeInformation<>(windowedItemCoder);

  DataStream<WindowedValue<KV<K, InputT>>> sourceStream =
      context.getInputDataStream(inputCollection);

  // Convert each element into a work item, declaring the result type explicitly.
  DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> itemStream =
      sourceStream
          .flatMap(new ToKeyedWorkItemInGlobalWindow<>(context.getPipelineOptions()))
          .returns(itemTypeInfo)
          .name("ToKeyedWorkItem");

  WorkItemKeySelector<K, InputT> keySelector =
      new WorkItemKeySelector<>(kvCoder.getKeyCoder());

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedItemStream =
      itemStream.keyBy(keySelector);

  context.setOutputDataStream(context.getOutput(transform), keyedItemStream);
}
 
Example 15
Source File: SummaryTreeReduce.java    From gelly-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Repeatedly halves the parallelism of the stream: on each pass, records are keyed by
 * half of their {@code f0} value, re-mapped with {@code PartitionReMapper}, keyed by
 * field 0, windowed and reduced. The passes stop once the stream's parallelism is
 * 2 or lower, and that stream is returned.
 *
 * @param input the stream to reduce
 * @param aggType type information declared for the re-mapped stream
 * @return the stream after the final reduction pass, or {@code input} if no pass was needed
 */
private DataStream<Tuple2<Integer, S>> enhance(DataStream<Tuple2<Integer, S>> input, TypeInformation<Tuple2<Integer, S>> aggType) {

		DataStream<Tuple2<Integer, S>> current = input;

		// iterative form of the tail recursion: one loop pass per reduction level
		while (current.getParallelism() > 2) {
			final int halvedParallelism = current.getParallelism() / 2;

			// key by half the partition index so two partitions collapse into one
			DataStream<Tuple2<Integer, S>> pairedStream =
					current.keyBy(new KeySelector<Tuple2<Integer, S>, Integer>() {
						@Override
						public Integer getKey(Tuple2<Integer, S> record) throws Exception {
							return record.f0 / 2;
						}
					});

			// re-map and re-key at the halved parallelism
			KeyedStream<Tuple2<Integer, S>, Integer> rekeyedStream =
					pairedStream.map(new PartitionReMapper()).returns(aggType)
							.setParallelism(halvedParallelism)
							.keyBy(0);

			// window again and combine the partial aggregates
			current =
					rekeyedStream.timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS))
							.reduce(new AggregationWrapper<>(getCombineFun()))
							.setParallelism(halvedParallelism);
		}

		return current;
	}
 
Example 16
Source File: DataStreamTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Helper for key-rejection tests: verifies the key type extracted for the selector and
 * then expects {@code keyBy} to throw an {@link InvalidProgramException} that names it.
 *
 * @param keySelector selector to apply to the test stream
 * @param expectedKeyType key type expected from extraction and in the error message
 */
private <K> void testKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) {
	final StreamExecutionEnvironment executionEnv = StreamExecutionEnvironment.getExecutionEnvironment();

	final Tuple2<Integer[], String> sample = new Tuple2<>(new Integer[] {1, 2}, "barfoo");
	final DataStream<Tuple2<Integer[], String>> source = executionEnv.fromElements(sample);

	// The extractor must derive exactly the expected key type.
	Assert.assertEquals(
			expectedKeyType,
			TypeExtractor.getKeySelectorTypes(keySelector, source.getType()));

	// Register the expected failure before calling keyBy.
	final StringStartsWith messageMatcher =
			new StringStartsWith("Type " + expectedKeyType + " cannot be used as key.");
	expectedException.expect(InvalidProgramException.class);
	expectedException.expectMessage(messageMatcher);

	source.keyBy(keySelector);
}
 
Example 17
Source File: RideCount.java    From flink-training-exercises with Apache License 2.0 5 votes vote down vote up
/**
 * Entry point: reads taxi rides, maps each ride to {@code (driverId, 1)}, keys by
 * the first tuple field, sums the second field and prints the running result.
 *
 * @param args command-line arguments; {@code input} overrides the ride data path
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

		final ParameterTool parameters = ParameterTool.fromArgs(args);
		final String ridesPath = parameters.get("input", ExerciseBase.pathToRideData);

		// source settings: events are at most 60 seconds out of order, and
		// 10 minutes of events are served every second
		final int maxDelaySeconds = 60;
		final int speedFactor = 600;

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// ride events produced by the data generator
		DataStream<TaxiRide> rideStream =
				env.addSource(new TaxiRideSource(ridesPath, maxDelaySeconds, speedFactor));

		// one (driverId, 1) tuple per ride
		DataStream<Tuple2<Long, Long>> driverOnes = rideStream.map(new MapFunction<TaxiRide, Tuple2<Long, Long>>() {
					@Override
					public Tuple2<Long, Long> map(TaxiRide ride) throws Exception {
						return new Tuple2<>(ride.driverId, 1L);
					}
		});

		// key by tuple field 0 (the driverId)
		KeyedStream<Tuple2<Long, Long>, Tuple> byDriver = driverOnes.keyBy(0);

		// sum tuple field 1 to count rides per driver
		DataStream<Tuple2<Long, Long>> countsPerDriver = byDriver.sum(1);

		// only the final counts are printed
		countsPerDriver.print();

		// run the ride-counting pipeline
		env.execute("Ride Count");
	}
 
Example 18
Source File: LongRidesCEPExercise.java    From flink-training-exercises with Apache License 2.0 5 votes vote down vote up
/**
 * Exercise scaffold for detecting long taxi rides with CEP.
 *
 * <p>Builds the pipeline (ride source, keying by {@code "rideId"}, a CEP pattern with a
 * 120-minute window, and a side output for timed-out matches) but then unconditionally
 * throws {@link MissingSolutionException}; the {@code env.execute} call is commented out,
 * so the job is never submitted from this method as written.
 *
 * @param args command-line arguments; {@code input} overrides the ride data path
 * @throws Exception always, via {@code MissingSolutionException}, in this scaffold
 */
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		// falls back to ExerciseBase.pathToRideData when no "input" argument is given
		final String input = params.get("input", ExerciseBase.pathToRideData);

		final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.setParallelism(ExerciseBase.parallelism);

		// CheckpointedTaxiRideSource delivers events in order
		DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new CheckpointedTaxiRideSource(input, servingSpeedFactor)));

		// key the rides by their "rideId" field
		DataStream<TaxiRide> keyedRides = rides
			.keyBy("rideId");

		// A complete taxi ride has a START event followed by an END event
		// This pattern is incomplete ...
		// (only the initial "start" step is declared here)
		Pattern<TaxiRide, TaxiRide> completedRides = Pattern.<TaxiRide>begin("start");

		// We want to find rides that have NOT been completed within 120 minutes.
		// This pattern matches rides that ARE completed.
		// Below we will ignore rides that match this pattern, and emit those that timeout.
		PatternStream<TaxiRide> patternStream = CEP.pattern(keyedRides, completedRides.within(Time.minutes(120)));

		// side-output tag passed to flatSelect and read back below
		OutputTag<TaxiRide> timedout = new OutputTag<TaxiRide>("timedout"){};

		SingleOutputStreamOperator<TaxiRide> longRides = patternStream.flatSelect(
				timedout,
				new TaxiRideTimedOut<TaxiRide>(),
				new FlatSelectNothing<TaxiRide>()
		);

		// only the side output tagged "timedout" is printed (or tested)
		printOrTest(longRides.getSideOutput(timedout));

		// the scaffold stops here; nothing below this line is reachable
		throw new MissingSolutionException();

//		env.execute("Long Taxi Rides (CEP)");
	}
 
Example 19
Source File: DataStreamTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the given selector yields {@code expectedKeyType} under type extraction
 * and that {@code keyBy} with it fails with an {@link InvalidProgramException} whose
 * message starts by naming that type.
 *
 * @param keySelector selector to test against a {@code Tuple2<Integer[], String>} stream
 * @param expectedKeyType key type expected from extraction and in the error message
 */
private <K> void assertArrayKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<Tuple2<Integer[], String>> tupleStream = environment.fromElements(
			new Tuple2<>(new Integer[] {1, 2}, "barfoo"));

	// Check the extracted key type before exercising keyBy.
	final TypeInformation<?> extractedKeyType =
			TypeExtractor.getKeySelectorTypes(keySelector, tupleStream.getType());
	Assert.assertEquals(expectedKeyType, extractedKeyType);

	// Expectations must be registered ahead of the failing call.
	final String expectedMessagePrefix = "Type " + expectedKeyType + " cannot be used as key.";
	expectedException.expect(InvalidProgramException.class);
	expectedException.expectMessage(new StringStartsWith(expectedMessagePrefix));

	tupleStream.keyBy(keySelector);
}
 
Example 20
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Appends a keyed, windowed grouping stage to the given stream (the name suggests
 * "group by key" — NOTE(review): confirm against callers).
 *
 * <p>The input is flat-mapped into {@code SingletonKeyedWorkItem}s, keyed with a
 * {@code WorkItemKeySelector}, and then transformed by a {@code WindowDoFnOperator}
 * driven by {@code SystemReduceFn.buffering}, producing one {@code Iterable<V>} per key.
 *
 * @param inputDataStream stream of windowed key/value pairs to group
 * @param windowingStrategy supplies the window coder and is passed to the operator
 * @param windowedInputCoder coder of the input; its value coder is cast to {@code KvCoder<K, V>}
 * @param operatorName name passed to the operator and to {@code transform}
 * @param context supplies the pipeline options
 * @return the operator output stream of windowed {@code KV<K, Iterable<V>>} values
 */
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
    DataStream<WindowedValue<KV<K, V>>> inputDataStream,
    WindowingStrategy<?, ?> windowingStrategy,
    WindowedValueCoder<KV<K, V>> windowedInputCoder,
    String operatorName,
    StreamingTranslationContext context) {
  // Unchecked cast: the value coder is assumed to be a KvCoder for this input.
  KvCoder<K, V> inputElementCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();

  // Coder for individual work items: key coder, value coder and window coder.
  SingletonKeyedWorkItemCoder<K, V> workItemCoder =
      SingletonKeyedWorkItemCoder.of(
          inputElementCoder.getKeyCoder(),
          inputElementCoder.getValueCoder(),
          windowingStrategy.getWindowFn().windowCoder());

  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, V>> windowedWorkItemCoder =
      WindowedValue.getFullCoder(workItemCoder, windowingStrategy.getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemTypeInfo =
      new CoderTypeInformation<>(windowedWorkItemCoder);

  // Convert every input element into a work item; the result type is declared explicitly.
  DataStream<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemStream =
      inputDataStream
          .flatMap(
              new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(
                  context.getPipelineOptions()))
          .returns(workItemTypeInfo)
          .name("ToKeyedWorkItem");

  // The same selector instance keys the stream and is handed to the operator below.
  WorkItemKeySelector<K, V> keySelector =
      new WorkItemKeySelector<>(inputElementCoder.getKeyCoder());

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer> keyedWorkItemStream =
      workItemStream.keyBy(keySelector);

  SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> reduceFn =
      SystemReduceFn.buffering(inputElementCoder.getValueCoder());

  Coder<Iterable<V>> accumulatorCoder = IterableCoder.of(inputElementCoder.getValueCoder());

  // Output coder: windowed KV of the original key and the iterable of grouped values.
  Coder<WindowedValue<KV<K, Iterable<V>>>> outputCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(inputElementCoder.getKeyCoder(), accumulatorCoder),
          windowingStrategy.getWindowFn().windowCoder());

  TypeInformation<WindowedValue<KV<K, Iterable<V>>>> outputTypeInfo =
      new CoderTypeInformation<>(outputCoder);

  TupleTag<KV<K, Iterable<V>>> mainTag = new TupleTag<>("main output");

  // Arguments are positional; the raw (Coder) and (KeySelector) casts drop the
  // generic parameters of the values being passed.
  WindowDoFnOperator<K, V, Iterable<V>> doFnOperator =
      new WindowDoFnOperator<>(
          reduceFn,
          operatorName,
          (Coder) windowedWorkItemCoder,
          mainTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory(mainTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          inputElementCoder.getKeyCoder(),
          (KeySelector) keySelector /* key selector */);

  // Run the operator on the keyed stream; the raw cast is needed to pass it to transform.
  SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> outputDataStream =
      keyedWorkItemStream.transform(
          operatorName, outputTypeInfo, (OneInputStreamOperator) doFnOperator);

  return outputDataStream;
}