Java Code Examples for org.apache.flink.api.java.typeutils.TypeExtractor#getKeySelectorTypes()

The following examples show how to use org.apache.flink.api.java.typeutils.TypeExtractor#getKeySelectorTypes(). You can vote up the examples you like or vote down the ones you don't like, and you can go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source Project: flink   File: AggregationFunctionTest.java    License: Apache License 2.0 4 votes vote down vote up
@Test
public void pojoGroupSumIntegerTest() throws Exception {

	// Expected outputs: running sum, running min, and running max per key group.
	List<MyPojo> expectedGroupSumList = new ArrayList<>();
	List<MyPojo> expectedGroupMinList = new ArrayList<>();
	List<MyPojo> expectedGroupMaxList = new ArrayList<>();

	// One accumulator per key group (keys are i % 3, i.e. 0, 1, 2).
	int[] runningSums = new int[3];

	for (int i = 0; i < 9; i++) {
		int group = i % 3;
		runningSums[group] += i;

		expectedGroupSumList.add(new MyPojo(group, runningSums[group]));
		// expected minimum for each group stays at the group id
		expectedGroupMinList.add(new MyPojo(group, group));
		// expected maximum for each group is the latest index i
		expectedGroupMaxList.add(new MyPojo(group, i));
	}

	// Boilerplate: type info for the POJO, key selector on field "f0", and key type.
	TypeInformation<MyPojo> typeInfo = TypeExtractor.getForObject(new MyPojo(0, 0));

	ExecutionConfig config = new ExecutionConfig();

	KeySelector<MyPojo, Tuple> keySelector = KeySelectorUtil.getSelectorForKeys(
			new Keys.ExpressionKeys<>(new String[]{"f0"}, typeInfo),
			typeInfo, config);
	TypeInformation<Tuple> keyType = TypeExtractor.getKeySelectorTypes(keySelector, typeInfo);

	// Aggregations under test: sum, min and max on field "f1".
	ReduceFunction<MyPojo> sumFunction = new SumAggregator<>("f1", typeInfo, config);
	ReduceFunction<MyPojo> minFunction =
			new ComparableAggregator<>("f1", typeInfo, AggregationType.MIN, false, config);
	ReduceFunction<MyPojo> maxFunction =
			new ComparableAggregator<>("f1", typeInfo, AggregationType.MAX, false, config);

	List<MyPojo> groupedSumList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(sumFunction, typeInfo.createSerializer(config)),
			getInputPojoList(),
			keySelector, keyType);

	List<MyPojo> groupedMinList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(minFunction, typeInfo.createSerializer(config)),
			getInputPojoList(),
			keySelector, keyType);

	List<MyPojo> groupedMaxList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(maxFunction, typeInfo.createSerializer(config)),
			getInputPojoList(),
			keySelector, keyType);

	assertEquals(expectedGroupSumList, groupedSumList);
	assertEquals(expectedGroupMinList, groupedMinList);
	assertEquals(expectedGroupMaxList, groupedMaxList);
}
 
Example 2
Source Project: flink   File: AggregationFunctionTest.java    License: Apache License 2.0 4 votes vote down vote up
@Test
public void groupSumIntegerTest() throws Exception {

	// Expected outputs, built while tracking a running sum for each key group.
	List<Tuple2<Integer, Integer>> expectedGroupSumList = new ArrayList<>();
	List<Tuple2<Integer, Integer>> expectedGroupMinList = new ArrayList<>();
	List<Tuple2<Integer, Integer>> expectedGroupMaxList = new ArrayList<>();

	// One accumulator per key group (keys are i % 3, i.e. 0, 1, 2).
	int[] sumPerGroup = new int[3];

	for (int i = 0; i < 9; i++) {
		int key = i % 3;
		sumPerGroup[key] += i;

		expectedGroupSumList.add(new Tuple2<>(key, sumPerGroup[key]));
		// expected minimum for each group stays at the key value
		expectedGroupMinList.add(new Tuple2<>(key, key));
		// expected maximum for each group is the latest index i
		expectedGroupMaxList.add(new Tuple2<>(key, i));
	}

	// Boilerplate: tuple type info, key selector on position 0, and key type.
	TypeInformation<Tuple2<Integer, Integer>> typeInfo = TypeExtractor.getForObject(new Tuple2<>(0, 0));

	ExecutionConfig config = new ExecutionConfig();

	KeySelector<Tuple2<Integer, Integer>, Tuple> keySelector = KeySelectorUtil.getSelectorForKeys(
			new Keys.ExpressionKeys<>(new int[]{0}, typeInfo),
			typeInfo, config);
	TypeInformation<Tuple> keyType = TypeExtractor.getKeySelectorTypes(keySelector, typeInfo);

	// Aggregations under test: sum, min and max on tuple position 1.
	ReduceFunction<Tuple2<Integer, Integer>> sumFunction =
			new SumAggregator<>(1, typeInfo, config);
	ReduceFunction<Tuple2<Integer, Integer>> minFunction = new ComparableAggregator<>(
			1, typeInfo, AggregationType.MIN, config);
	ReduceFunction<Tuple2<Integer, Integer>> maxFunction = new ComparableAggregator<>(
			1, typeInfo, AggregationType.MAX, config);

	List<Tuple2<Integer, Integer>> groupedSumList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(sumFunction, typeInfo.createSerializer(config)),
			getInputList(),
			keySelector, keyType);

	List<Tuple2<Integer, Integer>> groupedMinList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(minFunction, typeInfo.createSerializer(config)),
			getInputList(),
			keySelector, keyType);

	List<Tuple2<Integer, Integer>> groupedMaxList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(maxFunction, typeInfo.createSerializer(config)),
			getInputList(),
			keySelector, keyType);

	assertEquals(expectedGroupSumList, groupedSumList);
	assertEquals(expectedGroupMinList, groupedMinList);
	assertEquals(expectedGroupMaxList, groupedMaxList);
}
 
Example 3
// Verifies grouped sum/min/max aggregation over a keyed stream of POJOs,
// where elements are keyed on field "f0" (i % 3) and aggregated on field "f1".
@Test
public void pojoGroupSumIntegerTest() throws Exception {

	// preparing expected outputs
	List<MyPojo> expectedGroupSumList = new ArrayList<>();
	List<MyPojo> expectedGroupMinList = new ArrayList<>();
	List<MyPojo> expectedGroupMaxList = new ArrayList<>();

	// one running sum per key group (i % 3 -> 0, 1, 2)
	int groupedSum0 = 0;
	int groupedSum1 = 0;
	int groupedSum2 = 0;

	for (int i = 0; i < 9; i++) {
		int groupedSum;
		switch (i % 3) {
			case 0:
				groupedSum = groupedSum0 += i;
				break;
			case 1:
				groupedSum = groupedSum1 += i;
				break;
			default:
				groupedSum = groupedSum2 += i;
				break;
		}

		// sum accumulates per group; the expected min stays at the key value;
		// the expected max is the latest index i seen for the group
		expectedGroupSumList.add(new MyPojo(i % 3, groupedSum));
		expectedGroupMinList.add(new MyPojo(i % 3, i % 3));
		expectedGroupMaxList.add(new MyPojo(i % 3, i));
	}

	// some necessary boiler plate
	TypeInformation<MyPojo> typeInfo = TypeExtractor.getForObject(new MyPojo(0, 0));

	ExecutionConfig config = new ExecutionConfig();

	// key selector on POJO field "f0"; key type is derived from the selector
	KeySelector<MyPojo, Tuple> keySelector = KeySelectorUtil.getSelectorForKeys(
			new Keys.ExpressionKeys<>(new String[]{"f0"}, typeInfo),
			typeInfo, config);
	TypeInformation<Tuple> keyType = TypeExtractor.getKeySelectorTypes(keySelector, typeInfo);

	// aggregations tested
	ReduceFunction<MyPojo> sumFunction = new SumAggregator<>("f1", typeInfo, config);
	ReduceFunction<MyPojo> minFunction = new ComparableAggregator<>("f1", typeInfo, AggregationType.MIN,
			false, config);
	ReduceFunction<MyPojo> maxFunction = new ComparableAggregator<>("f1", typeInfo, AggregationType.MAX,
			false, config);

	List<MyPojo> groupedSumList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(sumFunction, typeInfo.createSerializer(config)),
			getInputPojoList(),
			keySelector, keyType);

	List<MyPojo> groupedMinList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(minFunction, typeInfo.createSerializer(config)),
			getInputPojoList(),
			keySelector, keyType);

	List<MyPojo> groupedMaxList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(maxFunction, typeInfo.createSerializer(config)),
			getInputPojoList(),
			keySelector, keyType);

	assertEquals(expectedGroupSumList, groupedSumList);
	assertEquals(expectedGroupMinList, groupedMinList);
	assertEquals(expectedGroupMaxList, groupedMaxList);
}
 
Example 4
Source Project: flink   File: AggregationFunctionTest.java    License: Apache License 2.0 4 votes vote down vote up
@Test
public void pojoMinMaxByTest() throws Exception {
	// Pojos are grouped on field 0, aggregated on field 1

	// preparing expected outputs
	// "first = true" keeps the earlier element on ties (values stay at the first
	// occurrence, e.g. (2, 2) repeats); "first = false" takes the latest tied
	// element (values advance to (2, 5) and (2, 8)) — visible in the lists below.
	List<MyPojo3> maxByFirstExpected = ImmutableList.of(
			new MyPojo3(0, 0), new MyPojo3(1, 1), new MyPojo3(2, 2),
			new MyPojo3(2, 2), new MyPojo3(2, 2), new MyPojo3(2, 2),
			new MyPojo3(2, 2), new MyPojo3(2, 2), new MyPojo3(2, 2));

	List<MyPojo3> maxByLastExpected = ImmutableList.of(
			new MyPojo3(0, 0), new MyPojo3(1, 1), new MyPojo3(2, 2),
			new MyPojo3(2, 2), new MyPojo3(2, 2), new MyPojo3(2, 5),
			new MyPojo3(2, 5), new MyPojo3(2, 5), new MyPojo3(2, 8));

	List<MyPojo3> minByFirstExpected = ImmutableList.of(
			new MyPojo3(0, 0), new MyPojo3(0, 0), new MyPojo3(0, 0),
			new MyPojo3(0, 0), new MyPojo3(0, 0), new MyPojo3(0, 0),
			new MyPojo3(0, 0), new MyPojo3(0, 0), new MyPojo3(0, 0));

	List<MyPojo3> minByLastExpected = ImmutableList.of(
			new MyPojo3(0, 0), new MyPojo3(0, 0), new MyPojo3(0, 0),
			new MyPojo3(0, 3), new MyPojo3(0, 3), new MyPojo3(0, 3),
			new MyPojo3(0, 6), new MyPojo3(0, 6), new MyPojo3(0, 6));

	// some necessary boiler plate
	TypeInformation<MyPojo3> typeInfo = TypeExtractor.getForObject(new MyPojo3(0, 0));

	ExecutionConfig config = new ExecutionConfig();

	// key selector on field "f0"; key type derived from the selector
	KeySelector<MyPojo3, Tuple> keySelector = KeySelectorUtil.getSelectorForKeys(
			new Keys.ExpressionKeys<>(new String[]{"f0"}, typeInfo),
			typeInfo, config);
	TypeInformation<Tuple> keyType = TypeExtractor.getKeySelectorTypes(keySelector, typeInfo);

	// aggregations tested — the boolean flag selects first-vs-last tie-breaking
	ReduceFunction<MyPojo3> maxByFunctionFirst =
			new ComparableAggregator<>("f1", typeInfo, AggregationType.MAXBY, true, config);
	ReduceFunction<MyPojo3> maxByFunctionLast =
			new ComparableAggregator<>("f1", typeInfo, AggregationType.MAXBY, false, config);
	ReduceFunction<MyPojo3> minByFunctionFirst =
			new ComparableAggregator<>("f1", typeInfo, AggregationType.MINBY, true, config);
	ReduceFunction<MyPojo3> minByFunctionLast =
			new ComparableAggregator<>("f1", typeInfo, AggregationType.MINBY, false, config);

	assertEquals(maxByFirstExpected, MockContext.createAndExecuteForKeyedStream(
					new StreamGroupedReduce<>(maxByFunctionFirst, typeInfo.createSerializer(config)),
					getInputByPojoList(),
					keySelector, keyType));

	assertEquals(maxByLastExpected, MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(maxByFunctionLast, typeInfo.createSerializer(config)),
			getInputByPojoList(),
			keySelector, keyType));

	assertEquals(minByLastExpected, MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(minByFunctionLast, typeInfo.createSerializer(config)),
			getInputByPojoList(),
			keySelector, keyType));

	assertEquals(minByFirstExpected, MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(minByFunctionFirst, typeInfo.createSerializer(config)),
			getInputByPojoList(),
			keySelector, keyType));
}
 
Example 5
Source Project: flink   File: AggregationFunctionTest.java    License: Apache License 2.0 4 votes vote down vote up
// Verifies grouped sum/min/max aggregation over a keyed stream of Tuple2s,
// keyed on position 0 (i % 3) and aggregated on position 1.
@Test
public void groupSumIntegerTest() throws Exception {

	// preparing expected outputs
	List<Tuple2<Integer, Integer>> expectedGroupSumList = new ArrayList<>();
	List<Tuple2<Integer, Integer>> expectedGroupMinList = new ArrayList<>();
	List<Tuple2<Integer, Integer>> expectedGroupMaxList = new ArrayList<>();

	// one running sum per key group (i % 3 -> 0, 1, 2)
	int groupedSum0 = 0;
	int groupedSum1 = 0;
	int groupedSum2 = 0;

	for (int i = 0; i < 9; i++) {
		int groupedSum;
		switch (i % 3) {
			case 0:
				groupedSum = groupedSum0 += i;
				break;
			case 1:
				groupedSum = groupedSum1 += i;
				break;
			default:
				groupedSum = groupedSum2 += i;
				break;
		}

		// sum accumulates per group; the expected min stays at the key value;
		// the expected max is the latest index i seen for the group
		expectedGroupSumList.add(new Tuple2<>(i % 3, groupedSum));
		expectedGroupMinList.add(new Tuple2<>(i % 3, i % 3));
		expectedGroupMaxList.add(new Tuple2<>(i % 3, i));
	}

	// some necessary boiler plate
	TypeInformation<Tuple2<Integer, Integer>> typeInfo = TypeExtractor.getForObject(new Tuple2<>(0, 0));

	ExecutionConfig config = new ExecutionConfig();

	// key selector on tuple position 0; key type derived from the selector
	KeySelector<Tuple2<Integer, Integer>, Tuple> keySelector = KeySelectorUtil.getSelectorForKeys(
			new Keys.ExpressionKeys<>(new int[]{0}, typeInfo),
			typeInfo, config);
	TypeInformation<Tuple> keyType = TypeExtractor.getKeySelectorTypes(keySelector, typeInfo);

	// aggregations tested
	ReduceFunction<Tuple2<Integer, Integer>> sumFunction =
			new SumAggregator<>(1, typeInfo, config);
	ReduceFunction<Tuple2<Integer, Integer>> minFunction = new ComparableAggregator<>(
			1, typeInfo, AggregationType.MIN, config);
	ReduceFunction<Tuple2<Integer, Integer>> maxFunction = new ComparableAggregator<>(
			1, typeInfo, AggregationType.MAX, config);

	List<Tuple2<Integer, Integer>> groupedSumList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(sumFunction, typeInfo.createSerializer(config)),
			getInputList(),
			keySelector, keyType);

	List<Tuple2<Integer, Integer>> groupedMinList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(minFunction, typeInfo.createSerializer(config)),
			getInputList(),
			keySelector, keyType);

	List<Tuple2<Integer, Integer>> groupedMaxList = MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(maxFunction, typeInfo.createSerializer(config)),
			getInputList(),
			keySelector, keyType);

	assertEquals(expectedGroupSumList, groupedSumList);
	assertEquals(expectedGroupMinList, groupedMinList);
	assertEquals(expectedGroupMaxList, groupedMaxList);
}
 
Example 6
Source Project: flink   File: CoGroupedStreams.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Specifies a {@link KeySelector} for elements from the second input.
 *
 * @param keySelector The KeySelector to be used for extracting the second input's key for partitioning.
 */
public EqualTo equalTo(KeySelector<T2, KEY> keySelector)  {
	Preconditions.checkNotNull(keySelector);
	// Derive the key type from the selector against the second input's type
	// and delegate to the two-argument overload.
	return equalTo(keySelector, TypeExtractor.getKeySelectorTypes(keySelector, input2.getType()));
}
 
Example 7
Source Project: Flink-CEPplus   File: CoGroupedStreams.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Specifies a {@link KeySelector} for elements from the second input.
 *
 * @param keySelector The KeySelector to be used for extracting the second input's key for partitioning.
 */
public EqualTo equalTo(KeySelector<T2, KEY> keySelector)  {
	Preconditions.checkNotNull(keySelector);
	// Key type is extracted from the selector applied to the second input's type.
	final TypeInformation<KEY> secondInputKeyType =
			TypeExtractor.getKeySelectorTypes(keySelector, input2.getType());
	return equalTo(keySelector, secondInputKeyType);
}
 
Example 8
Source Project: flink   File: KeyedStream.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector}
 * to partition operator state by key.
 *
 * @param dataStream
 *            Base stream of data
 * @param keySelector
 *            Function for determining state partitions
 */
public KeyedStream(DataStream<T> dataStream, KeySelector<T, KEY> keySelector) {
	// Derive the key TypeInformation from the selector so callers do not have to
	// supply it explicitly; delegates to the three-argument constructor.
	this(dataStream, keySelector, TypeExtractor.getKeySelectorTypes(keySelector, dataStream.getType()));
}
 
Example 9
Source Project: flink   File: KeyedStream.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector}
 * to partition operator state by key.
 *
 * @param dataStream
 *            Base stream of data
 * @param keySelector
 *            Function for determining state partitions
 */
public KeyedStream(DataStream<T> dataStream, KeySelector<T, KEY> keySelector) {
	// Key type is inferred from the selector's declared return type; delegation
	// keeps a single canonical constructor.
	this(dataStream, keySelector, TypeExtractor.getKeySelectorTypes(keySelector, dataStream.getType()));
}
 
Example 10
Source Project: flink   File: DataSet.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Range-partitions a DataSet using the specified KeySelector.
 *
 * <p><b>Important:</b>This operation requires an extra pass over the DataSet to compute the range boundaries and
 * shuffles the whole DataSet over the network. This can take significant amount of time.
 *
 * @param keyExtractor The KeyExtractor with which the DataSet is range-partitioned.
 * @return The partitioned DataSet.
 *
 * @see KeySelector
 */
public <K extends Comparable<K>> PartitionOperator<T> partitionByRange(KeySelector<T, K> keyExtractor) {
	// Infer the key type from the selector, then build the selector-function keys
	// (with a cleaned extractor) before constructing the RANGE partition operator.
	final TypeInformation<K> extractedKeyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType());
	final Keys.SelectorFunctionKeys<T, K> partitionKeys =
			new Keys.SelectorFunctionKeys<>(clean(keyExtractor), this.getType(), extractedKeyType);
	return new PartitionOperator<>(this, PartitionMethod.RANGE, partitionKeys, Utils.getCallLocationName());
}
 
Example 11
Source Project: Flink-CEPplus   File: CoGroupOperator.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Continues a CoGroup transformation and defines a {@link KeySelector} function for the first co-grouped {@link DataSet}.
 *
 * <p>The KeySelector function is called for each element of the first DataSet and extracts a single
 * key value on which the DataSet is grouped.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which it is grouped.
 * @return An incomplete CoGroup transformation.
 *           Call {@link org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate#equalTo(int...)} to continue the CoGroup.
 *
 * @see KeySelector
 * @see DataSet
 */
public <K> CoGroupOperatorSetsPredicate where(KeySelector<I1, K> keyExtractor) {
	// Key type comes from the selector applied to the first input's type.
	final TypeInformation<K> extractedKeyType =
			TypeExtractor.getKeySelectorTypes(keyExtractor, input1.getType());
	return new CoGroupOperatorSetsPredicate(
			new SelectorFunctionKeys<>(input1.clean(keyExtractor), input1.getType(), extractedKeyType));
}
 
Example 12
Source Project: flink   File: DataSet.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Returns a distinct set of a {@link DataSet} using a {@link KeySelector} function.
 *
 * <p>The KeySelector function is called for each element of the DataSet and extracts a single key value on which the
 * decision is made if two items are distinct or not.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which the
 *                     distinction of the DataSet is decided.
 * @return A DistinctOperator that represents the distinct DataSet.
 */
public <K> DistinctOperator<T> distinct(KeySelector<T, K> keyExtractor) {
	// Build the selector-function keys with the key type inferred from the extractor.
	final Keys.SelectorFunctionKeys<T, K> distinctKeys = new Keys.SelectorFunctionKeys<>(
			keyExtractor, getType(), TypeExtractor.getKeySelectorTypes(keyExtractor, getType()));
	return new DistinctOperator<>(this, distinctKeys, Utils.getCallLocationName());
}
 
Example 13
/**
 * It creates a new {@link KeyedOperatorTransformation} that uses the provided key for partitioning its operator
 * states.
 *
 * @param keySelector The KeySelector to be used for extracting the key for partitioning.
 * @return The {@code BootstrapTransformation} with partitioned state.
 */
public <K> KeyedOperatorTransformation<K, T> keyBy(KeySelector<T, K> keySelector) {
	// Key type is derived from the selector against the data set's element type.
	final TypeInformation<K> extractedKeyType =
			TypeExtractor.getKeySelectorTypes(keySelector, dataSet.getType());
	return new KeyedOperatorTransformation<>(dataSet, operatorMaxParallelism, keySelector, extractedKeyType);
}
 
Example 14
Source Project: flink   File: DataSet.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Locally sorts the partitions of the DataSet on the extracted key in the specified order.
 * The DataSet can be sorted on multiple values by returning a tuple from the KeySelector.
 *
 * <p>Note that no additional sort keys can be appended to a KeySelector sort keys. To sort
 * the partitions by multiple values using KeySelector, the KeySelector must return a tuple
 * consisting of the values.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet
 *                     on which the DataSet is sorted.
 * @param order The order in which the DataSet is sorted.
 * @return The DataSet with sorted local partitions.
 */
public <K> SortPartitionOperator<T> sortPartition(KeySelector<T, K> keyExtractor, Order order) {
	// The sort keys are described by the (cleaned) selector plus the key type it produces.
	final Keys.SelectorFunctionKeys<T, K> sortKeys = new Keys.SelectorFunctionKeys<>(
			clean(keyExtractor), getType(),
			TypeExtractor.getKeySelectorTypes(keyExtractor, getType()));
	return new SortPartitionOperator<>(this, sortKeys, order, Utils.getCallLocationName());
}
 
Example 15
Source Project: flink   File: DataSet.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Partitions a DataSet using the specified KeySelector.
 *
 * <p><b>Important:</b>This operation shuffles the whole DataSet over the network and can take significant amount of time.
 *
 * @param keyExtractor The KeyExtractor with which the DataSet is hash-partitioned.
 * @return The partitioned DataSet.
 *
 * @see KeySelector
 */
public <K extends Comparable<K>> PartitionOperator<T> partitionByHash(KeySelector<T, K> keyExtractor) {
	// Infer the key type, wrap the cleaned extractor into selector-function keys,
	// then hand everything to a HASH partition operator.
	final TypeInformation<K> extractedKeyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType());
	final Keys.SelectorFunctionKeys<T, K> partitionKeys =
			new Keys.SelectorFunctionKeys<>(clean(keyExtractor), this.getType(), extractedKeyType);
	return new PartitionOperator<>(this, PartitionMethod.HASH, partitionKeys, Utils.getCallLocationName());
}
 
Example 16
Source Project: flink   File: CoGroupOperator.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Continues a CoGroup transformation and defines a {@link KeySelector} function for the second co-grouped {@link DataSet}.
 *
 * <p>The KeySelector function is called for each element of the second DataSet and extracts a single
 * key value on which the DataSet is grouped.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the second DataSet on which it is grouped.
 * @return An incomplete CoGroup transformation.
 *           Call {@link org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate.CoGroupOperatorWithoutFunction#with(org.apache.flink.api.common.functions.CoGroupFunction)} to finalize the CoGroup transformation.
 */
public <K> CoGroupOperatorWithoutFunction equalTo(KeySelector<I2, K> keyExtractor) {
	// Key type is extracted against the SECOND input's element type.
	final TypeInformation<K> extractedKeyType =
			TypeExtractor.getKeySelectorTypes(keyExtractor, input2.getType());
	// NOTE: the extractor is cleaned via input1, as in upstream Flink — clean()
	// presumably performs input-independent closure cleaning.
	final SelectorFunctionKeys<I2, K> selectorKeys =
			new SelectorFunctionKeys<>(input1.clean(keyExtractor), input2.getType(), extractedKeyType);
	return createCoGroupOperator(selectorKeys);
}
 
Example 17
Source Project: flink   File: JoinOperatorSetsBase.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Continues a Join transformation and defines a {@link KeySelector} function for the first join {@link DataSet}.
 *
 * <p>The KeySelector function is called for each element of the first DataSet and extracts a single
 * key value on which the DataSet is joined.
 *
 * @param keySelector The KeySelector function which extracts the key values from the DataSet on which it is joined.
 * @return An incomplete Join transformation.
 *           Call {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(int...)} or
 *           {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(KeySelector)}
 *           to continue the Join.
 *
 * @see KeySelector
 * @see DataSet
 */
public <K> JoinOperatorSetsPredicateBase where(KeySelector<I1, K> keySelector) {
	// Key type is derived from the selector applied to the first input's type.
	final TypeInformation<K> selectorKeyType =
			TypeExtractor.getKeySelectorTypes(keySelector, input1.getType());
	return new JoinOperatorSetsPredicateBase(
			new Keys.SelectorFunctionKeys<>(keySelector, input1.getType(), selectorKeyType));
}
 
Example 18
Source Project: Flink-CEPplus   File: DataSet.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Locally sorts the partitions of the DataSet on the extracted key in the specified order.
 * The DataSet can be sorted on multiple values by returning a tuple from the KeySelector.
 *
 * <p>Note that no additional sort keys can be appended to a KeySelector sort keys. To sort
 * the partitions by multiple values using KeySelector, the KeySelector must return a tuple
 * consisting of the values.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet
 *                     on which the DataSet is sorted.
 * @param order The order in which the DataSet is sorted.
 * @return The DataSet with sorted local partitions.
 */
public <K> SortPartitionOperator<T> sortPartition(KeySelector<T, K> keyExtractor, Order order) {
	// Key type is inferred from the selector so the operator can sort on it.
	final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType());
	// clean(...) presumably prepares the extractor for serialization — standard Flink pattern.
	return new SortPartitionOperator<>(this, new Keys.SelectorFunctionKeys<>(clean(keyExtractor), getType(), keyType), order, Utils.getCallLocationName());
}
 
Example 19
Source Project: Flink-CEPplus   File: KeyedStream.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector}
 * to partition operator state by key.
 *
 * @param dataStream
 *            Base stream of data
 * @param keySelector
 *            Function for determining state partitions
 */
public KeyedStream(DataStream<T> dataStream, KeySelector<T, KEY> keySelector) {
	// Convenience constructor: the key TypeInformation is derived from the
	// selector's return type rather than passed in by the caller.
	this(dataStream, keySelector, TypeExtractor.getKeySelectorTypes(keySelector, dataStream.getType()));
}
 
Example 20
Source Project: flink   File: CoGroupOperator.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Continues a CoGroup transformation and defines a {@link KeySelector} function for the second co-grouped {@link DataSet}.
 *
 * <p>The KeySelector function is called for each element of the second DataSet and extracts a single
 * key value on which the DataSet is grouped.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the second DataSet on which it is grouped.
 * @return An incomplete CoGroup transformation.
 *           Call {@link org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate.CoGroupOperatorWithoutFunction#with(org.apache.flink.api.common.functions.CoGroupFunction)} to finalize the CoGroup transformation.
 */
public <K> CoGroupOperatorWithoutFunction equalTo(KeySelector<I2, K> keyExtractor) {
	// Key type is extracted against the SECOND input's element type.
	TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input2.getType());
	// NOTE(review): the extractor is cleaned via input1 but keyed on input2's type —
	// matches upstream Flink; clean() is presumably input-independent closure cleaning.
	return createCoGroupOperator(new SelectorFunctionKeys<>(input1.clean(keyExtractor), input2.getType(), keyType));
}