org.apache.flink.api.common.operators.Keys Java Examples

The following examples show how to use org.apache.flink.api.common.operators.Keys. Each example is taken from an open-source project; the source file and project are noted above each snippet.
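Throughout these examples, Keys appears in its two concrete flavors: Keys.ExpressionKeys, built from field positions or field-expression strings, and Keys.SelectorFunctionKeys, built from a KeySelector function. A minimal sketch of constructing both, assuming a DataSet<Tuple2<String, Integer>> named input (the variable names are illustrative only):

// Keys from a field expression ("f0" refers to the first tuple field)
Keys.ExpressionKeys<Tuple2<String, Integer>> expressionKeys =
		new Keys.ExpressionKeys<>(new String[] {"f0"}, input.getType());

// Keys from a KeySelector function
KeySelector<Tuple2<String, Integer>, String> selector =
		new KeySelector<Tuple2<String, Integer>, String>() {
			@Override
			public String getKey(Tuple2<String, Integer> value) {
				return value.f0;
			}
		};
Keys.SelectorFunctionKeys<Tuple2<String, Integer>, String> selectorKeys =
		new Keys.SelectorFunctionKeys<>(selector, input.getType(), BasicTypeInfo.STRING_TYPE_INFO);

// Both flavors can expand to flat logical key positions
int[] flatPositions = expressionKeys.computeLogicalKeyPositions();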
Example #1
Source File: CoGroupOperator.java    From flink with Apache License 2.0
/**
 * Intermediate step of a CoGroup transformation.
 *
 * <p>To continue the CoGroup transformation, provide a {@link org.apache.flink.api.common.functions.RichCoGroupFunction} by calling
 * {@link org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate.CoGroupOperatorWithoutFunction#with(org.apache.flink.api.common.functions.CoGroupFunction)}.
 *
 */
private CoGroupOperatorWithoutFunction createCoGroupOperator(Keys<I2> keys2) {
	if (keys2 == null) {
		throw new NullPointerException();
	}

	if (keys2.isEmpty()) {
		throw new InvalidProgramException("The co-group keys must not be empty.");
	}
	try {
		keys1.areCompatible(keys2);
	} catch (IncompatibleKeysException ike) {
		throw new InvalidProgramException("The pair of co-group keys are not compatible with each other.", ike);
	}

	return new CoGroupOperatorWithoutFunction(keys2);
}
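For context, this private method backs the public co-group builder; a typical call chain that reaches it might look like the following sketch (ds1 and ds2 are assumed DataSet<Tuple2<String, Integer>> inputs):

DataSet<String> result = ds1.coGroup(ds2)
		.where("f0")     // defines keys1
		.equalTo("f0")   // defines keys2 and triggers createCoGroupOperator(keys2)
		.with(new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
			@Override
			public void coGroup(Iterable<Tuple2<String, Integer>> first,
					Iterable<Tuple2<String, Integer>> second, Collector<String> out) {
				out.collect("matched group");
			}
		});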
 
Example #2
Source File: UdfAnalyzerTest.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void compareAnalyzerResultWithAnnotationsDualInputWithKeys(Class<?> baseClass, Class<?> clazz,
	TypeInformation<?> in1Type, TypeInformation<?> in2Type, TypeInformation<?> outType, String[] keys1, String[] keys2) {
	// expected
	final Set<Annotation> annotations = FunctionAnnotation.readDualForwardAnnotations(clazz);
	final DualInputSemanticProperties expected = SemanticPropUtil.getSemanticPropsDual(annotations, in1Type,
			in2Type, outType);

	// actual
	final UdfAnalyzer ua = new UdfAnalyzer(baseClass, clazz, "operator", in1Type, in2Type, outType,
			(keys1 == null) ? null : new Keys.ExpressionKeys(keys1, in1Type),
			(keys2 == null) ? null : new Keys.ExpressionKeys(keys2, in2Type),
			true);
	ua.analyze();
	final DualInputSemanticProperties actual = (DualInputSemanticProperties) ua.getSemanticProperties();

	assertEquals(expected.toString(), actual.toString());
}
 
Example #3
Source File: PartitionOperator.java    From flink with Apache License 2.0
private static <T> Ordering computeOrdering(Keys<T> pKeys, Order[] orders) {
	Ordering ordering = new Ordering();
	final int[] logicalKeyPositions = pKeys.computeLogicalKeyPositions();

	if (orders == null) {
		for (int key : logicalKeyPositions) {
			ordering.appendOrdering(key, null, Order.ASCENDING);
		}
	} else {
		final TypeInformation<?>[] originalKeyFieldTypes = pKeys.getOriginalKeyFieldTypes();
		int index = 0;
		for (int i = 0; i < originalKeyFieldTypes.length; i++) {
			final int typeTotalFields = originalKeyFieldTypes[i].getTotalFields();
			for (int j = index; j < index + typeTotalFields; j++) {
				ordering.appendOrdering(logicalKeyPositions[j], null, orders[i]);
			}
			index += typeTotalFields;
		}
	}

	return ordering;
}
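The nested loop is needed because a single original key field may expand to several flat positions (for example, a nested tuple), and its one Order must be applied to each of them. A hedged sketch from the caller's side, assuming a DataSet<Tuple2<Tuple2<Integer, Integer>, String>> named ds and the withOrders call on range partitioning:

// "f0" is itself a Tuple2, so it expands to two flat key positions;
// the single ASCENDING order is applied to both by computeOrdering.
ds.partitionByRange("f0")
		.withOrders(Order.ASCENDING);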
 
Example #4
Source File: DeltaIterationResultSet.java    From flink with Apache License 2.0
DeltaIterationResultSet(ExecutionEnvironment context,
						TypeInformation<ST> typeSS,
						TypeInformation<WT> typeWS,
						DeltaIteration<ST, WT> iterationHead,
						DataSet<ST> nextSolutionSet,
						DataSet<WT> nextWorkset,
						Keys<ST> keys,
						int maxIterations) {
	super(context, typeSS);
	this.iterationHead = iterationHead;
	this.nextWorkset = nextWorkset;
	this.nextSolutionSet = nextSolutionSet;
	this.keys = keys;
	this.maxIterations = maxIterations;
	this.typeWS = typeWS;
}
 
Example #5
Source File: PlanUnwrappingSortedReduceGroupOperator.java    From flink with Apache License 2.0
public PlanUnwrappingSortedReduceGroupOperator(
	GroupReduceFunction<IN, OUT> udf,
	Keys.SelectorFunctionKeys<IN, K1> groupingKey,
	Keys.SelectorFunctionKeys<IN, K2> sortingKey,
	String name,
	TypeInformation<OUT> outType,
	TypeInformation<Tuple3<K1, K2, IN>> typeInfoWithKey,
	boolean combinable) {
	super(
		combinable ?
			new TupleUnwrappingGroupCombinableGroupReducer<IN, OUT, K1, K2>(udf) :
			new TupleUnwrappingNonCombinableGroupReducer<IN, OUT, K1, K2>(udf),
		new UnaryOperatorInformation<>(typeInfoWithKey, outType), groupingKey.computeLogicalKeyPositions(), name);

	super.setCombinable(combinable);
}
 
Example #6
Source File: PartitionOperator.java    From Flink-CEPplus with Apache License 2.0
private <P> PartitionOperator(DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys, Partitioner<P> customPartitioner,
		TypeInformation<P> partitionerTypeInfo, DataDistribution distribution, String partitionLocationName) {
	super(input, input.getType());

	Preconditions.checkNotNull(pMethod);
	Preconditions.checkArgument(pKeys != null || pMethod == PartitionMethod.REBALANCE, "Partitioning requires keys");
	Preconditions.checkArgument(pMethod != PartitionMethod.CUSTOM || customPartitioner != null, "Custom partitioning requires a partitioner.");
	Preconditions.checkArgument(distribution == null || pMethod == PartitionMethod.RANGE, "Customized data distribution is only necessary for range partitioning.");

	if (distribution != null) {
		Preconditions.checkArgument(pKeys.getNumberOfKeyFields() <= distribution.getNumberOfFields(), "The distribution must provide at least as many fields as flat key fields are specified.");
		Preconditions.checkArgument(Arrays.equals(pKeys.getKeyFieldTypes(), Arrays.copyOfRange(distribution.getKeyTypes(), 0, pKeys.getNumberOfKeyFields())),
				"The types of the flat key fields must be equal to the types of the fields of the distribution.");
	}

	if (customPartitioner != null) {
		pKeys.validateCustomPartitioner(customPartitioner, partitionerTypeInfo);
	}

	this.pMethod = pMethod;
	this.pKeys = pKeys;
	this.partitionLocationName = partitionLocationName;
	this.customPartitioner = customPartitioner;
	this.distribution = distribution;
}
 
Example #7
Source File: PlanBothUnwrappingCoGroupOperator.java    From Flink-CEPplus with Apache License 2.0
public PlanBothUnwrappingCoGroupOperator(
		CoGroupFunction<I1, I2, OUT> udf,
		Keys.SelectorFunctionKeys<I1, K> key1,
		Keys.SelectorFunctionKeys<I2, K> key2,
		String name,
		TypeInformation<OUT> type,
		TypeInformation<Tuple2<K, I1>> typeInfoWithKey1,
		TypeInformation<Tuple2<K, I2>> typeInfoWithKey2) {

	super(
			new TupleBothUnwrappingCoGrouper<I1, I2, OUT, K>(udf),
			new BinaryOperatorInformation<Tuple2<K, I1>, Tuple2<K, I2>, OUT>(
					typeInfoWithKey1,
					typeInfoWithKey2,
					type),
			key1.computeLogicalKeyPositions(),
			key2.computeLogicalKeyPositions(),
			name);
}
 
Example #8
Source File: SplitDataProperties.java    From Flink-CEPplus with Apache License 2.0
/**
 * Defines that the data within an input split is sorted on the fields defined by the field expressions
 * in the specified orders. Multiple field expressions must be separated by the semicolon ';' character.
 * All records of an input split must be emitted by the input format in the defined order.
 *
 * <p><b>
 *     IMPORTANT: Providing wrong information with SplitDataProperties can cause wrong results!
 * </b>
 *
 * @param orderFields The field expressions of the grouping key.
 * @param orders The orders of the fields.
 * @return This SplitDataProperties object.
 */
public SplitDataProperties<T> splitsOrderedBy(String orderFields, Order[] orders) {

	if (orderFields == null || orders == null) {
		throw new InvalidProgramException("OrderFields or Orders may not be null.");
	}

	String[] orderKeysA = orderFields.split(";");
	if (orderKeysA.length == 0) {
		throw new InvalidProgramException("OrderFields may not be empty.");
	} else if (orders.length == 0) {
		throw new InvalidProgramException("Orders may not be empty");
	} else if (orderKeysA.length != orders.length) {
		throw new InvalidProgramException("Number of OrderFields and Orders must match.");
	}

	if (this.splitGroupKeys != null) {
		throw new InvalidProgramException("DataSource may either be grouped or sorted.");
	}

	this.splitOrdering = new Ordering();

	for (int i = 0; i < orderKeysA.length; i++) {
		String keyExp = orderKeysA[i];
		Keys.ExpressionKeys<T> ek = new Keys.ExpressionKeys<>(keyExp, this.type);
		int[] flatKeys = ek.computeLogicalKeyPositions();

		for (int key : flatKeys) {
			// check for duplicates
			for (int okey : splitOrdering.getFieldPositions()) {
				if (key == okey) {
					throw new InvalidProgramException("Duplicate field in field expression " + keyExp);
				}
			}
			// append key
			this.splitOrdering.appendOrdering(key, null, orders[i]);
		}
	}
	return this;
}
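A hedged usage sketch: SplitDataProperties is obtained from a DataSource, and the semicolon-separated expression below declares that every split arrives sorted ascending on f0 and descending on f1 (the env, inputFormat, and field layout are assumptions for illustration):

DataSource<Tuple2<Integer, String>> source = env.createInput(inputFormat);
source.getSplitDataProperties()
		.splitsOrderedBy("f0;f1", new Order[] {Order.ASCENDING, Order.DESCENDING});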
 
Example #9
Source File: JoinOperator.java    From flink with Apache License 2.0
protected ProjectJoin(DataSet<I1> input1, DataSet<I2> input2, Keys<I1> keys1, Keys<I2> keys2, JoinHint hint, int[] fields, boolean[] isFromFirst, TupleTypeInfo<OUT> returnType) {
	super(input1, input2, keys1, keys2,
			new ProjectFlatJoinFunction<I1, I2, OUT>(fields, isFromFirst, returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()),
			returnType, hint, Utils.getCallLocationName(4)); // We need to use the 4th element in the stack because the call comes through .types().

	joinProj = null;
}
 
Example #10
Source File: FlinkBatchTransformTranslators.java    From flink-dataflow with Apache License 2.0
@Override
public void translateNode(CoGroupByKey<K> transform, FlinkBatchTranslationContext context) {
	KeyedPCollectionTuple<K> input = context.getInput(transform);

	CoGbkResultSchema schema = input.getCoGbkResultSchema();
	List<KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?>> keyedCollections = input.getKeyedCollections();

	KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection1 = keyedCollections.get(0);
	KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection2 = keyedCollections.get(1);

	TupleTag<?> tupleTag1 = taggedCollection1.getTupleTag();
	TupleTag<?> tupleTag2 = taggedCollection2.getTupleTag();

	PCollection<? extends KV<K, ?>> collection1 = taggedCollection1.getCollection();
	PCollection<? extends KV<K, ?>> collection2 = taggedCollection2.getCollection();

	DataSet<KV<K,V1>> inputDataSet1 = context.getInputDataSet(collection1);
	DataSet<KV<K,V2>> inputDataSet2 = context.getInputDataSet(collection2);

	TypeInformation<KV<K,CoGbkResult>> typeInfo = context.getOutputTypeInfo();

	FlinkCoGroupKeyedListAggregator<K,V1,V2> aggregator = new FlinkCoGroupKeyedListAggregator<>(schema, tupleTag1, tupleTag2);

	Keys.ExpressionKeys<KV<K,V1>> keySelector1 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet1.getType());
	Keys.ExpressionKeys<KV<K,V2>> keySelector2 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet2.getType());

	DataSet<KV<K, CoGbkResult>> out = new CoGroupOperator<>(inputDataSet1, inputDataSet2,
			keySelector1, keySelector2, aggregator, typeInfo, null, transform.getName());
	context.setOutputDataSet(context.getOutput(transform), out);
}
 
Example #11
Source File: FlinkBatchTransformTranslators.java    From flink-dataflow with Apache License 2.0
@Override
public void translateNode(GroupByKey.GroupByKeyOnly<K, V> transform, FlinkBatchTranslationContext context) {
	DataSet<KV<K, V>> inputDataSet = context.getInputDataSet(context.getInput(transform));
	GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> groupReduceFunction = new FlinkKeyedListAggregationFunction<>();

	TypeInformation<KV<K, Iterable<V>>> typeInformation = context.getTypeInfo(context.getOutput(transform));

	Grouping<KV<K, V>> grouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet.getType()));

	GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> outputDataSet =
			new GroupReduceOperator<>(grouping, typeInformation, groupReduceFunction, transform.getName());
	context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}
 
Example #12
Source File: DataStream.java    From Flink-CEPplus with Apache License 2.0
private <K> DataStream<T> partitionCustom(Partitioner<K> partitioner, Keys<T> keys) {
	KeySelector<T, K> keySelector = KeySelectorUtil.getSelectorForOneKey(keys, partitioner, getType(), getExecutionConfig());

	return setConnectionType(
			new CustomPartitionerWrapper<>(
					clean(partitioner),
					clean(keySelector)));
}
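This private method sits behind the public DataStream#partitionCustom overloads; a minimal sketch, assuming a DataStream<Tuple2<String, Integer>> named stream:

// Route records by the first tuple field with a custom partitioner
Partitioner<String> partitioner = new Partitioner<String>() {
	@Override
	public int partition(String key, int numPartitions) {
		return Math.abs(key.hashCode()) % numPartitions;
	}
};
DataStream<Tuple2<String, Integer>> partitioned = stream.partitionCustom(partitioner, "f0");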
 
Example #13
Source File: SplitDataProperties.java    From flink with Apache License 2.0
private int[] getAllFlatKeys(String[] fieldExpressions) {

	int[] allKeys = null;

	for (String keyExp : fieldExpressions) {
		Keys.ExpressionKeys<T> ek = new Keys.ExpressionKeys<>(keyExp, this.type);
		int[] flatKeys = ek.computeLogicalKeyPositions();

		if (allKeys == null) {
			allKeys = flatKeys;
		} else {
			// check for duplicates
			for (int key1 : flatKeys) {
				for (int key2 : allKeys) {
					if (key1 == key2) {
						throw new InvalidProgramException("Duplicate fields in field expression " + keyExp);
					}
				}
			}
			// append flat keys
			int oldLength = allKeys.length;
			int newLength = oldLength + flatKeys.length;
			allKeys = Arrays.copyOf(allKeys, newLength);
			System.arraycopy(flatKeys, 0, allKeys, oldLength, flatKeys.length);
		}
	}

	return allKeys;
}
 
Example #14
Source File: SortPartitionOperator.java    From flink with Apache License 2.0
/**
 * Appends an additional sort order with the specified field in the specified order to the
 * local partition sorting of the DataSet.
 *
 * @param field The field expression referring to the field of the additional sort order of
 *              the local partition sorting.
 * @param order The order of the additional sort order of the local partition sorting.
 * @return The DataSet with sorted local partitions.
 */
public SortPartitionOperator<T> sortPartition(String field, Order order) {
	if (useKeySelector) {
		throw new InvalidProgramException("Expression keys cannot be appended after a KeySelector");
	}

	ensureSortableKey(field);
	keys.add(new Keys.ExpressionKeys<>(field, getType()));
	orders.add(order);

	return this;
}
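Appending additional sort orders simply means calling sortPartition again on the returned operator; a sketch, assuming a DataSet<Tuple3<Integer, String, Double>> named ds:

DataSet<Tuple3<Integer, String, Double>> sorted = ds
		.sortPartition("f1", Order.ASCENDING)    // initial local sort order
		.sortPartition("f2", Order.DESCENDING);  // appended by the method above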
 
Example #15
Source File: JoinOperatorSetsBase.java    From Flink-CEPplus with Apache License 2.0
protected JoinOperatorSetsPredicateBase(Keys<I1> keys1) {
	if (keys1 == null) {
		throw new NullPointerException();
	}

	if (keys1.isEmpty()) {
		throw new InvalidProgramException("The join keys must not be empty.");
	}

	this.keys1 = keys1;
}
 
Example #16
Source File: DataSink.java    From Flink-CEPplus with Apache License 2.0
/**
 * Sorts each local partition of a {@link org.apache.flink.api.java.tuple.Tuple} data set
 * on the specified field in the specified {@link Order} before it is emitted by the output format.
 *
 * <p><b>Note: Only tuple data sets can be sorted using integer field indices.</b>
 *
 * <p>The tuple data set can be sorted on multiple fields in different orders
 * by chaining {@link #sortLocalOutput(int, Order)} calls.
 *
 * @param field The Tuple field on which the data set is locally sorted.
 * @param order The Order in which the specified Tuple field is locally sorted.
 * @return This data sink operator with specified output order.
 *
 * @see org.apache.flink.api.java.tuple.Tuple
 * @see Order
 *
 * @deprecated Use {@link DataSet#sortPartition(int, Order)} instead
 */
@Deprecated
@PublicEvolving
public DataSink<T> sortLocalOutput(int field, Order order) {

	// get flat keys
	Keys.ExpressionKeys<T> ek = new Keys.ExpressionKeys<>(field, this.type);
	int[] flatKeys = ek.computeLogicalKeyPositions();

	if (!Keys.ExpressionKeys.isSortKey(field, this.type)) {
		throw new InvalidProgramException("Selected sort key is not a sortable type");
	}

	if (this.sortKeyPositions == null) {
		// set sorting info
		this.sortKeyPositions = flatKeys;
		this.sortOrders = new Order[flatKeys.length];
		Arrays.fill(this.sortOrders, order);
	} else {
		// append sorting info to existing info
		int oldLength = this.sortKeyPositions.length;
		int newLength = oldLength + flatKeys.length;
		this.sortKeyPositions = Arrays.copyOf(this.sortKeyPositions, newLength);
		this.sortOrders = Arrays.copyOf(this.sortOrders, newLength);

		for (int i = 0; i < flatKeys.length; i++) {
			this.sortKeyPositions[oldLength + i] = flatKeys[i];
			this.sortOrders[oldLength + i] = order;
		}
	}

	return this;
}
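Since the method is deprecated, its javadoc points to DataSet#sortPartition as the replacement; a hedged before/after sketch, assuming a tuple DataSet ds and an illustrative output path:

// Deprecated: sort at the sink
ds.writeAsText("/tmp/out").sortLocalOutput(0, Order.ASCENDING);

// Replacement: sort the local partitions, then attach the sink
ds.sortPartition(0, Order.ASCENDING).writeAsText("/tmp/out");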
 
Example #17
Source File: DeltaIteration.java    From flink with Apache License 2.0
public DeltaIteration(ExecutionEnvironment context, TypeInformation<ST> type, DataSet<ST> solutionSet, DataSet<WT> workset, Keys<ST> keys, int maxIterations) {
	initialSolutionSet = solutionSet;
	initialWorkset = workset;
	solutionSetPlaceholder = new SolutionSetPlaceHolder<>(context, solutionSet.getType(), this);
	worksetPlaceholder = new WorksetPlaceHolder<>(context, workset.getType());
	this.keys = keys;
	this.maxIterations = maxIterations;
}
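This constructor is reached through the public DataSet#iterateDelta entry point; a hedged sketch, assuming solution set and workset of type Tuple2<Long, Double> keyed on field 0, with solutionSetDelta and nextWorkset computed in the loop body:

DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration =
		initialSolutionSet.iterateDelta(initialWorkset, 100, 0);

// ... derive solutionSetDelta and nextWorkset from iteration.getWorkset() ...
DataSet<Tuple2<Long, Double>> result = iteration.closeWith(solutionSetDelta, nextWorkset);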
 
Example #18
Source File: OneInputOperatorTransformation.java    From flink with Apache License 2.0
/**
 * Partitions the operator state of a {@link OperatorTransformation} by the given key positions.
 *
 * @param fields The position of the fields on which the {@code OperatorTransformation} will be grouped.
 * @return The {@code OperatorTransformation} with partitioned state.
 */
public KeyedOperatorTransformation<Tuple, T> keyBy(int... fields) {
	if (dataSet.getType() instanceof BasicArrayTypeInfo || dataSet.getType() instanceof PrimitiveArrayTypeInfo) {
		return keyBy(KeySelectorUtil.getSelectorForArray(fields, dataSet.getType()));
	} else {
		return keyBy(new Keys.ExpressionKeys<>(fields, dataSet.getType()));
	}
}
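For context, OneInputOperatorTransformation belongs to Flink's State Processor API; a hedged sketch of how the keyBy above is typically reached (the bootstrap data set is an assumption):

OneInputOperatorTransformation<Tuple2<Integer, Long>> transformation =
		OperatorTransformation.bootstrapWith(bootstrapData);

// Keyed on tuple field 0 via Keys.ExpressionKeys, as built by the method above
KeyedOperatorTransformation<Tuple, Tuple2<Integer, Long>> keyed = transformation.keyBy(0);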
 
Example #19
Source File: CoGroupOperator.java    From Flink-CEPplus with Apache License 2.0
private CoGroupOperatorWithoutFunction(Keys<I2> keys2) {
	if (keys2 == null) {
		throw new NullPointerException();
	}
	if (keys2.isEmpty()) {
		throw new InvalidProgramException("The co-group keys must not be empty.");
	}

	this.keys2 = keys2;

	this.groupSortKeyOrderFirst = new ArrayList<>();
	this.groupSortKeyOrderSecond = new ArrayList<>();
}
 
Example #20
Source File: SortPartitionOperator.java    From Flink-CEPplus with Apache License 2.0
/**
 * Appends an additional sort order with the specified field in the specified order to the
 * local partition sorting of the DataSet.
 *
 * @param field The field index of the additional sort order of the local partition sorting.
 * @param order The order of the additional sort order of the local partition sorting.
 * @return The DataSet with sorted local partitions.
 */
public SortPartitionOperator<T> sortPartition(int field, Order order) {
	if (useKeySelector) {
		throw new InvalidProgramException("Expression keys cannot be appended after a KeySelector");
	}

	ensureSortableKey(field);
	keys.add(new Keys.ExpressionKeys<>(field, getType()));
	orders.add(order);

	return this;
}
 
Example #21
Source File: SortPartitionOperator.java    From Flink-CEPplus with Apache License 2.0
public <K> SortPartitionOperator(DataSet<T> dataSet, Keys.SelectorFunctionKeys<T, K> sortKey, Order sortOrder, String sortLocationName) {
	this(dataSet, sortLocationName);
	this.useKeySelector = true;

	ensureSortableKey(sortKey);

	keys.add(sortKey);
	orders.add(sortOrder);
}
 
Example #22
Source File: AggregationFunctionTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void minMaxByTest() throws Exception {
	// Tuples are grouped on field 0, aggregated on field 1

	// preparing expected outputs
	List<Tuple3<Integer, Integer, Integer>> maxByFirstExpected = ImmutableList.of(
			Tuple3.of(0, 0, 0), Tuple3.of(0, 1, 1), Tuple3.of(0, 2, 2),
			Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2),
			Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2));

	List<Tuple3<Integer, Integer, Integer>> maxByLastExpected = ImmutableList.of(
			Tuple3.of(0, 0, 0), Tuple3.of(0, 1, 1), Tuple3.of(0, 2, 2),
			Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 2), Tuple3.of(0, 2, 5),
			Tuple3.of(0, 2, 5), Tuple3.of(0, 2, 5), Tuple3.of(0, 2, 8));

	List<Tuple3<Integer, Integer, Integer>> minByFirstExpected = ImmutableList.of(
			Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0),
			Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0),
			Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0));

	List<Tuple3<Integer, Integer, Integer>> minByLastExpected = ImmutableList.of(
			Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0), Tuple3.of(0, 0, 0),
			Tuple3.of(0, 0, 3), Tuple3.of(0, 0, 3), Tuple3.of(0, 0, 3),
			Tuple3.of(0, 0, 6), Tuple3.of(0, 0, 6), Tuple3.of(0, 0, 6));

	// some necessary boilerplate
	TypeInformation<Tuple3<Integer, Integer, Integer>> typeInfo = TypeExtractor
			.getForObject(Tuple3.of(0, 0, 0));

	ExecutionConfig config = new ExecutionConfig();

	KeySelector<Tuple3<Integer, Integer, Integer>, Tuple> keySelector = KeySelectorUtil.getSelectorForKeys(
			new Keys.ExpressionKeys<>(new int[]{0}, typeInfo),
			typeInfo, config);
	TypeInformation<Tuple> keyType = TypeExtractor.getKeySelectorTypes(keySelector, typeInfo);

	// aggregations tested
	ReduceFunction<Tuple3<Integer, Integer, Integer>> maxByFunctionFirst =
			new ComparableAggregator<>(1, typeInfo, AggregationType.MAXBY, true, config);
	ReduceFunction<Tuple3<Integer, Integer, Integer>> maxByFunctionLast =
			new ComparableAggregator<>(1, typeInfo, AggregationType.MAXBY, false, config);
	ReduceFunction<Tuple3<Integer, Integer, Integer>> minByFunctionFirst =
			new ComparableAggregator<>(1, typeInfo, AggregationType.MINBY, true, config);
	ReduceFunction<Tuple3<Integer, Integer, Integer>> minByFunctionLast =
			new ComparableAggregator<>(1, typeInfo, AggregationType.MINBY, false, config);

	assertEquals(maxByFirstExpected, MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(maxByFunctionFirst, typeInfo.createSerializer(config)),
			getInputByList(),
			keySelector, keyType));

	assertEquals(maxByLastExpected, MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(maxByFunctionLast, typeInfo.createSerializer(config)),
			getInputByList(),
			keySelector, keyType));

	assertEquals(minByLastExpected, MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(minByFunctionLast, typeInfo.createSerializer(config)),
			getInputByList(),
			keySelector, keyType));

	assertEquals(minByFirstExpected, MockContext.createAndExecuteForKeyedStream(
			new StreamGroupedReduce<>(minByFunctionFirst, typeInfo.createSerializer(config)),
			getInputByList(),
			keySelector, keyType));
}
 
Example #23
Source File: DataSetUtils.java    From flink with Apache License 2.0
/**
 * Range-partitions a DataSet using the specified key selector function.
 */
public static <T, K extends Comparable<K>> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, KeySelector<T, K> keyExtractor) {
	final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input.getType());
	return new PartitionOperator<>(input, PartitionOperatorBase.PartitionMethod.RANGE, new Keys.SelectorFunctionKeys<>(input.clean(keyExtractor), input.getType(), keyType), distribution, Utils.getCallLocationName());
}
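A usage sketch for the utility above, assuming an existing DataSet<Tuple2<Integer, String>> input and a caller-supplied DataDistribution implementation dist:

DataSet<Tuple2<Integer, String>> ranged = DataSetUtils.partitionByRange(
		input, dist,
		new KeySelector<Tuple2<Integer, String>, Integer>() {
			@Override
			public Integer getKey(Tuple2<Integer, String> value) {
				return value.f0;
			}
		});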
 
Example #24
Source File: JoinOperatorSetsBase.java    From Flink-CEPplus with Apache License 2.0
protected JoinFunctionAssigner<I1, I2> createJoinFunctionAssigner(Keys<I2> keys2) {
	DefaultJoin<I1, I2> join = createDefaultJoin(keys2);
	return new DefaultJoinFunctionAssigner(join);
}
 
Example #25
Source File: JoinOperator.java    From flink with Apache License 2.0
public EquiJoin(DataSet<I1> input1, DataSet<I2> input2,
		Keys<I1> keys1, Keys<I2> keys2, FlatJoinFunction<I1, I2, OUT> generatedFunction, JoinFunction<I1, I2, OUT> function,
		TypeInformation<OUT> returnType, JoinHint hint, String joinLocationName) {
	this(input1, input2, keys1, keys2, generatedFunction, function, returnType, hint, joinLocationName, JoinType.INNER);
}
 
Example #26
Source File: SortPartitionOperator.java    From flink with Apache License 2.0
protected org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateToDataFlow(Operator<T> input) {

	String name = "Sort at " + sortLocationName;

	if (useKeySelector) {
		return translateToDataFlowWithKeyExtractor(input, (Keys.SelectorFunctionKeys<T, ?>) keys.get(0), orders.get(0), name);
	}

	// flatten sort key positions
	List<Integer> allKeyPositions = new ArrayList<>();
	List<Order> allOrders = new ArrayList<>();
	for (int i = 0, length = keys.size(); i < length; i++) {
		int[] sortKeyPositions = keys.get(i).computeLogicalKeyPositions();
		Order order = orders.get(i);

		for (int sortKeyPosition : sortKeyPositions) {
			allKeyPositions.add(sortKeyPosition);
			allOrders.add(order);
		}
	}

	Ordering partitionOrdering = new Ordering();
	for (int i = 0, length = allKeyPositions.size(); i < length; i++) {
		partitionOrdering.appendOrdering(allKeyPositions.get(i), null, allOrders.get(i));
	}

	// create the sort operator and forward the parallelism
	UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<>(getType(), getType());
	SortPartitionOperatorBase<T> noop = new SortPartitionOperatorBase<>(operatorInfo, partitionOrdering, name);
	noop.setInput(input);
	if (this.getParallelism() < 0) {
		// use parallelism of input if not explicitly specified
		noop.setParallelism(input.getParallelism());
	} else {
		// use explicitly specified parallelism
		noop.setParallelism(this.getParallelism());
	}

	return noop;
}