Java Code Examples for org.apache.flink.api.common.operators.Keys#ExpressionKeys

The following examples show how to use org.apache.flink.api.common.operators.Keys#ExpressionKeys . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataSink.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Sorts each local partition of a {@link org.apache.flink.api.java.tuple.Tuple} data set
 * on the specified field in the specified {@link Order} before it is emitted by the output format.
 *
 * <p><b>Note: Only tuple data sets can be sorted using integer field indices.</b>
 *
 * <p>The tuple data set can be sorted on multiple fields in different orders
 * by chaining {@link #sortLocalOutput(int, Order)} calls.
 *
 * @param field The Tuple field on which the data set is locally sorted.
 * @param order The Order in which the specified Tuple field is locally sorted.
 * @return This data sink operator with specified output order.
 *
 * @see org.apache.flink.api.java.tuple.Tuple
 * @see Order
 *
 * @deprecated Use {@link DataSet#sortPartition(int, Order)} instead
 */
@Deprecated
@PublicEvolving
public DataSink<T> sortLocalOutput(int field, Order order) {

	// translate the tuple field index into its flat key positions
	Keys.ExpressionKeys<T> fieldKeys = new Keys.ExpressionKeys<>(field, this.type);
	int[] flatKeys = fieldKeys.computeLogicalKeyPositions();

	// reject fields that cannot be used as a sort key
	if (!Keys.ExpressionKeys.isSortKey(field, this.type)) {
		throw new InvalidProgramException("Selected sort key is not a sortable type");
	}

	if (this.sortKeyPositions != null) {
		// a sort order already exists: grow both arrays and append the new keys at the end
		final int existingCount = this.sortKeyPositions.length;
		final int totalCount = existingCount + flatKeys.length;
		this.sortKeyPositions = Arrays.copyOf(this.sortKeyPositions, totalCount);
		this.sortOrders = Arrays.copyOf(this.sortOrders, totalCount);

		System.arraycopy(flatKeys, 0, this.sortKeyPositions, existingCount, flatKeys.length);
		// every appended flat key is sorted in the same requested order
		Arrays.fill(this.sortOrders, existingCount, totalCount, order);
		return this;
	}

	// first call: the new keys become the complete sorting info
	this.sortKeyPositions = flatKeys;
	this.sortOrders = new Order[flatKeys.length];
	Arrays.fill(this.sortOrders, order);
	return this;
}
 
Example 2
Source File: PythonPlanBinder.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a raw co-group operator for the given operation and registers it in {@code sets}
 * under {@code info.setID}.
 *
 * <p>The two inputs are looked up via {@code info.parentID} and {@code info.otherID}; the key
 * expressions in {@code info.keys1} / {@code info.keys2} are resolved against the respective
 * input type. The operator's parallelism is taken from {@code info.parallelism}.
 *
 * @param info Description of the co-group operation to create.
 * @param type Type information of the operator's output.
 */
private <IN1, IN2, OUT> void createCoGroupOperation(PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN1> firstInput = sets.getDataSet(info.parentID);
	DataSet<IN2> secondInput = sets.getDataSet(info.otherID);

	// key expressions of the first input
	String[] firstKeyFields = info.keys1.toArray(new String[info.keys1.size()]);
	Keys.ExpressionKeys<IN1> firstKeys = new Keys.ExpressionKeys<>(firstKeyFields, firstInput.getType());

	// key expressions of the second input
	String[] secondKeyFields = info.keys2.toArray(new String[info.keys2.size()]);
	Keys.ExpressionKeys<IN2> secondKeys = new Keys.ExpressionKeys<>(secondKeyFields, secondInput.getType());

	PythonCoGroup<IN1, IN2, OUT> coGroupFunction = new PythonCoGroup<>(operatorConfig, info.envID, info.setID, type);

	sets.add(
		info.setID,
		new CoGroupRawOperator<>(firstInput, secondInput, firstKeys, secondKeys, coGroupFunction, type, info.name)
			.setParallelism(info.parallelism));
}
 
Example 3
Source File: SplitDataProperties.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves every field expression against {@code this.type} and concatenates the resulting
 * flat key positions in the order in which the expressions are given.
 *
 * <p>Positions are only compared against those produced by earlier expressions.
 *
 * @param fieldExpressions The field expressions to resolve.
 * @return The concatenated flat key positions, or {@code null} if no expression was given.
 * @throws InvalidProgramException If an expression yields a position that an earlier
 *         expression already produced.
 */
private int[] getAllFlatKeys(String[] fieldExpressions) {

	int[] collected = null;

	for (String expression : fieldExpressions) {
		Keys.ExpressionKeys<T> expressionKeys = new Keys.ExpressionKeys<>(expression, this.type);
		int[] current = expressionKeys.computeLogicalKeyPositions();

		if (collected == null) {
			// nothing collected so far, so there is nothing to compare against
			collected = current;
			continue;
		}

		// a position may not be contributed twice
		for (int candidate : current) {
			for (int seen : collected) {
				if (candidate == seen) {
					throw new InvalidProgramException("Duplicate fields in field expression " + expression);
				}
			}
		}

		// build the merged array: previously collected positions first, then the new ones
		int[] merged = new int[collected.length + current.length];
		System.arraycopy(collected, 0, merged, 0, collected.length);
		System.arraycopy(current, 0, merged, collected.length, current.length);
		collected = merged;
	}

	return collected;
}
 
Example 4
Source File: SplitDataProperties.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the flat key positions of all given field expressions, resolved against
 * {@code this.type}, and returns them as one array in expression order.
 *
 * @param fieldExpressions The field expressions whose flat keys are collected.
 * @return All flat key positions, or {@code null} if {@code fieldExpressions} is empty.
 * @throws InvalidProgramException If a position of one expression was already produced
 *         by a preceding expression.
 */
private int[] getAllFlatKeys(String[] fieldExpressions) {

	int[] result = null;

	for (int e = 0; e < fieldExpressions.length; e++) {
		final String expr = fieldExpressions[e];
		final Keys.ExpressionKeys<T> exprKeys = new Keys.ExpressionKeys<>(expr, this.type);
		final int[] positions = exprKeys.computeLogicalKeyPositions();

		if (result != null) {
			// compare every new position with every position gathered so far
			for (int p = 0; p < positions.length; p++) {
				for (int r = 0; r < result.length; r++) {
					if (positions[p] == result[r]) {
						throw new InvalidProgramException("Duplicate fields in field expression " + expr);
					}
				}
			}

			// grow the result and copy the new positions behind the existing ones
			final int offset = result.length;
			result = Arrays.copyOf(result, offset + positions.length);
			System.arraycopy(positions, 0, result, offset, positions.length);
		} else {
			// the first expression's positions are taken as they are
			result = positions;
		}
	}

	return result;
}
 
Example 5
Source File: DistinctOperator.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a distinct operator on the given input; the result type equals the input type.
 *
 * @param input The data set the operator is applied to.
 * @param keys The keys on which distinction is decided. If {@code null}, keys derived from
 *             the input type alone are used, i.e. distinction is done on all fields.
 * @param distinctLocationName The location name stored with this operator.
 */
public DistinctOperator(DataSet<T> input, Keys<T> keys, String distinctLocationName) {
	super(input, input.getType());

	this.distinctLocationName = distinctLocationName;

	if (keys != null) {
		this.keys = keys;
	} else {
		// no keys given: distinction is done on all fields
		this.keys = new Keys.ExpressionKeys<>(input.getType());
	}
}
 
Example 6
Source File: DistinctOperator.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Constructs a distinct operator over {@code input}, keeping the input's type as output type.
 *
 * @param input The data set to make distinct.
 * @param keys The keys deciding whether two elements are distinct; {@code null} means that
 *             distinction is done on all fields.
 * @param distinctLocationName The location name recorded for this operator.
 */
public DistinctOperator(DataSet<T> input, Keys<T> keys, String distinctLocationName) {
	super(input, input.getType());

	this.distinctLocationName = distinctLocationName;

	// fall back to keys built from the input type (all fields) when none were supplied
	this.keys = (keys != null) ? keys : new Keys.ExpressionKeys<T>(input.getType());
}
 
Example 7
Source File: DataSet.java    From flink with Apache License 2.0 3 votes vote down vote up
/**
 * Initiates a delta iteration. A delta iteration is similar to a regular iteration (as started by {@link #iterate(int)}),
 * but maintains state across the individual iteration steps. The solution set, which represents the current state
 * at the beginning of each iteration, can be obtained via {@link org.apache.flink.api.java.operators.DeltaIteration#getSolutionSet()}.
 * It can be accessed by joining (or CoGrouping) with it. The DataSet that represents the workset of an iteration
 * can be obtained via {@link org.apache.flink.api.java.operators.DeltaIteration#getWorkset()}.
 * The solution set is updated by producing a delta for it, which is merged into the solution set at the end of each
 * iteration step.
 *
 * <p>The delta iteration must be closed by calling {@link org.apache.flink.api.java.operators.DeltaIteration#closeWith(DataSet, DataSet)}. The two
 * parameters are the delta for the solution set and the new workset (the data set that will be fed back).
 * The return value of the {@code closeWith(DataSet, DataSet)} method is the resulting
 * data set after the iteration has terminated. Delta iterations terminate when the feed back data set
 * (the workset) is empty. In addition, a maximum number of steps is given as a fall back termination guard.
 *
 * <p>Elements in the solution set are uniquely identified by a key. When merging the solution set delta, contained elements
 * with the same key are replaced.
 *
 * <p><b>NOTE:</b> Delta iterations currently support only tuple valued data types. This restriction
 * will be removed in the future. The key is specified by the tuple position.
 *
 * <p>A code example for a delta iteration is as follows
 * <pre>
 * {@code
 * DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
 *                                                  initialState.iterateDelta(initialFeedbackSet, 100, 0);
 *
 * DataSet<Tuple2<Long, Long>> delta = iteration.groupBy(0).aggregate(Aggregations.AVG, 1)
 *                                              .join(iteration.getSolutionSet()).where(0).equalTo(0)
 *                                              .flatMap(new ProjectAndFilter());
 *
 * DataSet<Tuple2<Long, Long>> feedBack = delta.join(someOtherSet).where(...).equalTo(...).with(...);
 *
 * // close the delta iteration (delta and new workset are identical)
 * DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, feedBack);
 * }
 * </pre>
 *
 * @param workset The initial version of the data set that is fed back to the next iteration step (the workset).
 * @param maxIterations The maximum number of iteration steps, as a fall back safeguard.
 * @param keyPositions The positions of the tuple fields that are used as the key of the solution set.
 *
 * @return The DeltaIteration that marks the start of a delta iteration.
 *
 * @see org.apache.flink.api.java.operators.DeltaIteration
 */
public <R> DeltaIteration<T, R> iterateDelta(DataSet<R> workset, int maxIterations, int... keyPositions) {
	// both the initial workset and the key positions are mandatory
	Preconditions.checkNotNull(workset);
	Preconditions.checkNotNull(keyPositions);

	// the solution set key is defined by tuple positions of this data set's type
	final Keys.ExpressionKeys<T> solutionSetKeys = new Keys.ExpressionKeys<>(keyPositions, getType());

	return new DeltaIteration<>(
		getExecutionEnvironment(),
		getType(),
		this,
		workset,
		solutionSetKeys,
		maxIterations);
}
 
Example 8
Source File: DataSet.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * Hash-partitions a DataSet on the specified key fields.
 *
 * <p><b>Important:</b> This operation shuffles the whole DataSet over the network and can take a significant amount of time.
 *
 * @param fields The field expressions on which the DataSet is hash-partitioned.
 * @return The partitioned DataSet.
 */
public PartitionOperator<T> partitionByHash(String... fields) {
	// resolve the field expressions against this DataSet's type before building the operator
	final Keys.ExpressionKeys<T> hashKeys = new Keys.ExpressionKeys<>(fields, getType());
	return new PartitionOperator<>(this, PartitionMethod.HASH, hashKeys, Utils.getCallLocationName());
}
 
Example 9
Source File: JoinOperator.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * @return An incomplete Join transformation.
 *           Call {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(int...)} or
 *           {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(KeySelector)}
 *           to continue the Join.
 */
@Override
public JoinOperatorSetsPredicate where(String... fields) {
	// The field expressions are resolved against the type of input1; the resulting keys are
	// wrapped in the predicate object on which the join is continued.
	return new JoinOperatorSetsPredicate(new Keys.ExpressionKeys<>(fields, input1.getType()));
}
 
Example 10
Source File: DataSet.java    From Flink-CEPplus with Apache License 2.0 2 votes vote down vote up
/**
 * Hash-partitions a DataSet on the specified key fields.
 *
 * <p><b>Important:</b> This operation shuffles the whole DataSet over the network and can take a significant amount of time.
 *
 * @param fields The field indexes on which the DataSet is hash-partitioned.
 * @return The partitioned DataSet.
 */
public PartitionOperator<T> partitionByHash(int... fields) {
	// turn the field indexes into keys of this DataSet's type before building the operator
	final Keys.ExpressionKeys<T> hashKeys = new Keys.ExpressionKeys<>(fields, getType());
	return new PartitionOperator<>(this, PartitionMethod.HASH, hashKeys, Utils.getCallLocationName());
}
 
Example 11
Source File: DataSet.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * Groups a {@link DataSet} using field expressions. A field expression is either the name of a public field
 * or a getter method with parentheses of the {@link DataSet}'s underlying type. A dot can be used to drill down
 * into objects, as in {@code "field1.getInnerField2()" }.
 * This method returns an {@link UnsortedGrouping} on which one of the following grouping transformations
 *   can be applied.
 * <ul>
 *   <li>{@link UnsortedGrouping#sortGroup(int, org.apache.flink.api.common.operators.Order)} to get a {@link SortedGrouping}.
 *   <li>{@link UnsortedGrouping#aggregate(Aggregations, int)} to apply an Aggregate transformation.
 *   <li>{@link UnsortedGrouping#reduce(org.apache.flink.api.common.functions.ReduceFunction)} to apply a Reduce transformation.
 *   <li>{@link UnsortedGrouping#reduceGroup(org.apache.flink.api.common.functions.GroupReduceFunction)} to apply a GroupReduce transformation.
 * </ul>
 *
 * @param fields One or more field expressions on which the DataSet will be grouped.
 * @return A Grouping on which a transformation needs to be applied to obtain a transformed DataSet.
 *
 * @see Tuple
 * @see UnsortedGrouping
 * @see AggregateOperator
 * @see ReduceOperator
 * @see org.apache.flink.api.java.operators.GroupReduceOperator
 * @see DataSet
 */
public UnsortedGrouping<T> groupBy(String... fields) {
	// resolve the field expressions against this DataSet's type, then group on those keys
	final Keys.ExpressionKeys<T> groupingKeys = new Keys.ExpressionKeys<>(fields, getType());
	return new UnsortedGrouping<>(this, groupingKeys);
}
 
Example 12
Source File: DataSet.java    From Flink-CEPplus with Apache License 2.0 2 votes vote down vote up
/**
 * Groups a {@link Tuple} {@link DataSet} using field position keys.
 *
 * <p><b>Note: Field position keys can only be specified for Tuple DataSets.</b>
 *
 * <p>The field position keys specify the fields of Tuples on which the DataSet is grouped.
 * This method returns an {@link UnsortedGrouping} on which one of the following grouping transformations
 *   can be applied.
 * <ul>
 *   <li>{@link UnsortedGrouping#sortGroup(int, org.apache.flink.api.common.operators.Order)} to get a {@link SortedGrouping}.
 *   <li>{@link UnsortedGrouping#aggregate(Aggregations, int)} to apply an Aggregate transformation.
 *   <li>{@link UnsortedGrouping#reduce(org.apache.flink.api.common.functions.ReduceFunction)} to apply a Reduce transformation.
 *   <li>{@link UnsortedGrouping#reduceGroup(org.apache.flink.api.common.functions.GroupReduceFunction)} to apply a GroupReduce transformation.
 * </ul>
 *
 * @param fields One or more field positions on which the DataSet will be grouped.
 * @return A Grouping on which a transformation needs to be applied to obtain a transformed DataSet.
 *
 * @see Tuple
 * @see UnsortedGrouping
 * @see AggregateOperator
 * @see ReduceOperator
 * @see org.apache.flink.api.java.operators.GroupReduceOperator
 * @see DataSet
 */
public UnsortedGrouping<T> groupBy(int... fields) {
	// build the keys from the field positions and this DataSet's type, then group on them
	final Keys.ExpressionKeys<T> groupingKeys = new Keys.ExpressionKeys<>(fields, getType());
	return new UnsortedGrouping<>(this, groupingKeys);
}
 
Example 13
Source File: DataSet.java    From Flink-CEPplus with Apache License 2.0 2 votes vote down vote up
/**
 * Returns a distinct set of a {@link DataSet} using expression keys.
 *
 * <p>The field expression keys specify the fields of a {@link org.apache.flink.api.common.typeutils.CompositeType}
 * (e.g., Tuple or Pojo type) on which the decision is made if two elements are distinct or not.
 * In case of a {@link org.apache.flink.api.common.typeinfo.AtomicType}, only the wildcard expression ("*") is valid.
 *
 * @param fields One or more field expressions on which the distinction of the DataSet is decided.
 * @return A DistinctOperator that represents the distinct DataSet.
 */
public DistinctOperator<T> distinct(String... fields) {
	// resolve the field expressions against this DataSet's type
	final Keys.ExpressionKeys<T> distinctKeys = new Keys.ExpressionKeys<>(fields, getType());
	return new DistinctOperator<>(this, distinctKeys, Utils.getCallLocationName());
}
 
Example 14
Source File: DataSet.java    From Flink-CEPplus with Apache License 2.0 2 votes vote down vote up
/**
 * Returns a distinct set of a {@link Tuple} {@link DataSet} using field position keys.
 *
 * <p>The field position keys specify the fields of Tuples on which the decision is made if two Tuples are distinct or
 * not.
 *
 * <p>Note: Field position keys can only be specified for Tuple DataSets.
 *
 * @param fields One or more field positions on which the distinction of the DataSet is decided.
 * @return A DistinctOperator that represents the distinct DataSet.
 */
public DistinctOperator<T> distinct(int... fields) {
	// build the keys from the field positions and this DataSet's type
	final Keys.ExpressionKeys<T> distinctKeys = new Keys.ExpressionKeys<>(fields, getType());
	return new DistinctOperator<>(this, distinctKeys, Utils.getCallLocationName());
}
 
Example 15
Source File: JoinOperatorSetsBase.java    From Flink-CEPplus with Apache License 2.0 2 votes vote down vote up
/**
 * Continues a Join transformation.
 *
 * <p>Defines the fields of the first join {@link DataSet} that should be used as grouping keys. Fields
 * are the names of member fields of the underlying type of the data set.
 *
 * @param fields The fields of the first join DataSet that should be used as keys.
 * @return An incomplete Join transformation.
 *           Call {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(int...)} or
 *           {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(KeySelector)}
 *           to continue the Join.
 *
 * @see Tuple
 * @see DataSet
 */
public JoinOperatorSetsPredicateBase where(String... fields) {
	// The field names are resolved against the type of input1; the resulting keys are
	// wrapped in the predicate object on which the join is continued.
	return new JoinOperatorSetsPredicateBase(new Keys.ExpressionKeys<>(fields, input1.getType()));
}
 
Example 16
Source File: JoinOperatorSetsBase.java    From Flink-CEPplus with Apache License 2.0 2 votes vote down vote up
/**
 * Continues a Join transformation.
 *
 * <p>Defines the {@link Tuple} fields of the first join {@link DataSet} that should be used as join keys.
 *
 * <p><b>Note: Fields can only be selected as join keys on Tuple DataSets.</b>
 *
 * @param fields The indexes of the Tuple fields of the first join DataSet that should be used as keys.
 * @return An incomplete Join transformation.
 *           Call {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(int...)} or
 *           {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(KeySelector)}
 *           to continue the Join.
 *
 * @see Tuple
 * @see DataSet
 */
public JoinOperatorSetsPredicateBase where(int... fields) {
	// The field indexes are turned into keys of input1's type; the resulting keys are
	// wrapped in the predicate object on which the join is continued.
	return new JoinOperatorSetsPredicateBase(new Keys.ExpressionKeys<>(fields, input1.getType()));
}
 
Example 17
Source File: DataSet.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * Groups a {@link Tuple} {@link DataSet} using field position keys.
 *
 * <p><b>Note: Field position keys can only be specified for Tuple DataSets.</b>
 *
 * <p>The field position keys specify the fields of Tuples on which the DataSet is grouped.
 * This method returns an {@link UnsortedGrouping} on which one of the following grouping transformations
 *   can be applied.
 * <ul>
 *   <li>{@link UnsortedGrouping#sortGroup(int, org.apache.flink.api.common.operators.Order)} to get a {@link SortedGrouping}.
 *   <li>{@link UnsortedGrouping#aggregate(Aggregations, int)} to apply an Aggregate transformation.
 *   <li>{@link UnsortedGrouping#reduce(org.apache.flink.api.common.functions.ReduceFunction)} to apply a Reduce transformation.
 *   <li>{@link UnsortedGrouping#reduceGroup(org.apache.flink.api.common.functions.GroupReduceFunction)} to apply a GroupReduce transformation.
 * </ul>
 *
 * @param fields One or more field positions on which the DataSet will be grouped.
 * @return A Grouping on which a transformation needs to be applied to obtain a transformed DataSet.
 *
 * @see Tuple
 * @see UnsortedGrouping
 * @see AggregateOperator
 * @see ReduceOperator
 * @see org.apache.flink.api.java.operators.GroupReduceOperator
 * @see DataSet
 */
public UnsortedGrouping<T> groupBy(int... fields) {
	// the field positions are interpreted relative to this DataSet's type
	final Keys.ExpressionKeys<T> positionKeys = new Keys.ExpressionKeys<>(fields, getType());
	return new UnsortedGrouping<>(this, positionKeys);
}
 
Example 18
Source File: DataSet.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * Returns a distinct set of a {@link DataSet} using expression keys.
 *
 * <p>The field expression keys specify the fields of a {@link org.apache.flink.api.common.typeutils.CompositeType}
 * (e.g., Tuple or Pojo type) on which the decision is made if two elements are distinct or not.
 * In case of a {@link org.apache.flink.api.common.typeinfo.AtomicType}, only the wildcard expression ("*") is valid.
 *
 * @param fields One or more field expressions on which the distinction of the DataSet is decided.
 * @return A DistinctOperator that represents the distinct DataSet.
 */
public DistinctOperator<T> distinct(String... fields) {
	// the field expressions are interpreted relative to this DataSet's type
	final Keys.ExpressionKeys<T> expressionKeys = new Keys.ExpressionKeys<>(fields, getType());
	return new DistinctOperator<>(this, expressionKeys, Utils.getCallLocationName());
}
 
Example 19
Source File: DataSet.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * Groups a {@link Tuple} {@link DataSet} using field position keys.
 *
 * <p><b>Note: Field position keys can only be specified for Tuple DataSets.</b>
 *
 * <p>The field position keys specify the fields of Tuples on which the DataSet is grouped.
 * This method returns an {@link UnsortedGrouping} on which one of the following grouping transformations
 *   can be applied.
 * <ul>
 *   <li>{@link UnsortedGrouping#sortGroup(int, org.apache.flink.api.common.operators.Order)} to get a {@link SortedGrouping}.
 *   <li>{@link UnsortedGrouping#aggregate(Aggregations, int)} to apply an Aggregate transformation.
 *   <li>{@link UnsortedGrouping#reduce(org.apache.flink.api.common.functions.ReduceFunction)} to apply a Reduce transformation.
 *   <li>{@link UnsortedGrouping#reduceGroup(org.apache.flink.api.common.functions.GroupReduceFunction)} to apply a GroupReduce transformation.
 * </ul>
 *
 * @param fields One or more field positions on which the DataSet will be grouped.
 * @return A Grouping on which a transformation needs to be applied to obtain a transformed DataSet.
 *
 * @see Tuple
 * @see UnsortedGrouping
 * @see AggregateOperator
 * @see ReduceOperator
 * @see org.apache.flink.api.java.operators.GroupReduceOperator
 * @see DataSet
 */
public UnsortedGrouping<T> groupBy(int... fields) {
	// derive the grouping keys from the given tuple positions and this DataSet's type
	final Keys.ExpressionKeys<T> tupleKeys = new Keys.ExpressionKeys<>(fields, getType());
	return new UnsortedGrouping<>(this, tupleKeys);
}
 
Example 20
Source File: DataStream.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * Partitions a tuple DataStream on the specified key fields using a custom partitioner.
 * This method takes the key position to partition on, and a partitioner that accepts the key type.
 *
 * <p>Note: This method works only on single field keys.
 *
 * @param partitioner The partitioner to assign partitions to keys.
 * @param field The field index on which the DataStream is partitioned.
 * @return The partitioned DataStream.
 */
public <K> DataStream<T> partitionCustom(Partitioner<K> partitioner, int field) {
	// the single field index is wrapped into a one-element position array for the key definition
	final int[] keyPosition = {field};
	final Keys.ExpressionKeys<T> singleFieldKeys = new Keys.ExpressionKeys<>(keyPosition, getType());
	return partitionCustom(partitioner, singleFieldKeys);
}