org.apache.flink.api.java.operators.Operator Java Examples

The following examples show how to use org.apache.flink.api.java.operators.Operator. Each example is taken from an open-source project; the source file and license are noted above it.
Example #1
Source File: OperatorTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testConfigurationOfParallelism() {
	Operator operator = new MockOperator();

	// verify explicit change in parallelism
	int parallelism = 36;
	operator.setParallelism(parallelism);

	assertEquals(parallelism, operator.getParallelism());

	// verify that parallelism is reset to default flag value
	parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
	operator.setParallelism(parallelism);

	assertEquals(parallelism, operator.getParallelism());
}
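The OperatorTest cases on this page rely on a MockOperator fixture that the listing does not show. A minimal sketch, assuming the fixture only needs to expose Operator's protected constructor (the class body and the concrete types here are illustrative, not Flink's actual test fixture):

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.Operator;

// Hypothetical stand-in for the MockOperator used by the tests on this page.
class MockOperator extends Operator<String, MockOperator> {
	MockOperator() {
		// Operator's constructor is protected and takes the owning execution
		// environment plus the operator's result type, so a trivial subclass
		// is enough to instantiate it in a test.
		super(ExecutionEnvironment.getExecutionEnvironment(), BasicTypeInfo.STRING_TYPE_INFO);
	}
}

ExecutionConfig.PARALLELISM_DEFAULT is the sentinel value -1 ("use the environment's default"), so the second assertion verifies that the explicit parallelism of 36 can be cleared again.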
 
Example #2
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateMerge(List<DataSet<Tuple>> inputs, FlowNode node) {

	DataSet<Tuple> unioned = null;
	TypeInformation<Tuple> type = null;

	int maxDop = -1;

	for(DataSet<Tuple> input : inputs) {
		maxDop = Math.max(maxDop, ((Operator)input).getParallelism());
		if(unioned == null) {
			unioned = input;
			type = input.getType();
		}
		else {
			unioned = unioned.union(input);
		}
	}
	return unioned.map(new IdMapper())
			.returns(type)
			.setParallelism(maxDop);

}
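The IdMapper used above is not shown in this listing. It is presumably an identity MapFunction whose only purpose is to give the union a single trailing operator on which the result type and parallelism can be set explicitly; a plausible sketch:

import org.apache.flink.api.common.functions.MapFunction;

// Hypothetical sketch of the IdMapper referenced above: a pass-through mapper.
public static class IdMapper<T> implements MapFunction<T, T> {

	@Override
	public T map(T value) {
		return value;
	}
}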
 
Example #3
Source File: OperatorTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testConfigurationOfResource() throws Exception {
	Operator operator = new MockOperator();

	Method opMethod = Operator.class.getDeclaredMethod("setResources", ResourceSpec.class, ResourceSpec.class);
	opMethod.setAccessible(true);

	// verify explicit change in resources
	ResourceSpec minResources = ResourceSpec.newBuilder().setCpuCores(1.0).setHeapMemoryInMB(100).build();
	ResourceSpec preferredResources = ResourceSpec.newBuilder().setCpuCores(2.0).setHeapMemoryInMB(200).build();
	opMethod.invoke(operator, minResources, preferredResources);

	assertEquals(minResources, operator.getMinResources());
	assertEquals(preferredResources, operator.getPreferredResources());
}
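Operator.setResources(...) is not public, which is why this test looks the method up via reflection and calls setAccessible(true) before invoking it. A later example on this page shows the same test written against a Flink version where the ResourceSpec builder takes its arguments directly instead of through setter methods.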
 
Example #4
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private void translateSink(FlowProcess flowProcess, DataSet<Tuple> input, FlowNode node) {

	Tap tap = this.getSingle(node.getSinkTaps());
	Configuration sinkConfig = this.getNodeConfig(node);
	tap.sinkConfInit(flowProcess, sinkConfig);

	int desiredDop = tap.getScheme().getNumSinkParts();
	int inputDop = ((Operator)input).getParallelism();
	int dop;

	if (inputDop == 1) {
		// input operators have dop 1. Probably because they perform a non-keyed reduce or coGroup
		dop = 1;
	}
	else {
		if (desiredDop > 0) {
			// output dop explicitly set.
			if (input instanceof GroupReduceOperator) {
				// input is a reduce and we must preserve its sorting.
				// we must set the desired dop also for reduce and related operators
				adjustDopOfReduceOrCoGroup((GroupReduceOperator) input, desiredDop);
			}
			dop = desiredDop;
		}
		else {
			dop = inputDop;
		}
	}

	input
			.output(new TapOutputFormat(node))
			.name(tap.getIdentifier())
			.setParallelism(dop)
			.withParameters(FlinkConfigConverter.toFlinkConfig(sinkConfig));

}
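The sink parallelism is derived in three steps: an input with dop 1 (typically a non-keyed reduce or coGroup) forces a dop-1 sink; otherwise an explicitly requested number of sink parts wins, and a sorted GroupReduceOperator input is adjusted to the same dop so its sort order survives; failing both, the sink simply inherits the input's parallelism.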
 
Example #5
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateMap(DataSet<Tuple> input, FlowNode node) {

	Fields outFields = getOutScope(node).getOutValuesFields();
	registerKryoTypes(outFields);

	int dop = ((Operator)input).getParallelism();

	return input
			.mapPartition(new EachMapper(node))
			.returns(new TupleTypeInfo(outFields))
			.withParameters(this.getFlinkNodeConfig(node))
			.setParallelism(dop)
			.name("map-" + node.getID());

}
 
Example #6
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private DataSet<Tuple2<Tuple, Tuple[]>> prepareInnerCrossInput(List<DataSet<Tuple>> inputs, FlowNode node, Fields[] inputFields, int dop) {

	int numJoinInputs = inputs.size();

	TypeInformation<Tuple2<Tuple, Tuple[]>> tupleJoinListsTypeInfo =
			new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
					new TupleTypeInfo(Fields.UNKNOWN),
					new TupleArrayTypeInfo(numJoinInputs, Arrays.copyOf(inputFields, 1))
			);

	int mapDop = ((Operator)inputs.get(0)).getParallelism();

	// prepare tuple list for join
	DataSet<Tuple2<Tuple, Tuple[]>> tupleJoinLists = inputs.get(0)
			.map(new JoinPrepareMapper(numJoinInputs, null, null))
			.returns(tupleJoinListsTypeInfo)
			.setParallelism(mapDop)
			.name("coGroup-" + node.getID());

	for (int i = 1; i < inputs.size(); i++) {

		tupleJoinListsTypeInfo =
				new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
						new TupleTypeInfo(Fields.UNKNOWN),
						new TupleArrayTypeInfo(numJoinInputs, Arrays.copyOf(inputFields, i+1))
				);

		tupleJoinLists = tupleJoinLists.crossWithTiny(inputs.get(i))
				.with(new TupleAppendCrosser(i))
				.returns(tupleJoinListsTypeInfo)
				.setParallelism(dop)
				.name("coGroup-" + node.getID());
	}

	return tupleJoinLists;
}
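crossWithTiny is a size hint to the optimizer: the second input is expected to be small and is typically broadcast to every partition of the growing tuple-list data set, so the cross itself runs at the parallelism chosen for the first input.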
 
Example #7
Source File: OperatorTest.java    From flink with Apache License 2.0
@Test
public void testConfigurationOfResource() throws Exception {
	Operator operator = new MockOperator();

	Method opMethod = Operator.class.getDeclaredMethod("setResources", ResourceSpec.class, ResourceSpec.class);
	opMethod.setAccessible(true);

	// verify explicit change in resources
	ResourceSpec minResources = ResourceSpec.newBuilder(1.0, 100).build();
	ResourceSpec preferredResources = ResourceSpec.newBuilder(2.0, 200).build();
	opMethod.invoke(operator, minResources, preferredResources);

	assertEquals(minResources, operator.getMinResources());
	assertEquals(preferredResources, operator.getPreferredResources());
}
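This is the same resource test as above, written against a Flink version whose ResourceSpec.newBuilder appears to take CPU cores and heap memory (in MB) as direct arguments rather than through setCpuCores and setHeapMemoryInMB.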
 
Example #8
Source File: BootstrapTransformationTest.java    From flink with Apache License 2.0
private static <T> int getParallelism(DataSet<T> dataSet) {
	//All concrete implementations of DataSet are operators so this should always be safe.
	return ((Operator) dataSet).getParallelism();
}
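A quick usage sketch, assuming the helper is visible from the calling code (the data set and environment here are illustrative): sources such as env.fromElements return a DataSource, which is a concrete Operator, so the cast succeeds:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Integer> numbers = env.fromElements(1, 2, 3).setParallelism(1);
int parallelism = getParallelism(numbers); // returns 1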
 
Example #9
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private DataSet<Tuple3<Tuple, Integer, Tuple>> prepareBufferCoGroupInput(List<DataSet<Tuple>> inputs,
					FlowNode node, Fields[] inputFields, Fields[] keyFields, String[][] flinkKeys, int dop) {

	DataSet<Tuple3<Tuple, Integer, Tuple>> coGroupInput = null;

	for(int i=0; i<inputs.size(); i++) {

		// get Flink DataSet
		DataSet<Tuple> input = inputs.get(i);

		// get keys
		int[] keyPos = inputFields[i].getPos(keyFields[i]);

		if(keyFields[i].isNone()) {
			// set default key
			keyFields[i] = new Fields("defaultKey");
		}

		TupleTypeInfo keysTypeInfo = inputFields[i].isDefined() ?
				new TupleTypeInfo(inputFields[i].select(keyFields[i])) :
				new TupleTypeInfo(Fields.UNKNOWN);

		TypeInformation<Tuple3<Tuple, Integer, Tuple>> keyedType =
				new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
						keysTypeInfo,
						BasicTypeInfo.INT_TYPE_INFO,
						new TupleTypeInfo(inputFields[i])
		);

		int inputDop = ((Operator)input).getParallelism();

		// add mapper
		DataSet<Tuple3<Tuple, Integer, Tuple>> keyedInput = input
				.map(new BufferJoinKeyExtractor(i, keyPos))
				.returns(keyedType)
				.setParallelism(inputDop)
				.name("coGroup-" + node.getID());

		// add to groupByInput
		if(coGroupInput == null) {
			coGroupInput = keyedInput;
		}
		else {
			coGroupInput = coGroupInput
					.union(keyedInput);
		}
	}

	return coGroupInput;
}
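Each input is first mapped to a Tuple3 of (key tuple, input index, full tuple): the BufferJoinKeyExtractor extracts the key at the given positions and tags every record with the index of the input it came from, so that all inputs can be unioned into one data set and still be told apart by the downstream buffer co-group.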
 
Example #10
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateInnerHashJoin(FlowNode node, List<DataSet<Tuple>> inputs, Fields[] inputFields, Fields[] keyFields, String[][] flinkKeys) {

	int numJoinInputs = inputs.size();

	// get out fields of node
	Scope outScope = getOutScope(node);
	Fields outFields;
	if (outScope.isEvery()) {
		outFields = outScope.getOutGroupingFields();
	} else {
		outFields = outScope.getOutValuesFields();
	}
	registerKryoTypes(outFields);

	int probeSideDOP = ((Operator)inputs.get(0)).getParallelism();

	if(numJoinInputs == 2) {
		// binary join

		return inputs.get(0).join(inputs.get(1), JoinHint.BROADCAST_HASH_SECOND)
				.where(flinkKeys[0]).equalTo(flinkKeys[1])
				.with(new BinaryHashJoinJoiner(node, inputFields[0], keyFields[0]))
				.withParameters(this.getFlinkNodeConfig(node))
				.setParallelism(probeSideDOP)
				.returns(new TupleTypeInfo(outFields))
				.name("hashjoin-" + node.getID());

	}
	else {
		// nary join

		TupleTypeInfo keysTypeInfo = inputFields[0].isDefined() ?
				new TupleTypeInfo(inputFields[0].select(keyFields[0])) :
				new TupleTypeInfo(Fields.UNKNOWN);
		keysTypeInfo.registerKeyFields(keyFields[0]);

		TypeInformation<Tuple2<Tuple, Tuple[]>> tupleJoinListsTypeInfo =
				new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
						keysTypeInfo,
						new TupleArrayTypeInfo(numJoinInputs-1, Arrays.copyOf(inputFields, 1))
				);

		int mapDop = ((Operator) inputs.get(0)).getParallelism();

		// prepare tuple list for join
		DataSet<Tuple2<Tuple, Tuple[]>> tupleJoinLists = inputs.get(0)
				.map(new JoinPrepareMapper(numJoinInputs - 1, inputFields[0], keyFields[0]))
				.returns(tupleJoinListsTypeInfo)
				.setParallelism(mapDop)
				.name("hashjoin-" + node.getID());

		for (int i = 0; i < flinkKeys[0].length; i++) {
			flinkKeys[0][i] = "f0." + i;
		}

		// join all inputs except last
		for (int i = 1; i < inputs.size()-1; i++) {

			tupleJoinListsTypeInfo =
					new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
							keysTypeInfo,
							new TupleArrayTypeInfo(numJoinInputs-1, Arrays.copyOf(inputFields, i+1))
					);

			tupleJoinLists = tupleJoinLists.join(inputs.get(i), JoinHint.BROADCAST_HASH_SECOND)
					.where(flinkKeys[0]).equalTo(flinkKeys[i])
					.with(new TupleAppendJoiner(i))
					.returns(tupleJoinListsTypeInfo)
					.withForwardedFieldsFirst(flinkKeys[0])
					.setParallelism(probeSideDOP)
					.name("hashjoin-" + node.getID());
		}

		// join last input
		return tupleJoinLists.join(inputs.get(numJoinInputs-1), JoinHint.BROADCAST_HASH_SECOND)
				.where(flinkKeys[0]).equalTo(flinkKeys[numJoinInputs-1])
				.with(new NaryHashJoinJoiner(node, numJoinInputs))
				.withParameters(this.getFlinkNodeConfig(node))
				.setParallelism(probeSideDOP)
				.returns(new TupleTypeInfo(outFields))
				.name("hashjoin-" + node.getID());
	}
}
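After the JoinPrepareMapper wraps each probe-side record into a Tuple2 of (key tuple, tuple array), the join keys live inside the tuple's first field; that is why the loop rewrites the key expressions to "f0.0", "f0.1", and so on before the chained joins. Every build side is attached with JoinHint.BROADCAST_HASH_SECOND, so the probe side keeps its original parallelism (probeSideDOP) through the whole chain.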
 
Example #11
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateLeftHashJoin(FlowNode node, List<DataSet<Tuple>> inputs, Fields[] inputFields, Fields[] keyFields, String[][] flinkKeys) {

	int numJoinInputs = inputs.size();

	// get out fields of node
	Scope outScope = getOutScope(node);
	Fields outFields;
	if (outScope.isEvery()) {
		outFields = outScope.getOutGroupingFields();
	} else {
		outFields = outScope.getOutValuesFields();
	}
	registerKryoTypes(outFields);

	int probeSideDOP = ((Operator)inputs.get(0)).getParallelism();

	if(numJoinInputs == 2) {
		// binary join

		return inputs.get(0)
				.leftOuterJoin(inputs.get(1), JoinHint.BROADCAST_HASH_SECOND)
				.where(flinkKeys[0]).equalTo(flinkKeys[1])
				.with(new BinaryHashJoinJoiner(node, inputFields[0], keyFields[0]))
				.withParameters(this.getFlinkNodeConfig(node))
				.setParallelism(probeSideDOP)
				.returns(new TupleTypeInfo(outFields))
				.name("hashjoin-" + node.getID());

	}
	else {
		// nary join

		TupleTypeInfo keysTypeInfo = inputFields[0].isDefined() ?
				new TupleTypeInfo(inputFields[0].select(keyFields[0])) :
				new TupleTypeInfo(Fields.UNKNOWN);
		keysTypeInfo.registerKeyFields(keyFields[0]);

		TypeInformation<Tuple2<Tuple, Tuple[]>> tupleJoinListsTypeInfo =
				new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
						keysTypeInfo,
						new TupleArrayTypeInfo(numJoinInputs-1, Arrays.copyOf(inputFields, 1))
				);

		// prepare tuple list for join
		DataSet<Tuple2<Tuple, Tuple[]>> tupleJoinLists = inputs.get(0)
				.map(new JoinPrepareMapper(numJoinInputs - 1, inputFields[0], keyFields[0]))
				.returns(tupleJoinListsTypeInfo)
				.setParallelism(probeSideDOP)
				.name("hashjoin-" + node.getID());

		for (int i = 0; i < flinkKeys[0].length; i++) {
			flinkKeys[0][i] = "f0." + i;
		}

		// join all inputs except last
		for (int i = 1; i < inputs.size()-1; i++) {

			tupleJoinListsTypeInfo =
					new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
							keysTypeInfo,
							new TupleArrayTypeInfo(numJoinInputs-1, Arrays.copyOf(inputFields, i+1))
					);

			tupleJoinLists = tupleJoinLists
					.join(inputs.get(i), JoinHint.BROADCAST_HASH_SECOND)
					.where(flinkKeys[0]).equalTo(flinkKeys[i])
					.with(new TupleAppendJoiner(i))
					.returns(tupleJoinListsTypeInfo)
					.withForwardedFieldsFirst(flinkKeys[0])
					.setParallelism(probeSideDOP)
					.name("hashjoin-" + node.getID());
		}

		// join last input
		return tupleJoinLists
				.leftOuterJoin(inputs.get(numJoinInputs-1), JoinHint.BROADCAST_HASH_SECOND)
				.where(flinkKeys[0]).equalTo(flinkKeys[numJoinInputs-1])
				.with(new NaryHashJoinJoiner(node, numJoinInputs))
				.withParameters(this.getFlinkNodeConfig(node))
				.setParallelism(probeSideDOP)
				.returns(new TupleTypeInfo(outFields))
				.name("hashjoin-" + node.getID());
	}
}
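This is the mirror image of translateInnerHashJoin above: the intermediate joins of an n-ary join are still inner joins, but the final join (and, in the binary case, the only join) is a leftOuterJoin, so probe-side tuples without a build-side match are retained.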
 
Example #12
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateInnerCrossProduct(FlowNode node, List<DataSet<Tuple>> inputs) {

	int numJoinInputs = inputs.size();

	// get out fields of node
	Scope outScope = getOutScope(node);
	Fields outFields;
	if (outScope.isEvery()) {
		outFields = outScope.getOutGroupingFields();
	} else {
		outFields = outScope.getOutValuesFields();
	}
	registerKryoTypes(outFields);

	int probeSideDOP = ((Operator)inputs.get(0)).getParallelism();

	TypeInformation<Tuple2<Tuple, Tuple[]>> tupleJoinListsTypeInfo =
			new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
					new TupleTypeInfo(Fields.UNKNOWN),
					ObjectArrayTypeInfo.getInfoFor(new TupleTypeInfo(Fields.UNKNOWN))
			);

	// prepare tuple list for join
	DataSet<Tuple2<Tuple, Tuple[]>> tupleJoinLists = inputs.get(0)
			.map(new JoinPrepareMapper(numJoinInputs, null, null))
			.returns(tupleJoinListsTypeInfo)
			.setParallelism(probeSideDOP)
			.name("hashjoin-" + node.getID());

	for (int i = 1; i < inputs.size(); i++) {
		tupleJoinLists = tupleJoinLists.crossWithTiny(inputs.get(i))
				.with(new TupleAppendCrosser(i))
				.returns(tupleJoinListsTypeInfo)
				.setParallelism(probeSideDOP)
				.name("hashjoin-" + node.getID());
	}

	return tupleJoinLists
			.mapPartition(new HashJoinMapper(node))
			.withParameters(this.getFlinkNodeConfig(node))
			.setParallelism(probeSideDOP)
			.returns(new TupleTypeInfo(outFields))
			.name("hashjoin-" + node.getID());

}
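Since no join keys are involved here, every additional input is attached with crossWithTiny, i.e. it is expected to be small and is combined with every probe-side record; the concluding mapPartition with the HashJoinMapper then evaluates the actual join logic over the collected tuple arrays. The probe side's parallelism is carried through every operator in the chain.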
 