org.apache.flink.api.java.operators.SingleInputUdfOperator Java Examples

The following examples show how to use org.apache.flink.api.java.operators.SingleInputUdfOperator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: DataSetConversionUtil.java From Alink with Apache License 2.0

6 votes

/**
 * Convert the given DataSet into a Table with the specified colNames and colTypes.
 *
 * <p>When {@code colTypes} is supplied and the DataSet is a UDF operator, the row type
 * is first declared directly on that operator. If the conversion still fails, the
 * DataSet is re-created with explicit type information and converted again.
 *
 * @param session  the MLEnvironment used to convert the DataSet to a Table.
 * @param data     the DataSet to convert.
 * @param colNames the specified colNames.
 * @param colTypes the specified colTypes; may be {@code null}. This variable is used
 *                 only when the DataSet is produced by a function and Flink cannot
 *                 determine automatically what the produced type is.
 * @return the converted Table.
 */
public static Table toTable(MLEnvironment session, DataSet <Row> data, String[] colNames, TypeInformation <?>[] colTypes) {
	try {
		// Only declare the row type when one was actually supplied. Building a RowTypeInfo
		// from a null type array fails, which would otherwise break the "no explicit types"
		// path for every DataSet produced by a UDF operator.
		if (null != colTypes) {
			// Try to add row type information for the dataset to be converted.
			// In most cases this keeps us out of the fallback logic in the catch block,
			// which adds an unnecessary map function just in order to add row type information.
			if (data instanceof SingleInputUdfOperator) {
				((SingleInputUdfOperator) data).returns(new RowTypeInfo(colTypes, colNames));
			} else if (data instanceof TwoInputUdfOperator) {
				((TwoInputUdfOperator) data).returns(new RowTypeInfo(colTypes, colNames));
			}
		}
		return toTable(session, data, colNames);
	} catch (Exception ex) {
		if (null == colTypes) {
			// Nothing to fall back on: surface the original failure to the caller.
			throw ex;
		} else {
			// Fallback: rebuild the DataSet with explicit type information and retry.
			DataSet <Row> t = getDataSetWithExplicitTypeDefine(data, colNames, colTypes);
			return toTable(session, t, colNames);
		}
	}
}

Example #2

Source File: DataSetConversionUtil.java From flink with Apache License 2.0

6 votes

/**
 * Builds a Table from a DataSet of rows, using the given column names and,
 * when available, the given column types.
 *
 * @param session  the MLEnvironment used to convert the DataSet to a Table.
 * @param data     the DataSet to convert.
 * @param colNames the column names of the resulting Table.
 * @param colTypes the column types; may be {@code null}. They are only needed when the
 *                 DataSet is produced by a function and Flink cannot determine
 *                 automatically what the produced type is.
 * @return the converted Table.
 */
public static Table toTable(MLEnvironment session, DataSet <Row> data, String[] colNames, TypeInformation <?>[] colTypes) {
	final boolean typesGiven = (colTypes != null);
	try {
		// Attach the row type to the producing operator up front. In most cases this
		// avoids the fallback in the catch block, which adds an otherwise unnecessary
		// map function only to carry the row type information.
		if (typesGiven && data instanceof SingleInputUdfOperator) {
			((SingleInputUdfOperator) data).returns(new RowTypeInfo(colTypes, colNames));
		} else if (typesGiven && data instanceof TwoInputUdfOperator) {
			((TwoInputUdfOperator) data).returns(new RowTypeInfo(colTypes, colNames));
		}
		return toTable(session, data, colNames);
	} catch (ValidationException ex) {
		// A ValidationException is currently thrown and caught here for further processing,
		// because the getType() API of the Transformation can only be accessed once.
		// This could be improved by adding an "isTypeSet()" API to the Transformation class.
		if (!typesGiven) {
			throw ex;
		}
		DataSet <Row> typed = fallbackToExplicitTypeDefine(data, colNames, colTypes);
		return toTable(session, typed, colNames);
	}
}

Example #3

Source File: FlinkBatchTransformTranslators.java From beam with Apache License 2.0

5 votes

/**
 * Attaches every side input to the given operator as a broadcast set.
 *
 * <p>For each view, the matching DataSet is looked up in the translation context and
 * registered under the id of the view's internal tag.
 *
 * @param sideInputs the side-input views to attach.
 * @param outputDataSet the operator that receives the broadcast sets.
 * @param context the translation context used to look up the side-input DataSets.
 */
private static void transformSideInputs(
    List<PCollectionView<?>> sideInputs,
    SingleInputUdfOperator<?, ?, ?> outputDataSet,
    FlinkBatchTranslationContext context) {
  for (PCollectionView<?> view : sideInputs) {
    // Resolve the Flink DataSet backing this view, then the name to register it under.
    DataSet<?> sideInputData = context.getSideInputDataSet(view);
    String broadcastName = view.getTagInternal().getId();
    outputDataSet.withBroadcastSet(sideInputData, broadcastName);
  }
}

Example #4

Source File: PregelTranslationTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Checks that a vertex-centric iteration built from plain (value-less) edges is
 * translated into a delta iteration that carries the configured name, parallelism,
 * aggregator and broadcast set.
 */
@Test
public void testTranslationPlainEdges() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final DataSet<Long> bcVar = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));
	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	// Kept as an anonymous class so the generic signature stays visible to type extraction.
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(initialVertices, edges.map(toNullValuedEdge), env);

	VertexCentricConfiguration parameters = new VertexCentricConfiguration();
	parameters.addBroadcastSet(BC_SET_NAME, bcVar);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runVertexCentricIteration(new MyCompute(), null, NUM_ITERATIONS, parameters)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(
		AGGREGATOR_NAME,
		iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// the input of the next-workset operator is the operator holding the broadcast set
	SingleInputUdfOperator<?, ?, ?> nextWorkset =
		(SingleInputUdfOperator<?, ?, ?>) resultSet.getNextWorkset();
	TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup =
		(TwoInputUdfOperator<?, ?, ?, ?>) nextWorkset.getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(bcVar, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
}

Example #5

Source File: GSATranslationTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Checks that a gather-sum-apply iteration is translated into a delta iteration that
 * carries the configured name, parallelism and aggregator, and that each of the three
 * broadcast sets ends up on the operator of its own phase.
 */
@Test
public void testTranslation() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Long> bcGather = env.fromElements(1L);
	final DataSet<Long> bcSum = env.fromElements(1L);
	final DataSet<Long> bcApply = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple3<Long, Long, NullValue>> edgeTuples =
		env.fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance()));
	DataSet<Edge<Long, NullValue>> edges = edgeTuples.map(new Tuple3ToEdgeMap<>());

	Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);

	GSAConfiguration parameters = new GSAConfiguration();
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.addBroadcastSetForGatherFunction(BC_SET_GATHER_NAME, bcGather);
	parameters.addBroadcastSetForSumFunction(BC_SET_SUM_NAME, bcSum);
	parameters.addBroadcastSetForApplyFunction(BC_SET_APLLY_NAME, bcApply);

	DataSet<Vertex<Long, Long>> result = graph
		.runGatherSumApplyIteration(
			new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(),
			NUM_ITERATIONS, parameters)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(
		AGGREGATOR_NAME,
		iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	// walk upstream from the join: its first input, then that operator's input
	SingleInputUdfOperator<?, ?, ?> sumReduce =
		(SingleInputUdfOperator<?, ?, ?>) solutionSetJoin.getInput1();
	SingleInputUdfOperator<?, ?, ?> gatherMap =
		(SingleInputUdfOperator<?, ?, ?>) sumReduce.getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(bcGather, gatherMap.getBroadcastSets().get(BC_SET_GATHER_NAME));
	assertEquals(bcSum, sumReduce.getBroadcastSets().get(BC_SET_SUM_NAME));
	assertEquals(bcApply, solutionSetJoin.getBroadcastSets().get(BC_SET_APLLY_NAME));
}

Example #6

Source File: PregelTranslationTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that a vertex-centric iteration built from plain (value-less) edges is
 * translated into a delta iteration that carries the configured name, parallelism,
 * aggregator and broadcast set.
 */
@Test
public void testTranslationPlainEdges() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final DataSet<Long> bcVar = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));
	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	// Kept as an anonymous class so the generic signature stays visible to type extraction.
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(initialVertices, edges.map(toNullValuedEdge), env);

	VertexCentricConfiguration parameters = new VertexCentricConfiguration();
	parameters.addBroadcastSet(BC_SET_NAME, bcVar);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runVertexCentricIteration(new MyCompute(), null, NUM_ITERATIONS, parameters)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(
		AGGREGATOR_NAME,
		iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// the input of the next-workset operator is the operator holding the broadcast set
	SingleInputUdfOperator<?, ?, ?> nextWorkset =
		(SingleInputUdfOperator<?, ?, ?>) resultSet.getNextWorkset();
	TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup =
		(TwoInputUdfOperator<?, ?, ?, ?>) nextWorkset.getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(bcVar, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
}

Example #7

Source File: GSATranslationTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that a gather-sum-apply iteration is translated into a delta iteration that
 * carries the configured name, parallelism and aggregator, and that each of the three
 * broadcast sets ends up on the operator of its own phase.
 */
@Test
public void testTranslation() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Long> bcGather = env.fromElements(1L);
	final DataSet<Long> bcSum = env.fromElements(1L);
	final DataSet<Long> bcApply = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple3<Long, Long, NullValue>> edgeTuples =
		env.fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance()));
	DataSet<Edge<Long, NullValue>> edges = edgeTuples.map(new Tuple3ToEdgeMap<>());

	Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);

	GSAConfiguration parameters = new GSAConfiguration();
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.addBroadcastSetForGatherFunction(BC_SET_GATHER_NAME, bcGather);
	parameters.addBroadcastSetForSumFunction(BC_SET_SUM_NAME, bcSum);
	parameters.addBroadcastSetForApplyFunction(BC_SET_APLLY_NAME, bcApply);

	DataSet<Vertex<Long, Long>> result = graph
		.runGatherSumApplyIteration(
			new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(),
			NUM_ITERATIONS, parameters)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(
		AGGREGATOR_NAME,
		iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	// walk upstream from the join: its first input, then that operator's input
	SingleInputUdfOperator<?, ?, ?> sumReduce =
		(SingleInputUdfOperator<?, ?, ?>) solutionSetJoin.getInput1();
	SingleInputUdfOperator<?, ?, ?> gatherMap =
		(SingleInputUdfOperator<?, ?, ?>) sumReduce.getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(bcGather, gatherMap.getBroadcastSets().get(BC_SET_GATHER_NAME));
	assertEquals(bcSum, sumReduce.getBroadcastSets().get(BC_SET_SUM_NAME));
	assertEquals(bcApply, solutionSetJoin.getBroadcastSets().get(BC_SET_APLLY_NAME));
}

Example #8

Source File: PregelTranslationTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that a vertex-centric iteration built from plain (value-less) edges is
 * translated into a delta iteration that carries the configured name, parallelism,
 * aggregator and broadcast set.
 */
@Test
public void testTranslationPlainEdges() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final DataSet<Long> bcVar = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));
	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	// Kept as an anonymous class so the generic signature stays visible to type extraction.
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(initialVertices, edges.map(toNullValuedEdge), env);

	VertexCentricConfiguration parameters = new VertexCentricConfiguration();
	parameters.addBroadcastSet(BC_SET_NAME, bcVar);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runVertexCentricIteration(new MyCompute(), null, NUM_ITERATIONS, parameters)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(
		AGGREGATOR_NAME,
		iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// the input of the next-workset operator is the operator holding the broadcast set
	SingleInputUdfOperator<?, ?, ?> nextWorkset =
		(SingleInputUdfOperator<?, ?, ?>) resultSet.getNextWorkset();
	TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup =
		(TwoInputUdfOperator<?, ?, ?, ?>) nextWorkset.getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(bcVar, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
}

Example #9

Source File: GSATranslationTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that a gather-sum-apply iteration is translated into a delta iteration that
 * carries the configured name, parallelism and aggregator, and that each of the three
 * broadcast sets ends up on the operator of its own phase.
 */
@Test
public void testTranslation() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Long> bcGather = env.fromElements(1L);
	final DataSet<Long> bcSum = env.fromElements(1L);
	final DataSet<Long> bcApply = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple3<Long, Long, NullValue>> edgeTuples =
		env.fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance()));
	DataSet<Edge<Long, NullValue>> edges = edgeTuples.map(new Tuple3ToEdgeMap<>());

	Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);

	GSAConfiguration parameters = new GSAConfiguration();
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.addBroadcastSetForGatherFunction(BC_SET_GATHER_NAME, bcGather);
	parameters.addBroadcastSetForSumFunction(BC_SET_SUM_NAME, bcSum);
	parameters.addBroadcastSetForApplyFunction(BC_SET_APLLY_NAME, bcApply);

	DataSet<Vertex<Long, Long>> result = graph
		.runGatherSumApplyIteration(
			new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(),
			NUM_ITERATIONS, parameters)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(
		AGGREGATOR_NAME,
		iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	// walk upstream from the join: its first input, then that operator's input
	SingleInputUdfOperator<?, ?, ?> sumReduce =
		(SingleInputUdfOperator<?, ?, ?>) solutionSetJoin.getInput1();
	SingleInputUdfOperator<?, ?, ?> gatherMap =
		(SingleInputUdfOperator<?, ?, ?>) sumReduce.getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(bcGather, gatherMap.getBroadcastSets().get(BC_SET_GATHER_NAME));
	assertEquals(bcSum, sumReduce.getBroadcastSets().get(BC_SET_SUM_NAME));
	assertEquals(bcApply, solutionSetJoin.getBroadcastSets().get(BC_SET_APLLY_NAME));
}