org.apache.flink.streaming.api.datastream.IterativeStream Java Examples

The following examples show how to use org.apache.flink.streaming.api.datastream.IterativeStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IterateITCase.java    From Flink-CEPplus with Apache License 2.0 7 votes vote down vote up
/**
 * Creates an iteration and a co-iteration derived from it through {@code withFeedbackType},
 * closes both, and then checks on the generated stream graph that there are exactly two
 * iteration source/sink pairs and that, for each pair, the target of the source's first
 * out-edge is the same vertex as the origin of the sink's first in-edge.
 */
@Test
public void testImmutabilityWithCoiteration() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// the no-op map after the source is there "for rebalance"
	DataStream<Integer> input = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> plainIteration = input.iterate();
	// withFeedbackType is expected to produce a new iteration instead of altering plainIteration
	ConnectedIterativeStreams<Integer, String> coIteration = plainIteration.withFeedbackType(String.class);

	plainIteration.closeWith(plainIteration.map(noOpIntMap)).print();
	coIteration.closeWith(coIteration.map(noOpCoMap)).print();

	StreamGraph streamGraph = env.getStreamGraph();

	assertEquals(2, streamGraph.getIterationSourceSinkPairs().size());

	for (Tuple2<StreamNode, StreamNode> pair : streamGraph.getIterationSourceSinkPairs()) {
		StreamNode iterationSource = pair.f0;
		StreamNode iterationSink = pair.f1;
		assertEquals(
			streamGraph.getTargetVertex(iterationSource.getOutEdges().get(0)),
			streamGraph.getSourceVertex(iterationSink.getInEdges().get(0)));
	}
}
 
Example #2
Source File: FlinkTopology.java    From incubator-samoa with Apache License 2.0 6 votes vote down vote up
/**
 * Initialises every processing item of the given cycle and closes the iteration by feeding
 * the tail's output stream back into the head.
 *
 * @param cycleID index into {@code cycles} of the cycle to initialise
 */
private void initializeCycle(int cycleID) {
    // the cycle's list keeps the tail at index 0 and the head at the last index
    FlinkProcessingItem cycleTail = cycles.get(cycleID).get(0);
    FlinkProcessingItem cycleHead = cycles.get(cycleID).get(cycles.get(cycleID).size() - 1);

    // the head is the starting point of the iteration, so it has to be set up first
    if (!cycleHead.isInitialised()) {
        cycleHead.setOnIteration(true);
        cycleHead.initialise();
        cycleHead.initialiseStreams();
    }

    // walk from the item next to the head down to the tail
    int position = cycles.get(cycleID).size() - 2;
    while (position >= 0) {
        FlinkProcessingItem item = cycles.get(cycleID).get(position);
        item.initialise();
        item.initialiseStreams();
        position--;
    }

    // the stream the head receives from the tail is the feedback edge of the iteration
    SingleOutputStreamOperator feedback =
            (SingleOutputStreamOperator) cycleHead.getInputStreamBySourceID(cycleTail.getComponentId()).getOutStream();
    feedback.setParallelism(cycleHead.getParallelism());
    IterativeStream iterationStart = (IterativeStream) cycleHead.getDataStream();
    iterationStart.closeWith(feedback);
}
 
Example #3
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a small iteration job and verifies, for every iteration source/sink pair of the
 * stream graph, that both nodes share a non-null co-location group and sit in the default
 * slot sharing group.
 */
@Test
public void testIteration() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = env.fromElements(1, 2, 3).name("source");
	IterativeStream<Integer> loop = input.iterate(3000);
	loop.name("iteration").setParallelism(2);
	DataStream<Integer> incremented = loop.map(x -> x + 1).name("map").setParallelism(2);
	// the filter rejects every element, so nothing is actually fed back
	DataStream<Integer> feedback = incremented.filter((x) -> false).name("filter").setParallelism(2);
	loop.closeWith(feedback).print();

	StreamGraph graph = env.getStreamGraph();
	for (Tuple2<StreamNode, StreamNode> pair : graph.getIterationSourceSinkPairs()) {
		StreamNode head = pair.f0;
		StreamNode tail = pair.f1;

		assertNotNull(head.getCoLocationGroup());
		assertEquals(head.getCoLocationGroup(), tail.getCoLocationGroup());

		assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, head.getSlotSharingGroup());
		assertEquals(head.getSlotSharingGroup(), tail.getSlotSharingGroup());
	}
}
 
Example #4
Source File: IterateITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an iteration and a co-iteration derived from it through {@code withFeedbackType},
 * closes both, and then checks on the generated stream graph that there are exactly two
 * iteration source/sink pairs and that, for each pair, the target of the source's first
 * out-edge is the same vertex as the origin of the sink's first in-edge.
 */
@Test
public void testImmutabilityWithCoiteration() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// the no-op map after the source is there "for rebalance"
	DataStream<Integer> input = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> plainIteration = input.iterate();
	// withFeedbackType is expected to produce a new iteration instead of altering plainIteration
	ConnectedIterativeStreams<Integer, String> coIteration = plainIteration.withFeedbackType(String.class);

	plainIteration.closeWith(plainIteration.map(noOpIntMap)).print();
	coIteration.closeWith(coIteration.map(noOpCoMap)).print();

	StreamGraph streamGraph = env.getStreamGraph();

	assertEquals(2, streamGraph.getIterationSourceSinkPairs().size());

	for (Tuple2<StreamNode, StreamNode> pair : streamGraph.getIterationSourceSinkPairs()) {
		StreamNode iterationSource = pair.f0;
		StreamNode iterationSink = pair.f1;
		assertEquals(
			streamGraph.getTargetVertex(iterationSource.getOutEdges().get(0)),
			streamGraph.getSourceVertex(iterationSink.getInEdges().get(0)));
	}
}
 
Example #5
Source File: IterativeConnectedComponents.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: reads the edge stream, runs it through a streaming iteration whose body
 * keys the tuples by their first field and applies {@code AssignComponents}, prints the
 * stream returned by {@code closeWith} and executes the job.
 *
 * @param args command line arguments, handed to {@code parseParameters}
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

		// stop right away when the parameters are rejected
		if (!parseParameters(args)) {
			return;
		}

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStream<Tuple2<Long, Long>> edgeStream = getEdgesDataSet(env);

		IterativeStream<Tuple2<Long, Long>> loop = edgeStream.iterate();

		// loop body: key by field 0, then let AssignComponents produce the feedback tuples
		DataStream<Tuple2<Long, Long>> feedback = loop.keyBy(0).flatMap(new AssignComponents());
		DataStream<Tuple2<Long, Long>> components = loop.closeWith(feedback);

		// emit the results
		components.print();

		env.execute("Streaming Connected Components");
	}
 
Example #6
Source File: IterateITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an iteration and a co-iteration derived from it through {@code withFeedbackType},
 * closes both, and then checks on the generated stream graph that there are exactly two
 * iteration source/sink pairs and that, for each pair, the target of the source's first
 * out-edge is the same vertex as the origin of the sink's first in-edge.
 */
@Test
public void testImmutabilityWithCoiteration() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// the no-op map after the source is there "for rebalance"
	DataStream<Integer> input = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> plainIteration = input.iterate();
	// withFeedbackType is expected to produce a new iteration instead of altering plainIteration
	ConnectedIterativeStreams<Integer, String> coIteration = plainIteration.withFeedbackType(String.class);

	plainIteration.closeWith(plainIteration.map(noOpIntMap)).print();
	coIteration.closeWith(coIteration.map(noOpCoMap)).print();

	StreamGraph streamGraph = env.getStreamGraph();

	assertEquals(2, streamGraph.getIterationSourceSinkPairs().size());

	for (Tuple2<StreamNode, StreamNode> pair : streamGraph.getIterationSourceSinkPairs()) {
		StreamNode iterationSource = pair.f0;
		StreamNode iterationSink = pair.f1;
		assertEquals(
			streamGraph.getTargetVertex(iterationSource.getOutEdges().get(0)),
			streamGraph.getSourceVertex(iterationSink.getInEdges().get(0)));
	}
}
 
Example #7
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a small iteration job with explicit resources on the iteration transformation and
 * verifies, for every iteration source/sink pair of the stream graph, the co-location group,
 * the slot sharing group, and that the merged minimum resources of source and sink equal the
 * configured resources.
 */
@Test
public void testIteration() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = env.fromElements(1, 2, 3).name("source");
	IterativeStream<Integer> loop = input.iterate(3000);
	loop.name("iteration").setParallelism(2);
	DataStream<Integer> incremented = loop.map(x -> x + 1).name("map").setParallelism(2);
	// the filter rejects every element, so nothing is actually fed back
	DataStream<Integer> feedback = incremented.filter((x) -> false).name("filter").setParallelism(2);
	loop.closeWith(feedback).print();

	// the same spec is passed for both arguments of setResources
	final ResourceSpec expectedResources = ResourceSpec.newBuilder(1.0, 100).build();
	loop.getTransformation().setResources(expectedResources, expectedResources);

	StreamGraph graph = env.getStreamGraph();
	for (Tuple2<StreamNode, StreamNode> pair : graph.getIterationSourceSinkPairs()) {
		StreamNode head = pair.f0;
		StreamNode tail = pair.f1;

		assertNotNull(head.getCoLocationGroup());
		assertEquals(head.getCoLocationGroup(), tail.getCoLocationGroup());

		assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, head.getSlotSharingGroup());
		assertEquals(head.getSlotSharingGroup(), tail.getSlotSharingGroup());

		// source and sink together must account for exactly the configured resources
		final ResourceSpec headMin = head.getMinResources();
		final ResourceSpec tailMin = tail.getMinResources();
		final ResourceSpec combined = headMin.merge(tailMin);
		assertThat(combined, equalsResourceSpec(expectedResources));
	}
}
 
Example #8
Source File: IterateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a simple iteration job and asserts that every slot of the {@code iterated} flag array
 * ended up {@code true}. A failed run is retried up to five times in total, doubling the
 * iteration timeout scale after each failure; the failure of the last attempt is rethrown.
 */
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
	int maxAttempts = 5;
	int scale = 1;

	for (int attempt = 0; attempt < maxAttempts; attempt++) {
		try {
			StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
			// fresh flags for this attempt
			iterated = new boolean[parallelism];

			DataStream<Boolean> input = env.fromCollection(Collections.nCopies(parallelism * 2, false))
					.map(noOpBoolMap).name("ParallelizeMap");

			IterativeStream<Boolean> loop = input.iterate(3000 * scale);

			DataStream<Boolean> feedback = loop.flatMap(new IterationHead()).map(noOpBoolMap);

			loop.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

			loop.closeWith(feedback).addSink(new ReceiveCheckNoOpSink());

			env.execute();

			for (boolean flag : iterated) {
				assertTrue(flag);
			}

			break; // success
		} catch (Throwable t) {
			LOG.info("Run " + (attempt + 1) + "/" + maxAttempts + " failed", t);

			if (attempt < maxAttempts - 1) {
				// give the next attempt a longer iteration timeout
				scale *= 2;
			} else {
				throw t;
			}
		}
	}
}
 
Example #9
Source File: IterateITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a simple iteration job and asserts that every slot of the {@code iterated} flag array
 * ended up {@code true}. A failed run is retried up to five times in total, doubling the
 * iteration timeout scale after each failure; the failure of the last attempt is rethrown.
 */
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
	int maxAttempts = 5;
	int scale = 1;

	for (int attempt = 0; attempt < maxAttempts; attempt++) {
		try {
			StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
			// fresh flags for this attempt
			iterated = new boolean[parallelism];

			DataStream<Boolean> input = env.fromCollection(Collections.nCopies(parallelism * 2, false))
					.map(noOpBoolMap).name("ParallelizeMap");

			IterativeStream<Boolean> loop = input.iterate(3000 * scale);

			DataStream<Boolean> feedback = loop.flatMap(new IterationHead()).map(noOpBoolMap);

			loop.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

			loop.closeWith(feedback).addSink(new ReceiveCheckNoOpSink());

			env.execute();

			for (boolean flag : iterated) {
				assertTrue(flag);
			}

			break; // success
		} catch (Throwable t) {
			LOG.info("Run " + (attempt + 1) + "/" + maxAttempts + " failed", t);

			if (attempt < maxAttempts - 1) {
				// give the next attempt a longer iteration timeout
				scale *= 2;
			} else {
				throw t;
			}
		}
	}
}
 
Example #10
Source File: IterateITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Iterates directly on the element source and closes the iteration with a mapped stream;
 * the test passes only if an {@link UnsupportedOperationException} is thrown.
 * NOTE(review): the test name suggests the exception stems from a parallelism mismatch
 * between the source and the feedback stream - confirm against closeWith.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = env.fromElements(1, 10);

	IterativeStream<Integer> loop = input.iterate();
	loop.closeWith(loop.map(noOpIntMap)).print();
}
 
Example #11
Source File: IterateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Iterates directly on the element source and closes the iteration with a mapped stream;
 * the test passes only if an {@link UnsupportedOperationException} is thrown.
 * NOTE(review): the test name suggests the exception stems from a parallelism mismatch
 * between the source and the feedback stream - confirm against closeWith.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = env.fromElements(1, 10);

	IterativeStream<Integer> loop = input.iterate();
	loop.closeWith(loop.map(noOpIntMap)).print();
}
 
Example #12
Source File: IterateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a simple iteration job and asserts that every slot of the {@code iterated} flag array
 * ended up {@code true}. A failed run is retried up to five times in total, doubling the
 * iteration timeout scale after each failure; the failure of the last attempt is rethrown.
 */
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
	int maxAttempts = 5;
	int scale = 1;

	for (int attempt = 0; attempt < maxAttempts; attempt++) {
		try {
			StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
			// fresh flags for this attempt
			iterated = new boolean[parallelism];

			DataStream<Boolean> input = env.fromCollection(Collections.nCopies(parallelism * 2, false))
					.map(noOpBoolMap).name("ParallelizeMap");

			IterativeStream<Boolean> loop = input.iterate(3000 * scale);

			DataStream<Boolean> feedback = loop.flatMap(new IterationHead()).map(noOpBoolMap);

			loop.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

			loop.closeWith(feedback).addSink(new ReceiveCheckNoOpSink());

			env.execute();

			for (boolean flag : iterated) {
				assertTrue(flag);
			}

			break; // success
		} catch (Throwable t) {
			LOG.info("Run " + (attempt + 1) + "/" + maxAttempts + " failed", t);

			if (attempt < maxAttempts - 1) {
				// give the next attempt a longer iteration timeout
				scale *= 2;
			} else {
				throw t;
			}
		}
	}
}
 
Example #13
Source File: IterateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Iterates directly on the element source and closes the iteration with a mapped stream;
 * the test passes only if an {@link UnsupportedOperationException} is thrown.
 * NOTE(review): the test name suggests the exception stems from a parallelism mismatch
 * between the source and the feedback stream - confirm against closeWith.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = env.fromElements(1, 10);

	IterativeStream<Integer> loop = input.iterate();
	loop.closeWith(loop.map(noOpIntMap)).print();
}
 
Example #14
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an iteration job, generates its stream graph through a {@code StreamGraphGenerator}
 * with slot sharing switched off, and verifies that every iteration source/sink pair still
 * shares a non-null co-location group and a non-null slot sharing group.
 */
@Test
public void testIterationWithSlotSharingDisabled() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = env.fromElements(1, 2, 3).name("source");
	IterativeStream<Integer> loop = input.iterate(3000);
	loop.name("iteration").setParallelism(2);
	DataStream<Integer> incremented = loop.map(x -> x + 1).name("map").setParallelism(2);
	// the filter rejects every element, so nothing is actually fed back
	DataStream<Integer> feedback = incremented.filter((x) -> false).name("filter").setParallelism(2);
	loop.closeWith(feedback).print();

	// hand the transformations to the generator explicitly, in pipeline order
	List<Transformation<?>> pipeline = new ArrayList<>();
	pipeline.add(input.getTransformation());
	pipeline.add(loop.getTransformation());
	pipeline.add(incremented.getTransformation());
	pipeline.add(feedback.getTransformation());

	StreamGraphGenerator graphGenerator =
		new StreamGraphGenerator(pipeline, env.getConfig(), env.getCheckpointConfig());
	graphGenerator.setSlotSharingEnabled(false);
	StreamGraph graph = graphGenerator.generate();

	for (Tuple2<StreamNode, StreamNode> pair : graph.getIterationSourceSinkPairs()) {
		StreamNode head = pair.f0;
		StreamNode tail = pair.f1;

		assertNotNull(head.getCoLocationGroup());
		assertEquals(head.getCoLocationGroup(), tail.getCoLocationGroup());

		assertNotNull(head.getSlotSharingGroup());
		assertEquals(head.getSlotSharingGroup(), tail.getSlotSharingGroup());
	}
}
 
Example #15
Source File: IterateExample.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the streaming iteration example: builds a stream of integer pairs, runs
 * it through an iteration driven by {@code Step}, routes the step output either back into the
 * loop or to the result stream, and writes or prints the mapped results.
 *
 * @param args command line arguments; {@code --input} and {@code --output} are read below
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

		// parse the command line
		final ParameterTool params = ParameterTool.fromArgs(args);

		// a buffer timeout of 1 keeps the output buffers flushing continuously (lowest latency)
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
				.setBufferTimeout(1);

		// expose the parameters in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// integer pairs: generated by default, read from a text file when --input is given
		DataStream<Tuple2<Integer, Integer>> pairs;
		if (!params.has("input")) {
			System.out.println("Executing Iterate example with default input data set.");
			System.out.println("Use --input to specify file input.");
			pairs = env.addSource(new RandomFibonacciSource());
		} else {
			pairs = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
		}

		// open the iteration on the mapped input, with a 5 second timeout
		IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> loop = pairs.map(new InputMap())
				.iterate(5000L);

		// one step of the loop; the selector tags each tuple for the 'iterate' or 'output' channel
		SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> branches = loop.map(new Step())
				.split(new MySelector());

		// tuples on the 'iterate' channel go around the loop again
		loop.closeWith(branches.select("iterate"));

		// tuples on the 'output' channel are mapped into the final result records
		DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> results = branches.select("output")
				.map(new OutputMap());

		// emit results: stdout by default, a text file when --output is given
		if (!params.has("output")) {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			results.print();
		} else {
			results.writeAsText(params.get("output"));
		}

		// run the job
		env.execute("Streaming Iteration Example");
	}
 
Example #16
Source File: IterateExample.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the streaming iteration example: builds a stream of integer pairs, runs
 * it through an iteration driven by {@code Step}, routes the step output either back into the
 * loop or to the result stream, and writes or prints the mapped results.
 *
 * @param args command line arguments; {@code --input} and {@code --output} are read below
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

		// parse the command line
		final ParameterTool params = ParameterTool.fromArgs(args);

		// a buffer timeout of 1 keeps the output buffers flushing continuously (lowest latency)
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
				.setBufferTimeout(1);

		// expose the parameters in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// integer pairs: generated by default, read from a text file when --input is given
		DataStream<Tuple2<Integer, Integer>> pairs;
		if (!params.has("input")) {
			System.out.println("Executing Iterate example with default input data set.");
			System.out.println("Use --input to specify file input.");
			pairs = env.addSource(new RandomFibonacciSource());
		} else {
			pairs = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
		}

		// open the iteration on the mapped input, with a 5 second timeout
		IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> loop = pairs.map(new InputMap())
				.iterate(5000);

		// one step of the loop; the selector tags each tuple for the 'iterate' or 'output' channel
		SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> branches = loop.map(new Step())
				.split(new MySelector());

		// tuples on the 'iterate' channel go around the loop again
		loop.closeWith(branches.select("iterate"));

		// tuples on the 'output' channel are mapped into the final result records
		DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> results = branches.select("output")
				.map(new OutputMap());

		// emit results: stdout by default, a text file when --output is given
		if (!params.has("output")) {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			results.print();
		} else {
			results.writeAsText(params.get("output"));
		}

		// run the job
		env.execute("Streaming Iteration Example");
	}
 
Example #17
Source File: IterateExample.java    From flink-learning with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the streaming iteration example: feeds {@code RandomFibonacciSource} through
 * an iteration driven by {@code Step}, sends tuples selected as "iterate" back into the loop
 * and prints the mapped tuples selected as "output".
 *
 * @param args command line arguments, registered as global job parameters
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

        final ParameterTool params = ParameterTool.fromArgs(args);
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment().setBufferTimeout(1);
        env.getConfig().setGlobalJobParameters(params);

        // open the iteration on the mapped source, passing 5000 to iterate(...)
        IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> loop =
                env.addSource(new RandomFibonacciSource()).map(new InputMap()).iterate(5000);

        // one step of the loop, split by the selector into named channels
        SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> branches =
                loop.map(new Step()).split(new MySelector());

        // the "iterate" channel is the feedback edge
        loop.closeWith(branches.select("iterate"));

        // the "output" channel leaves the loop and is printed
        branches.select("output").map(new OutputMap()).print();

        env.execute("Streaming Iteration Example");
    }
 
Example #18
Source File: FtrlTrainStreamOp.java    From Alink with Apache License 2.0 4 votes vote down vote up
/**
 * Wires the training dataflow onto the single input operator: the input rows are split
 * into per-task sub-vectors, pushed through an iteration (CalcTask -> ReduceTask) whose
 * feedback edge carries 7-field tuples, and the tuples that leave the loop are turned into
 * model rows which become this operator's output.
 *
 * @param inputs the upstream operators; exactly one is expected (enforced by checkOpSize)
 * @return this operator, with its output set
 */
@Override
public FtrlTrainStreamOp linkFrom(StreamOperator<?>... inputs) {
    checkOpSize(1, inputs);
    int vectorSize = getVectorSize();
    boolean hasInterceptItem = getWithIntercept();
    // -1 marks "no vector column configured"; the feature columns are used in that case
    int vectorTrainIdx = getVectorCol() != null ?
        TableUtil.findColIndexWithAssertAndHint(inputs[0].getColNames(), getVectorCol()) : -1;
    int labelIdx = TableUtil.findColIndexWithAssertAndHint(inputs[0].getColNames(), getLabelCol());
    String[] featureCols = getFeatureCols();
    int[] featureIdx = null;
    int featureColLength = -1;
    if (vectorTrainIdx == -1) {
        // resolve each feature column name to its index in the input schema
        // NOTE(review): featureCols is dereferenced here without a null check - confirm that
        // getFeatureCols() cannot return null when no vector column is set
        featureIdx = new int[featureCols.length];
        for (int i = 0; i < featureCols.length; ++i) {
            featureIdx[i] = TableUtil.findColIndexWithAssertAndHint(inputs[0].getColNames(), featureCols[i]);
        }
        featureColLength = featureCols.length;
    }
    final TypeInformation labelType = inputs[0].getColTypes()[labelIdx];
    int parallelism = MLEnvironmentFactory.get(getMLEnvironmentId())
        .getStreamExecutionEnvironment().getParallelism();
    // feature dimension: the configured vector size, or the number of feature columns
    int featureSize = vectorTrainIdx != -1 ? vectorSize : featureColLength;
    final int[] splitInfo = getSplitInfo(featureSize, hasInterceptItem, parallelism);

    DataStream<Row> initData = inputs[0].getDataStream();

    // Tuple5<SampleId, taskId, numSubVec, SubVec, label>
    // partitioned on field 1 (the taskId) with the custom block partitioner
    DataStream<Tuple5<Long, Integer, Integer, Vector, Object>> input
        = initData.flatMap(new SplitVector(splitInfo, hasInterceptItem, vectorSize,
        vectorTrainIdx, featureIdx, labelIdx))
        .partitionCustom(new CustomBlockPartitioner(), 1);

    // train data format = <sampleId, subSampleTaskId, subNum, SparseVector(subSample), label>
    // feedback format = Tuple7<sampleId, subSampleTaskId, subNum, SparseVector(subSample), label, wx,
    // timeStamps>
    // Long.MAX_VALUE is passed as the iteration's wait-time argument
    IterativeStream.ConnectedIterativeStreams<Tuple5<Long, Integer, Integer, Vector, Object>,
        Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>
        iteration = input.iterate(Long.MAX_VALUE)
        .withFeedbackType(TypeInformation
            .of(new TypeHint<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>() {}));

    // loop body: CalcTask on the connected streams, keyed by field 0, then ReduceTask,
    // then partitioned on field 1 again; the stream is kept as a raw DataStream
    DataStream iterativeBody = iteration.flatMap(
        new CalcTask(dataBridge, splitInfo, getParams()))
        .keyBy(0)
        .flatMap(new ReduceTask(parallelism, splitInfo))
        .partitionCustom(new CustomBlockPartitioner(), 1);

    // feedback branch: tuples with f0 > 0 and f2 > 0 go around the loop again
    DataStream<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>
        result = iterativeBody.filter(
        new FilterFunction<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>() {
            @Override
            public boolean filter(Tuple7<Long, Integer, Integer, Vector, Object, Double, Long> t3)
                throws Exception {
                // feed back only when both t3.f0 and t3.f2 are positive
                return (t3.f0 > 0 && t3.f2 > 0);
            }
        });

    // output branch: tuples with f0 < 0 leave the loop and are converted by WriteModel
    DataStream<Row> output = iterativeBody.filter(
        new FilterFunction<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>() {
            @Override
            public boolean filter(Tuple7<Long, Integer, Integer, Vector, Object, Double, Long> value)
                throws Exception {
                /* a value.f0 smaller than 0 marks a tuple destined for the output */
                return value.f0 < 0;
            }
        }).flatMap(new WriteModel(labelType, getVectorCol(), featureCols, hasInterceptItem));

    iteration.closeWith(result);

    TableSchema schema = new LinearModelDataConverter(labelType).getModelSchema();

    // output schema = two leading LONG columns ("bid", "ntab") followed by the model schema
    TypeInformation[] types = new TypeInformation[schema.getFieldTypes().length + 2];
    String[] names = new String[schema.getFieldTypes().length + 2];
    names[0] = "bid";
    names[1] = "ntab";
    types[0] = Types.LONG;
    types[1] = Types.LONG;
    for (int i = 0; i < schema.getFieldTypes().length; ++i) {
        types[i + 2] = schema.getFieldTypes()[i];
        names[i + 2] = schema.getFieldNames()[i];
    }

    this.setOutput(output, names, types);
    return this;
}
 
Example #19
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an iteration job, converts it into a job graph and verifies that every vertex has
 * the same slot sharing group, that the iteration source and sink vertices belong to one
 * common co-location group, and that all other vertices have none.
 */
@Test
public void testIteration() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = env.fromElements(1, 2, 3).name("source");
	IterativeStream<Integer> loop = input.iterate(3000);
	loop.name("iteration").setParallelism(2);
	DataStream<Integer> incremented = loop.map(x -> x + 1).name("map").setParallelism(2);
	// the filter rejects every element, so nothing is actually fed back
	DataStream<Integer> feedback = incremented.filter((x) -> false).name("filter").setParallelism(2);
	loop.closeWith(feedback).print();

	JobGraph graph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	// reference group taken from the first vertex; every vertex is compared against it
	SlotSharingGroup expectedSlotSharingGroup = graph.getVerticesAsArray()[0].getSlotSharingGroup();
	assertNotNull(expectedSlotSharingGroup);

	CoLocationGroup headGroup = null;
	CoLocationGroup tailGroup = null;

	for (JobVertex vertex : graph.getVertices()) {
		// by default all vertices share one slot sharing group
		assertEquals(expectedSlotSharingGroup, vertex.getSlotSharingGroup());

		// only the iteration source and sink carry a co-location group by default
		String vertexName = vertex.getName();
		if (vertexName.startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
			headGroup = vertex.getCoLocationGroup();
			assertTrue(headGroup.getVertices().contains(vertex));
		} else if (vertexName.startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
			tailGroup = vertex.getCoLocationGroup();
			assertTrue(tailGroup.getVertices().contains(vertex));
		} else {
			assertNull(vertex.getCoLocationGroup());
		}
	}

	assertNotNull(headGroup);
	assertNotNull(tailGroup);
	assertEquals(headGroup, tailGroup);
}
 
Example #20
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an iteration job, converts it into a job graph and verifies that every vertex has
 * the same slot sharing group, that the iteration source and sink vertices belong to one
 * common co-location group, and that all other vertices have none.
 */
@Test
public void testIteration() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = env.fromElements(1, 2, 3).name("source");
	IterativeStream<Integer> loop = input.iterate(3000);
	loop.name("iteration").setParallelism(2);
	DataStream<Integer> incremented = loop.map(x -> x + 1).name("map").setParallelism(2);
	// the filter rejects every element, so nothing is actually fed back
	DataStream<Integer> feedback = incremented.filter((x) -> false).name("filter").setParallelism(2);
	loop.closeWith(feedback).print();

	JobGraph graph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	// reference group taken from the first vertex; every vertex is compared against it
	SlotSharingGroup expectedSlotSharingGroup = graph.getVerticesAsArray()[0].getSlotSharingGroup();
	assertNotNull(expectedSlotSharingGroup);

	CoLocationGroup headGroup = null;
	CoLocationGroup tailGroup = null;

	for (JobVertex vertex : graph.getVertices()) {
		// by default all vertices share one slot sharing group
		assertEquals(expectedSlotSharingGroup, vertex.getSlotSharingGroup());

		// only the iteration source and sink carry a co-location group by default
		String vertexName = vertex.getName();
		if (vertexName.startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
			headGroup = vertex.getCoLocationGroup();
			assertTrue(headGroup.getVertices().contains(vertex));
		} else if (vertexName.startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
			tailGroup = vertex.getCoLocationGroup();
			assertTrue(tailGroup.getVertices().contains(vertex));
		} else {
			assertNull(vertex.getCoLocationGroup());
		}
	}

	assertNotNull(headGroup);
	assertNotNull(tailGroup);
	assertEquals(headGroup, tailGroup);
}
 
Example #21
Source File: IterateITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that an iteration cannot be closed with a stream that does not have that
 * iteration among its predecessors: closing one iteration with a stream mapped from a
 * different iteration must raise an {@link UnsupportedOperationException}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testClosingFromOutOfLoop() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// the dummy mapper is there to get to the correct parallelism
	DataStream<Integer> input = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> foreignLoop = input.iterate();
	IterativeStream<Integer> loopUnderTest = input.iterate();

	// the feedback stream descends from foreignLoop, not from loopUnderTest
	loopUnderTest.closeWith(foreignLoop.map(noOpIntMap));

}
 
Example #22
Source File: IterateITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a co-iteration cannot be closed with a stream that does not have that
 * iteration among its predecessors: closing it with a stream mapped from a different
 * iteration must raise an {@link UnsupportedOperationException}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// the dummy mapper is there to get to the correct parallelism
	DataStream<Integer> input = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> foreignLoop = input.iterate();
	ConnectedIterativeStreams<Integer, Integer> coLoopUnderTest =
			input.iterate().withFeedbackType(Integer.class);

	// the feedback stream descends from foreignLoop, not from coLoopUnderTest
	coLoopUnderTest.closeWith(foreignLoop.map(noOpIntMap));

}
 
Example #23
Source File: IterateITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a co-iteration cannot be closed with a stream that does not have that
 * iteration among its predecessors: closing it with a stream mapped from a different
 * iteration must raise an {@link UnsupportedOperationException}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// the dummy mapper is there to get to the correct parallelism
	DataStream<Integer> input = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> foreignLoop = input.iterate();
	ConnectedIterativeStreams<Integer, Integer> coLoopUnderTest =
			input.iterate().withFeedbackType(Integer.class);

	// the feedback stream descends from foreignLoop, not from coLoopUnderTest
	coLoopUnderTest.closeWith(foreignLoop.map(noOpIntMap));

}
 
Example #24
Source File: IterateITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that an iteration cannot be closed with a stream that does not have that
 * iteration among its predecessors: closing one iteration with a stream mapped from a
 * different iteration must raise an {@link UnsupportedOperationException}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testClosingFromOutOfLoop() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// the dummy mapper is there to get to the correct parallelism
	DataStream<Integer> input = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> foreignLoop = input.iterate();
	IterativeStream<Integer> loopUnderTest = input.iterate();

	// the feedback stream descends from foreignLoop, not from loopUnderTest
	loopUnderTest.closeWith(foreignLoop.map(noOpIntMap));

}
 
Example #25
Source File: IterateITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that an iteration cannot be closed with a stream that does not have that
 * iteration among its predecessors: closing one iteration with a stream mapped from a
 * different iteration must raise an {@link UnsupportedOperationException}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testClosingFromOutOfLoop() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// the dummy mapper is there to get to the correct parallelism
	DataStream<Integer> input = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> foreignLoop = input.iterate();
	IterativeStream<Integer> loopUnderTest = input.iterate();

	// the feedback stream descends from foreignLoop, not from loopUnderTest
	loopUnderTest.closeWith(foreignLoop.map(noOpIntMap));

}
 
Example #26
Source File: IterateExample.java    From flink-learning with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws Exception {
        // parse the command line; the parsed parameters are registered as global job parameters
        final ParameterTool params = ParameterTool.fromArgs(args);

        // buffer timeout of 1 is set on the environment before anything else is configured
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setBufferTimeout(1);
        env.getConfig().setGlobalJobParameters(params);

        // head of the loop: source -> InputMap -> iterate(5000)
        IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> loop =
                env.addSource(new RandomFibonacciSource())
                        .map(new InputMap())
                        .iterate(5000);

        // loop body: apply Step, then split the result with MySelector
        SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> routed =
                loop.map(new Step()).split(new MySelector());

        // records selected as "iterate" are fed back into the loop
        loop.closeWith(routed.select("iterate"));

        // records selected as "output" are mapped and printed
        routed.select("output").map(new OutputMap()).print();

        env.execute("Streaming Iteration Example");
    }
 
Example #27
Source File: IterateExample.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws Exception {

		// parse the command line arguments
		final ParameterTool params = ParameterTool.fromArgs(args);

		// execution environment with a buffer timeout of 1, so that output buffers
		// are flushed continuously (lowest latency)
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setBufferTimeout(1);

		// expose the parameters in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// input stream of integer pairs: generated by default, read from a text file
		// when --input is given
		DataStream<Tuple2<Integer, Integer>> pairs;
		if (!params.has("input")) {
			System.out.println("Executing Iterate example with default input data set.");
			System.out.println("Use --input to specify file input.");
			pairs = env.addSource(new RandomFibonacciSource());
		} else {
			pairs = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
		}

		// open the iteration on the mapped input, using a 5 second timeout
		IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> loop =
				pairs.map(new InputMap()).iterate(5000);

		// one step of the iteration, followed by the output selector that routes
		// each tuple to the 'iterate' and/or 'output' channel
		SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> routed =
				loop.map(new Step()).split(new MySelector());

		// tuples on the 'iterate' channel are fed back, which closes the iteration
		loop.closeWith(routed.select("iterate"));

		// tuples on the 'output' channel are converted into the final result records
		DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> results =
				routed.select("output").map(new OutputMap());

		// emit the results: to stdout by default, to a file when --output is given
		if (!params.has("output")) {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			results.print();
		} else {
			results.writeAsText(params.get("output"));
		}

		// run the job
		env.execute("Streaming Iteration Example");
	}
 
Example #28
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies the job graph produced when slot sharing is disabled on the stream graph
 * generator: every vertex other than the iteration source and sink must have no slot
 * sharing group, while the iteration source and sink must both have one, share it, and
 * also share a co-location group.
 */
@Test
public void testDisableSlotSharingForIteration() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// source -> iteration -> map -> filter, with the filter output fed back and printed
	DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
	IterativeStream<Integer> loop = source.iterate(3000);
	loop.name("iteration").setParallelism(2);
	DataStream<Integer> incremented = loop.map(x -> x + 1).name("map").setParallelism(2);
	DataStream<Integer> dropAll = incremented.filter((x) -> false).name("filter").setParallelism(2);
	loop.closeWith(dropAll).print();

	// the transformations are handed to the generator explicitly, in pipeline order
	List<Transformation<?>> pipeline = new ArrayList<>();
	pipeline.add(source.getTransformation());
	pipeline.add(loop.getTransformation());
	pipeline.add(incremented.getTransformation());
	pipeline.add(dropAll.getTransformation());

	// generate the stream graph with slot sharing switched off
	StreamGraphGenerator graphGenerator =
		new StreamGraphGenerator(pipeline, env.getConfig(), env.getCheckpointConfig());
	graphGenerator.setSlotSharingEnabled(false);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(graphGenerator.generate());

	SlotSharingGroup sourceSharingGroup = null;
	SlotSharingGroup sinkSharingGroup = null;
	CoLocationGroup sourceCoLocation = null;
	CoLocationGroup sinkCoLocation = null;

	// record the groups of the iteration source/sink; any other vertex must have no group
	for (JobVertex vertex : jobGraph.getVertices()) {
		final String vertexName = vertex.getName();

		if (vertexName.startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
			sourceSharingGroup = vertex.getSlotSharingGroup();
			sourceCoLocation = vertex.getCoLocationGroup();
			continue;
		}

		if (vertexName.startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
			sinkSharingGroup = vertex.getSlotSharingGroup();
			sinkCoLocation = vertex.getCoLocationGroup();
			continue;
		}

		assertNull(vertex.getSlotSharingGroup());
	}

	// iteration source and sink: slot sharing group present and identical
	assertNotNull(sourceSharingGroup);
	assertNotNull(sinkSharingGroup);
	assertEquals(sourceSharingGroup, sinkSharingGroup);

	// iteration source and sink: co-location group present and identical
	assertNotNull(sourceCoLocation);
	assertNotNull(sinkCoLocation);
	assertEquals(sourceCoLocation, sinkCoLocation);
}
 
Example #29
Source File: IterateITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * A co-iteration must not be closable with a stream that does not have the co-iteration
 * among its predecessors; the attempt is expected to raise an
 * {@link UnsupportedOperationException}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// a no-op map is inserted purely to obtain the correct parallelism
	final DataStream<Integer> mapped = env.fromElements(1, 10).map(noOpIntMap);

	// plain iteration whose output will be (wrongly) used as feedback below
	final IterativeStream<Integer> plainLoop = mapped.iterate();

	// separate iteration, turned into a co-iteration with Integer feedback
	final ConnectedIterativeStreams<Integer, Integer> coLoop =
			mapped.iterate().withFeedbackType(Integer.class);

	// plainLoop's output is not downstream of coLoop, hence this call must throw
	coLoop.closeWith(plainLoop.map(noOpIntMap));
}
 
Example #30
Source File: PythonIterativeStream.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code PythonIterativeStream} around the given iterative stream.
 *
 * <p>The stream is handed unchanged to the superclass constructor; this constructor adds
 * no logic of its own. It is package-private.
 *
 * @param iterativeStream the {@link IterativeStream} of {@code PyObject} elements to wrap
 */
PythonIterativeStream(IterativeStream<PyObject> iterativeStream) {
	super(iterativeStream);
}