Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#map()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#map(). They are drawn from a number of open-source projects; each example names its source file and the project it comes from.
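Before the project examples, here is a minimal, self-contained sketch of the map() contract: one input element in, exactly one output element out. This snippet is illustrative only and is not drawn from any of the projects below.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MinimalMapExample {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStream<Integer> numbers = env.fromElements(1, 2, 3);

		// map() applies the function to every element and emits one result per input
		DataStream<String> labeled = numbers.map(new MapFunction<Integer, String>() {
			@Override
			public String map(Integer value) {
				return "value-" + value;
			}
		});

		labeled.print();
		env.execute("Minimal map() example");
	}
}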
Example 1
Source File: NiFiSourceTopologyExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	SiteToSiteClientConfig clientConfig = new SiteToSiteClient.Builder()
			.url("http://localhost:8080/nifi")
			.portName("Data for Flink")
			.requestBatchCount(5)
			.buildConfig();

	SourceFunction<NiFiDataPacket> nifiSource = new NiFiSource(clientConfig);
	DataStream<NiFiDataPacket> streamSource = env.addSource(nifiSource).setParallelism(2);

	// decode each NiFi packet's payload into a String
	DataStream<String> dataStream = streamSource.map(new MapFunction<NiFiDataPacket, String>() {
		@Override
		public String map(NiFiDataPacket value) throws Exception {
			return new String(value.getContent(), Charset.defaultCharset());
		}
	});

	dataStream.print();
	env.execute();
}
 
Example 2
Source File: StreamSequence.java    From jstorm with Apache License 2.0
public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // get input data
        DataStream<Long> text = env.addSource(new SequenceSource(10)).slotSharingGroup("123");

        // identity map: passes each element through unchanged
        DataStream<Long> counts = text.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long aLong) throws Exception {
                return aLong;
            }
        });

//        counts.print();
        // execute program
        env.execute("StreamSequence");
    }
 
Example 3
Source File: RollingSinkFaultToleranceITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
public void testProgram(StreamExecutionEnvironment env) {
	assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

	env.enableCheckpointing(20);
	env.setParallelism(12);
	env.disableOperatorChaining();

	DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

	DataStream<String> mapped = stream
			.map(new OnceFailingIdentityMapper(NUM_STRINGS));

	RollingSink<String> sink = new RollingSink<String>(outPath)
			.setBucketer(new NonRollingBucketer())
			.setBatchSize(10000)
			.setValidLengthPrefix("")
			.setPendingPrefix("")
			.setPendingSuffix(PENDING_SUFFIX)
			.setInProgressSuffix(IN_PROGRESS_SUFFIX);

	mapped.addSink(sink);
}
 
Example 4
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Tests that slot sharing is enabled by default.
 */
@Test
public void testEnableSlotSharing() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);
	DataStream<Integer> mapDataStream = sourceDataStream.map(x -> x + 1);

	final List<Transformation<?>> transformations = new ArrayList<>();
	transformations.add(sourceDataStream.getTransformation());
	transformations.add(mapDataStream.getTransformation());

	// all stream nodes share default group by default
	StreamGraph streamGraph = new StreamGraphGenerator(
			transformations, env.getConfig(), env.getCheckpointConfig())
		.generate();

	Collection<StreamNode> streamNodes = streamGraph.getStreamNodes();
	for (StreamNode streamNode : streamNodes) {
		assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, streamNode.getSlotSharingGroup());
	}
}
 
Example 5
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0
@Test
public void testUnboundedPojoSourceAndReturnTuple() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input = env.addSource(new RandomEventSource(5));

    DataStream<Tuple4<Long, Integer, String, Double>> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream");

    // project each Tuple4 down to its id field (f1)
    DataStream<Integer> following = output.map(new MapFunction<Tuple4<Long, Integer, String, Double>, Integer>() {
        @Override
        public Integer map(Tuple4<Long, Integer, String, Double> value) throws Exception {
            return value.f1;
        }
    });
    String resultPath = tempFolder.newFile().toURI().toString();
    following.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
 
Example 6
Source File: StreamGraphGeneratorTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests that the JSON generated by JSONGenerator meets two requirements:
 * 1. sink nodes come last;
 * 2. among sink nodes, and among non-sink nodes, nodes are sorted by id.
 */
@Test
public void testSinkIdComparison() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStream<Integer> source = env.fromElements(1, 2, 3);
	for (int i = 0; i < 32; i++) {
		if (i % 2 == 0) {
			source.addSink(new SinkFunction<Integer>() {
				@Override
				public void invoke(Integer value, Context ctx) throws Exception {}
			});
		} else {
			source.map(x -> x + 1);
		}
	}
	// IllegalArgumentException will be thrown without FLINK-9216
	env.getStreamGraph().getStreamingPlanAsJSON();
}
 
Example 7
Source File: CsvTableSink.java    From flink with Apache License 2.0
@Override
public DataStreamSink<?> consumeDataStream(DataStream<Row> dataStream) {
	SingleOutputStreamOperator<String> csvRows =
		dataStream.map(new CsvFormatter(fieldDelim == null ? "," : fieldDelim));

	DataStreamSink<String> sink;
	if (writeMode != null) {
		sink = csvRows.writeAsText(path, writeMode);
	} else {
		sink = csvRows.writeAsText(path);
	}

	if (numFiles > 0) {
		csvRows.setParallelism(numFiles);
		sink.setParallelism(numFiles);
	} else {
		// if the number of files is not set, use the input parallelism so the sink stays chained.
		csvRows.setParallelism(dataStream.getParallelism());
		sink.setParallelism(dataStream.getParallelism());
	}

	sink.name(TableConnectorUtils.generateRuntimeName(CsvTableSink.class, fieldNames));

	return sink;
}
 
Example 8
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
private JobGraph createJobGraphForManagedMemoryFractionTest(
	final List<ResourceSpec> resourceSpecs,
	@Nullable final List<Integer> managedMemoryWeights) throws Exception {

	final Method opMethod = getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {
		@Override
		public void run(SourceContext<Integer> ctx) {
		}

		@Override
		public void cancel() {
		}
	});
	opMethod.invoke(source, resourceSpecs.get(0));

	// CHAIN(source -> map1) in default slot sharing group
	final DataStream<Integer> map1 = source.map((MapFunction<Integer, Integer>) value -> value);
	opMethod.invoke(map1, resourceSpecs.get(1));

	// CHAIN(map2) in default slot sharing group
	final DataStream<Integer> map2 = map1.rebalance().map((MapFunction<Integer, Integer>) value -> value);
	opMethod.invoke(map2, resourceSpecs.get(2));

	// CHAIN(map3) in test slot sharing group
	final DataStream<Integer> map3 = map2.rebalance().map(value -> value).slotSharingGroup("test");
	opMethod.invoke(map3, resourceSpecs.get(3));

	if (managedMemoryWeights != null) {
		source.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(0));
		map1.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(1));
		map2.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(2));
		map3.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(3));
	}

	return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
 
Example 9
Source File: FlinkTsFileStreamSource.java    From incubator-iotdb with Apache License 2.0
public static void main(String[] args) throws IOException {
	String path = "test.tsfile";
	TsFileUtils.writeTsFile(path);
	new File(path).deleteOnExit();
	String[] fieldNames = {
		QueryConstant.RESERVED_TIME,
		"device_1.sensor_1",
		"device_1.sensor_2",
		"device_1.sensor_3",
		"device_2.sensor_1",
		"device_2.sensor_2",
		"device_2.sensor_3"
	};
	TypeInformation[] typeInformations = new TypeInformation[] {
		Types.LONG,
		Types.LONG,
		Types.LONG,
		Types.LONG,
		Types.LONG,
		Types.LONG,
		Types.LONG
	};
	List<Path> paths = Arrays.stream(fieldNames)
		.filter(s -> !s.equals(QueryConstant.RESERVED_TIME))
		.map(Path::new)
		.collect(Collectors.toList());
	RowTypeInfo rowTypeInfo = new RowTypeInfo(typeInformations, fieldNames);
	QueryExpression queryExpression = QueryExpression.create(paths, null);
	RowRowRecordParser parser = RowRowRecordParser.create(rowTypeInfo, queryExpression.getSelectedSeries());
	TsFileInputFormat<Row> inputFormat = new TsFileInputFormat<>(queryExpression, parser);
	StreamExecutionEnvironment senv = StreamExecutionEnvironment.getExecutionEnvironment();
	// read back the file that was written above
	inputFormat.setFilePath(path);
	DataStream<Row> source = senv.createInput(inputFormat);
	DataStream<String> rowString = source.map(Row::toString);
	Iterator<String> result = DataStreamUtils.collect(rowString);
	while (result.hasNext()) {
		System.out.println(result.next());
	}
}
 
Example 10
Source File: WordCountIntegrationTest.java    From tutorials with MIT License
@Test
public void givenStreamOfEvents_whenProcessEvents_thenShouldPrintResultsOnSinkOperation() throws Exception {
    // given
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<String> text = env.fromElements("This is a first sentence", "This is a second sentence with a one word");

    SingleOutputStreamOperator<String> upperCase = text.map(String::toUpperCase);

    upperCase.print();

    // when
    env.execute();
}
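The map() call above can rely on type inference because String::toUpperCase has a plain, non-generic return type. When a lambda or method reference produces a generic result such as a Tuple2, Java's type erasure prevents Flink from inferring the output type, and it must be supplied explicitly with returns(). A minimal illustrative sketch, not taken from the project above:

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LambdaMapExample {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStream<String> words = env.fromElements("flink", "map", "example");

		// the lambda returns a generic Tuple2, so the output type must be declared explicitly
		DataStream<Tuple2<String, Integer>> counts = words
				.map(word -> Tuple2.of(word, 1))
				.returns(Types.TUPLE(Types.STRING, Types.INT));

		counts.print();
		env.execute("Lambda map() example");
	}
}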
 
Example 11
Source File: MapperTest.java    From flink-spector with Apache License 2.0
/**
 * DataStream transformation to test.
 * Swaps the fields of a {@link Tuple2}
 *
 * @param stream input {@link DataStream}
 * @return {@link DataStream}
 */
public static DataStream<Tuple2<String, Integer>> swap(DataStream<Tuple2<Integer, String>> stream) {
    return stream.map(new MapFunction<Tuple2<Integer, String>, Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> map(Tuple2<Integer, String> input) throws Exception {
            return input.swap();
        }
    });
}
 
Example 12
Source File: SummaryTreeReduce.java    From gelly-streaming with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public DataStream<T> run(final DataStream<Edge<K, EV>> edgeStream) {
	TypeInformation<Tuple2<Integer, Edge<K, EV>>> basicTypeInfo = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, edgeStream.getType());

	TupleTypeInfo edgeTypeInfo = (TupleTypeInfo) edgeStream.getType();
	TypeInformation<S> partialAggType = TypeExtractor.createTypeInfo(EdgesFold.class, getUpdateFun().getClass(), 2, edgeTypeInfo.getTypeAt(0), edgeTypeInfo.getTypeAt(2));
	TypeInformation<Tuple2<Integer, S>> partialTypeInfo = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, partialAggType);

	degree = (degree == -1) ? edgeStream.getParallelism() : degree;
	
	DataStream<S> partialAgg = edgeStream
			.map(new PartitionMapper<>()).returns(basicTypeInfo)
			.setParallelism(degree)
			.keyBy(0)
			.timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS))
			.fold(getInitialValue(), new PartialAgg<>(getUpdateFun(), partialAggType)).setParallelism(degree);
	//split here

	DataStream<Tuple2<Integer, S>> treeAgg = enhance(partialAgg.map(new PartitionMapper<>()).setParallelism(degree).returns(partialTypeInfo), partialTypeInfo);

	DataStream<S> resultStream = treeAgg.map(new PartitionStripper<>()).setParallelism(treeAgg.getParallelism())
			.timeWindowAll(Time.of(timeMillis, TimeUnit.MILLISECONDS))
			.reduce(getCombineFun())
			.flatMap(getAggregator(edgeStream)).setParallelism(1);

	return (getTransform() != null) ? resultStream.map(getTransform()) : (DataStream<T>) resultStream;
}
 
Example 13
Source File: BaseEvalClassStreamOp.java    From Alink with Apache License 2.0
@Override
public T linkFrom(StreamOperator<?>... inputs) {
    StreamOperator<?> in = checkAndGetFirst(inputs);
    String labelColName = this.get(MultiEvaluationStreamParams.LABEL_COL);
    String positiveValue = this.get(BinaryEvaluationStreamParams.POS_LABEL_VAL_STR);
    Integer timeInterval = this.get(MultiEvaluationStreamParams.TIME_INTERVAL);

    ClassificationEvaluationUtil.Type type = ClassificationEvaluationUtil.judgeEvaluationType(this.getParams());

    DataStream<BaseMetricsSummary> statistics;

    switch (type) {
        case PRED_RESULT: {
            String predResultColName = this.get(MultiEvaluationStreamParams.PREDICTION_COL);
            TableUtil.assertSelectedColExist(in.getColNames(), labelColName, predResultColName);

            LabelPredictionWindow predMultiWindowFunction = new LabelPredictionWindow(binary, positiveValue);
            statistics = in.select(new String[] {labelColName, predResultColName})
                .getDataStream()
                .timeWindowAll(Time.of(timeInterval, TimeUnit.SECONDS))
                .apply(predMultiWindowFunction);
            break;
        }
        case PRED_DETAIL: {
            String predDetailColName = this.get(MultiEvaluationStreamParams.PREDICTION_DETAIL_COL);
            TableUtil.assertSelectedColExist(in.getColNames(), labelColName, predDetailColName);

            PredDetailLabel eval = new PredDetailLabel(positiveValue, binary);

            statistics = in.select(new String[] {labelColName, predDetailColName})
                .getDataStream()
                .timeWindowAll(Time.of(timeInterval, TimeUnit.SECONDS))
                .apply(eval);
            break;
        }
        default: {
            throw new RuntimeException("Unsupported evaluation type: " + type);
        }
    }
    DataStream<BaseMetricsSummary> totalStatistics = statistics
        .map(new EvaluationUtil.AllDataMerge())
        .setParallelism(1);

    DataStream<Row> windowOutput = statistics.map(
        new EvaluationUtil.SaveDataStream(ClassificationEvaluationUtil.WINDOW.f0));
    DataStream<Row> allOutput = totalStatistics.map(
        new EvaluationUtil.SaveDataStream(ClassificationEvaluationUtil.ALL.f0));

    DataStream<Row> union = windowOutput.union(allOutput);

    this.setOutput(union,
        new String[] {ClassificationEvaluationUtil.STATISTICS_OUTPUT, DATA_OUTPUT},
        new TypeInformation[] {Types.STRING, Types.STRING});

    return (T)this;
}
 
Example 14
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Tests that virtual transformations behave correctly.
 *
 * <p>Checks that the output selector and partitioning work correctly when applied on a union.
 */
@Test
public void testVirtualTransformations2() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 10);

	DataStream<Integer> rebalanceMap = source.rebalance().map(new NoOpIntMap());

	DataStream<Integer> map1 = rebalanceMap
			.map(new NoOpIntMap());

	DataStream<Integer> map2 = rebalanceMap
			.map(new NoOpIntMap());

	DataStream<Integer> map3 = rebalanceMap
			.map(new NoOpIntMap());

	EvenOddOutputSelector selector = new EvenOddOutputSelector();

	SingleOutputStreamOperator<Integer> unionedMap = map1.union(map2).union(map3)
			.broadcast()
			.split(selector)
			.select("foo")
			.map(new NoOpIntMap());

	unionedMap.addSink(new DiscardingSink<>());

	StreamGraph graph = env.getStreamGraph();

	// verify that the properties are correctly set on all input operators
	assertTrue(graph.getStreamNode(map1.getId()).getOutEdges().get(0).getPartitioner() instanceof BroadcastPartitioner);
	assertTrue(graph.getStreamNode(map1.getId()).getOutEdges().get(0).getSelectedNames().get(0).equals("foo"));
	assertTrue(graph.getStreamNode(map1.getId()).getOutputSelectors().contains(selector));

	assertTrue(graph.getStreamNode(map2.getId()).getOutEdges().get(0).getPartitioner() instanceof BroadcastPartitioner);
	assertTrue(graph.getStreamNode(map2.getId()).getOutEdges().get(0).getSelectedNames().get(0).equals("foo"));
	assertTrue(graph.getStreamNode(map2.getId()).getOutputSelectors().contains(selector));

	assertTrue(graph.getStreamNode(map3.getId()).getOutEdges().get(0).getPartitioner() instanceof BroadcastPartitioner);
	assertTrue(graph.getStreamNode(map3.getId()).getOutEdges().get(0).getSelectedNames().get(0).equals("foo"));
	assertTrue(graph.getStreamNode(map3.getId()).getOutputSelectors().contains(selector));

}
 
Example 15
Source File: StreamGraphGeneratorTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests that virtual transformations behave correctly.
 *
 * <p>Verifies that partitioning, output selectors, and selected names are correctly set in the
 * StreamGraph when they are intermixed.
 */
@Test
public void testVirtualTransformations() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 10);

	DataStream<Integer> rebalanceMap = source.rebalance().map(new NoOpIntMap());

	// verify that only the partitioning that was set last is used
	DataStream<Integer> broadcastMap = rebalanceMap
			.forward()
			.global()
			.broadcast()
			.map(new NoOpIntMap());

	broadcastMap.addSink(new DiscardingSink<>());

	// verify that partitioning is preserved across union and split/select
	EvenOddOutputSelector selector1 = new EvenOddOutputSelector();
	EvenOddOutputSelector selector2 = new EvenOddOutputSelector();
	EvenOddOutputSelector selector3 = new EvenOddOutputSelector();

	DataStream<Integer> map1Operator = rebalanceMap
			.map(new NoOpIntMap());

	DataStream<Integer> map1 = map1Operator
			.broadcast()
			.split(selector1)
			.select("even");

	DataStream<Integer> map2Operator = rebalanceMap
			.map(new NoOpIntMap());

	DataStream<Integer> map2 = map2Operator
			.split(selector2)
			.select("odd")
			.global();

	DataStream<Integer> map3Operator = rebalanceMap
			.map(new NoOpIntMap());

	DataStream<Integer> map3 = map3Operator
			.global()
			.split(selector3)
			.select("even")
			.shuffle();

	SingleOutputStreamOperator<Integer> unionedMap = map1.union(map2).union(map3)
			.map(new NoOpIntMap());

	unionedMap.addSink(new DiscardingSink<>());

	StreamGraph graph = env.getStreamGraph();

	// rebalanceMap
	assertTrue(graph.getStreamNode(rebalanceMap.getId()).getInEdges().get(0).getPartitioner() instanceof RebalancePartitioner);

	// verify that only last partitioning takes precedence
	assertTrue(graph.getStreamNode(broadcastMap.getId()).getInEdges().get(0).getPartitioner() instanceof BroadcastPartitioner);
	assertEquals(rebalanceMap.getId(), graph.getSourceVertex(graph.getStreamNode(broadcastMap.getId()).getInEdges().get(0)).getId());

	// verify that partitioning in unions is preserved and that it works across split/select
	assertTrue(graph.getStreamNode(map1Operator.getId()).getOutEdges().get(0).getPartitioner() instanceof BroadcastPartitioner);
	assertTrue(graph.getStreamNode(map1Operator.getId()).getOutEdges().get(0).getSelectedNames().get(0).equals("even"));
	assertTrue(graph.getStreamNode(map1Operator.getId()).getOutputSelectors().contains(selector1));

	assertTrue(graph.getStreamNode(map2Operator.getId()).getOutEdges().get(0).getPartitioner() instanceof GlobalPartitioner);
	assertTrue(graph.getStreamNode(map2Operator.getId()).getOutEdges().get(0).getSelectedNames().get(0).equals("odd"));
	assertTrue(graph.getStreamNode(map2Operator.getId()).getOutputSelectors().contains(selector2));

	assertTrue(graph.getStreamNode(map3Operator.getId()).getOutEdges().get(0).getPartitioner() instanceof ShufflePartitioner);
	assertTrue(graph.getStreamNode(map3Operator.getId()).getOutEdges().get(0).getSelectedNames().get(0).equals("even"));
	assertTrue(graph.getStreamNode(map3Operator.getId()).getOutputSelectors().contains(selector3));
}
 
Example 16
Source File: BasicTransformations.java    From examples-java with Apache License 2.0
public static void main(String[] args) throws Exception {

        // set up the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // use event time for the application
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // configure watermark interval
        env.getConfig().setAutoWatermarkInterval(1000L);

        // ingest sensor stream
        DataStream<SensorReading> readings = env
            // SensorSource generates random temperature readings
            .addSource(new SensorSource())
            // assign timestamps and watermarks which are required for event time
            .assignTimestampsAndWatermarks(new SensorTimeAssigner());

        // filter out sensor measurements with temperature below 25 degrees
        DataStream<SensorReading> filteredReadings = readings
            .filter(r -> r.temperature >= 25);

        // the above filter transformation using a FilterFunction instead of a lambda function
        // DataStream<SensorReading> filteredReadings = readings
        //     .filter(new TemperatureFilter(25));

        // project the reading to the id of the sensor
        DataStream<String> sensorIds = filteredReadings
            .map(r -> r.id);

        // the above map transformation using a MapFunction instead of a lambda function
        // DataStream<String> sensorIds = filteredReadings
        //     .map(new IdExtractor());

        // split the String id of each sensor to the prefix "sensor" and sensor number
        DataStream<String> splitIds = sensorIds
            .flatMap((FlatMapFunction<String, String>)
                    (id, out) -> { for (String s : id.split("_")) { out.collect(s); } })
            // provide result type because Java cannot infer return type of lambda function
            .returns(Types.STRING);

        // the above flatMap transformation using a FlatMapFunction instead of a lambda function
        // DataStream<String> splitIds = sensorIds
        //         .flatMap(new IdSplitter());

        // print result stream to standard out
        splitIds.print();

        // execute application
        env.execute("Basic Transformations Example");
    }
 
Example 17
Source File: DemonstrationOfTumblingTableSQLFunction.java    From yauaa with Apache License 2.0
@Disabled
@Test
public void runDemonstration() throws Exception {
    // The base input stream
    StreamExecutionEnvironment senv = StreamExecutionEnvironment.getExecutionEnvironment();
    senv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    senv.getConfig().setAutoWatermarkInterval(1000);

    DataStream<Tuple4<Long, String, String, String>> inputStream = senv
        .addSource(new UAStreamSource())
        .assignTimestampsAndWatermarks(new UAWatermarker());

    // The table environment
    StreamTableEnvironment tableEnv = StreamTableEnvironment.create(senv);

    // Give the stream a Table Name
    tableEnv.createTemporaryView("AgentStream", inputStream, "eventTime.rowtime, useragent, expectedDeviceClass, expectedAgentNameVersionMajor");

    // register the function
    tableEnv.registerFunction("ParseUserAgent", new AnalyzeUseragentFunction("DeviceClass", "AgentNameVersionMajor"));

    int windowIntervalCount =  5;
    String windowIntervalScale =  "MINUTE";

    String sqlQuery = String.format(
        "SELECT" +
        "   TUMBLE_START(eventTime, INTERVAL '%d' %s) AS wStart," +
        "   deviceClass," +
        "   agentNameVersionMajor," +
        "   expectedDeviceClass," +
        "   expectedAgentNameVersionMajor," +
        "   Count('') " +
        "FROM ( "+
        "    SELECT " +
        "       eventTime, " +
        "       parsedUserAgent['DeviceClass'          ]  AS deviceClass," +
        "       parsedUserAgent['AgentNameVersionMajor']  AS agentNameVersionMajor," +
        "       expectedDeviceClass," +
        "       expectedAgentNameVersionMajor" +
        "    FROM ( "+
        "        SELECT " +
        "           eventTime, " +
        "           ParseUserAgent(useragent) AS parsedUserAgent," +
        "           expectedDeviceClass," +
        "           expectedAgentNameVersionMajor" +
        "        FROM AgentStream " +
        "    )" +
        ")" +
        "GROUP BY TUMBLE(eventTime, INTERVAL '%d' %s), " +
            "       deviceClass," +
            "       agentNameVersionMajor," +
            "       expectedDeviceClass," +
            "       expectedAgentNameVersionMajor",
        windowIntervalCount, windowIntervalScale,
        windowIntervalCount, windowIntervalScale
        );
    Table resultTable = tableEnv.sqlQuery(sqlQuery);

    TypeInformation<Row> tupleType = new RowTypeInfo(SQL_TIMESTAMP, STRING, STRING, STRING, STRING, LONG);
    DataStream<Row>      resultSet = tableEnv.toAppendStream(resultTable, tupleType);

    resultSet.print();

    resultSet.map((MapFunction<Row, String>) row -> {
        // field 0 is the window start (wStart), the first column selected in the query above
        Object windowStart                    = row.getField(0);
        Object deviceClass                    = row.getField(1);
        Object agentNameVersionMajor          = row.getField(2);
        Object expectedDeviceClass            = row.getField(3);
        Object expectedAgentNameVersionMajor  = row.getField(4);

        assertEquals(
            expectedDeviceClass,
            deviceClass,
            "Wrong DeviceClass in window starting at " + windowStart);

        assertEquals(
            expectedAgentNameVersionMajor,
            agentNameVersionMajor,
            "Wrong AgentNameVersionMajor in window starting at " + windowStart);

        return windowStart.toString();
    });

    senv.execute();
}