Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream. They are extracted from open source projects; where available, the source project, file, and license are noted above each example.
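Before the examples themselves, here is a minimal sketch of the pattern most of them follow: obtain a StreamExecutionEnvironment, build a DataStream from a source, apply transformations, attach a sink, and call execute(). The class name and element values below are illustrative only.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class DataStreamSketch {
	public static void main(String[] args) throws Exception {
		// Every pipeline starts from a StreamExecutionEnvironment.
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Build a DataStream from an in-memory source and apply a transformation.
		DataStream<String> words = env.fromElements("hello", "flink");
		words.map(new MapFunction<String, String>() {
			@Override
			public String map(String value) {
				return value.toUpperCase();
			}
		}).print();

		// Nothing runs until execute() is called.
		env.execute("DataStream sketch");
	}
}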
Example 1
@Override
public void createMigrationJob(StreamExecutionEnvironment env) {
	/**
	 * Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
	 */
	DataStream<Integer> source = createSource(env, ExecutionMode.MIGRATE);

	SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.MIGRATE, source);
	first.startNewChain();

	SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.MIGRATE, first);
	second.startNewChain();

	SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);

	SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.MIGRATE, stateless);
}
 
Example 2
Source Project: Flink-CEPplus   Source File: Kafka011Example.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
			.addSource(
				new FlinkKafkaConsumer011<>(
					parameterTool.getRequired("input-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
			.keyBy("word")
			.map(new RollingAdditionMapper());

	input.addSink(
			new FlinkKafkaProducer011<>(
					parameterTool.getRequired("output-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties()));

	env.execute("Kafka 0.11 Example");
}
 
Example 3
Source Project: flink   Source File: IntervalJoinITCase.java    License: Apache License 2.0
@Test(expected = UnsupportedTimeCharacteristicException.class)
public void testExecutionFailsInProcessingTime() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	env.setParallelism(1);

	DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1));
	DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1));

	streamOne.keyBy(new Tuple2KeyExtractor())
		.intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
		.between(Time.milliseconds(0), Time.milliseconds(0))
		.process(new ProcessJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
			@Override
			public void processElement(Tuple2<String, Integer> left,
				Tuple2<String, Integer> right, Context ctx,
				Collector<String> out) throws Exception {
				out.collect(left + ":" + right);
			}
		});
}
 
Example 4
Source Project: flink   Source File: DataStreamTest.java    License: Apache License 2.0
/**
 * Ensure that WatermarkStrategy is easy to use in the API, without superfluous generics.
 */
@Test
public void testErgonomicWatermarkStrategy() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> input = env.fromElements("bonjour");

	// without a chain of methods, the call does not need an explicit generic type
	input.assignTimestampsAndWatermarks(
			WatermarkStrategy
					.forBoundedOutOfOrderness(Duration.ofMillis(10)));

	// as soon as you have a chain of methods the first call needs to specify the generic type
	input.assignTimestampsAndWatermarks(
			WatermarkStrategy
					.<String>forBoundedOutOfOrderness(Duration.ofMillis(10))
					.withTimestampAssigner((event, timestamp) -> 42L));
}
 
Example 5
Source Project: Flink-CEPplus   Source File: WindowTranslationTest.java    License: Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.reduce(new DummyReducer());

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example 6
Source Project: bahir-flink   Source File: SiddhiCEPITCase.java    License: Apache License 2.0
@Test
public void testUnboundedTupleSourceAndReturnTuple() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple4<Integer, String, Double, Long>> input = env
        .addSource(new RandomTupleSource(5).closeDelay(1500)).keyBy(1);

    DataStream<Tuple4<Long, Integer, String, Double>> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
 
Example 7
Source Project: pulsar-flink   Source File: FlinkPulsarITest.java    License: Apache License 2.0
@Test
public void testStartFromSpecific() throws Exception {
    String topic = newTopic();
    List<MessageId> mids = sendTypedMessages(topic, SchemaType.INT32, Arrays.asList(
            //  0,   1,   2, 3, 4, 5,  6,  7,  8
            -20, -21, -22, 1, 2, 3, 10, 11, 12), Optional.empty());

    Map<String, Set<Integer>> expectedData = new HashMap<>();
    expectedData.put(topic, new HashSet<>(Arrays.asList(2, 3, 10, 11, 12)));

    Map<String, MessageId> offset = new HashMap<>();
    offset.put(topic, mids.get(3));

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableSysoutLogging();
    see.setParallelism(1);

    Properties sourceProps = sourceProperties();
    sourceProps.setProperty(TOPIC_SINGLE_OPTION_KEY, topic);
    DataStream stream = see.addSource(
            new FlinkPulsarRowSource(serviceUrl, adminUrl, sourceProps).setStartFromSpecificOffsets(offset));
    stream.flatMap(new CheckAllMessageExist(expectedData, 5)).setParallelism(1);

    TestUtils.tryExecute(see, "start from specific");
}
 
Example 8
@Override
public void testProgram(StreamExecutionEnvironment env) {
	assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

	env.enableCheckpointing(20);
	env.setParallelism(12);
	env.disableOperatorChaining();

	DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

	DataStream<String> mapped = stream
			.map(new OnceFailingIdentityMapper(NUM_STRINGS));

	BucketingSink<String> sink = new BucketingSink<String>(outPath)
			.setBucketer(new BasePathBucketer<String>())
			.setBatchSize(10000)
			.setValidLengthPrefix("")
			.setPartPrefix(PART_PREFIX)
			.setPendingPrefix("")
			.setPendingSuffix(PENDING_SUFFIX)
			.setInProgressSuffix(IN_PROGRESS_SUFFIX);

	mapped.addSink(sink);

}
 
Example 9
Source Project: flink   Source File: StreamTaskTimerITCase.java    License: Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOperatorChainedToSource() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(timeCharacteristic);
	env.setParallelism(1);

	DataStream<String> source = env.addSource(new InfiniteTestSource());

	source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));

	try {
		env.execute("Timer test");
	} catch (JobExecutionException e) {
		verifyJobExecutionException(e);
	}
}
 
Example 10
Source Project: flink   Source File: CollectITCase.java    License: Apache License 2.0
@Test
public void testCollect() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	final long n = 10;
	DataStream<Long> stream = env.generateSequence(1, n);

	long i = 1;
	for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
		long x = it.next();
		assertEquals("received wrong element", i, x);
		i++;
	}

	assertEquals("received wrong number of elements", n + 1, i);
}
 
Example 11
Source Project: flink   Source File: GroupedProcessingTimeWindowExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);

		DataStream<Tuple2<Long, Long>> stream = env.addSource(new DataSource());

		stream
			.keyBy(0)
			.timeWindow(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
			.reduce(new SummingReducer())

			// alternative: use an apply function which does not pre-aggregate
//			.keyBy(new FirstFieldKeyExtractor<Tuple2<Long, Long>, Long>())
//			.window(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
//			.apply(new SummingWindowFunction())

			.addSink(new SinkFunction<Tuple2<Long, Long>>() {
				@Override
				public void invoke(Tuple2<Long, Long> value) {
				}
			});

		env.execute();
	}
 
Example 12
Source Project: toolbox   Source File: StreamWordCountExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<String, Integer>> dataStream = env
                .fromElements("Who's there?",
                        "I think I hear them. Stand, ho! Who's there?")
                //.socketTextStream("localhost", 9999)
                .flatMap(new Splitter())
                .keyBy(0)
                .sum(1);

        dataStream.print();

        env.execute();

        //env.execute("Socket Stream WordCount");
    }
 
Example 13
Source Project: da-streamingledger   Source File: Union.java    License: Apache License 2.0
/**
 * Unions differently typed {@link DataStream}s into a single {@code DataStream}.
 *
 * <p>The resulting {@code DataStream} is of type {@link TaggedElement} where
 * {@link TaggedElement#getDataStreamTag()} corresponds to the list position of the source {@code DataStream} in
 * {@code inputs} that produced that element, and {@link TaggedElement#getElement()} is the element produced.
 *
 * @param inputs the input data streams to union.
 * @return a {@code DataStream} that corresponds to the union of all the input {@link DataStream}s
 */
public static DataStream<TaggedElement> apply(List<DataStream<?>> inputs) {
    checkArgument(!inputs.isEmpty(), "union requires at least one input data stream.");

    List<DataStream<TaggedElement>> taggedInputs = tagInputStreams(inputs);
    if (taggedInputs.size() == 1) {
        return taggedInputs.get(0);
    }
    DataStream<TaggedElement> first = taggedInputs.get(0);
    List<DataStream<TaggedElement>> restList = taggedInputs.subList(1, taggedInputs.size());

    @SuppressWarnings({"unchecked", "raw"})
    DataStream<TaggedElement>[] restArray = (DataStream<TaggedElement>[]) new DataStream[restList.size()];
    DataStream<TaggedElement>[] rest = restList.toArray(restArray);
    return first.union(rest);
}
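As a hedged usage sketch (not taken from the da-streamingledger project), the helper above could be invoked as follows; the stream names and element values are illustrative:

// Usage sketch: tag indices follow list order, so TaggedElement#getDataStreamTag()
// is 0 for elements coming from 'names' and 1 for elements coming from 'ids'.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> names = env.fromElements("alice", "bob");
DataStream<Long> ids = env.fromElements(1L, 2L);

List<DataStream<?>> inputs = new ArrayList<>();
inputs.add(names);
inputs.add(ids);

DataStream<TaggedElement> unioned = Union.apply(inputs);
unioned.print();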
 
Example 14
Source Project: blog_demos   Source File: ReadTextFile.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set the parallelism to 1
    env.setParallelism(1);

    // use a txt file as the data source
    DataStream<String> textDataStream = env.readTextFile("file:///Users/zhaoqin/temp/202003/14/README.txt", "UTF-8");

    // count the words and print the result
    textDataStream
            .flatMap(new Splitter())
            .keyBy(0)
            .sum(1)
            .print();

    env.execute("API DataSource demo : readTextFile");
}
 
Example 15
Source Project: Flink-CEPplus   Source File: WindowTranslationTest.java    License: Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testProcessWithCustomTrigger() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.trigger(CountTrigger.of(1))
			.process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void process(String key,
						Context ctx,
						Iterable<Tuple2<String, Integer>> values,
						Collector<Tuple2<String, Integer>> out) throws Exception {
					for (Tuple2<String, Integer> in : values) {
						out.collect(in);
					}
				}
			});

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof CountTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example 16
Source Project: flink   Source File: StreamTableEnvironmentImpl.java    License: Apache License 2.0
@Override
public <T> Table fromDataStream(DataStream<T> dataStream, String fields) {
	List<Expression> expressions = ExpressionParser.parseExpressionList(fields);
	JavaDataStreamQueryOperation<T> queryOperation = asQueryOperation(
		dataStream,
		Optional.of(expressions));

	return createTable(queryOperation);
}
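For context, a hedged sketch of how the public fromDataStream API that this method implements is typically called from user code; the field names and element values are illustrative, and the String-based field expression is the older overload shown above:

// Usage sketch: expose a DataStream as a Table with renamed fields.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

DataStream<Tuple2<String, Integer>> stream = env.fromElements(Tuple2.of("hello", 1));
Table table = tableEnv.fromDataStream(stream, "word, frequency");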
 
Example 17
Source Project: flink   Source File: WindowTranslationTest.java    License: Apache License 2.0
@Test
public void testAggregateWithProcessWindowFunctionEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple3<String, String, Integer>> source = env.fromElements(
		Tuple3.of("hello", "hallo", 1),
		Tuple3.of("hello", "hallo", 2));

	DataStream<String> window = source
			.keyBy(new Tuple3KeySelector())
			.window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.aggregate(new DummyAggregationFunction(), new TestProcessWindowFunction());

	final OneInputTransformation<Tuple3<String, String, Integer>, String> transform =
		(OneInputTransformation<Tuple3<String, String, Integer>, String>) window.getTransformation();

	final OneInputStreamOperator<Tuple3<String, String, Integer>, String> operator = transform.getOperator();

	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator =
		(WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator;

	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor);

	processElementAndEnsureOutput(
			operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1));
}
 
Example 18
Source Project: flink   Source File: FlinkKafkaProducer010.java    License: Apache License 2.0
private FlinkKafkaProducer010Configuration(
		DataStreamSink<T> originalSink,
		DataStream<T> inputStream,
		FlinkKafkaProducer010<T> producer) {
	//noinspection unchecked
	super(inputStream, originalSink.getTransformation().getOperator());
	this.transformation = originalSink.getTransformation();
	this.producer = producer;
}
 
Example 19
Source Project: flink-learning   Source File: MyRetractStreamTableSink.java    License: Apache License 2.0
@Override
public DataStreamSink<?> consumeDataStream(DataStream<Tuple2<Boolean, Row>> dataStream) {
    return dataStream.addSink(new SinkFunction<Tuple2<Boolean, Row>>() {
        @Override
        public void invoke(Tuple2<Boolean, Row> value, Context context) throws Exception {
            // custom sink
            // f0 == true : insert a new record
            // f0 == false: retract (delete) an old record
            if (value.f0) {
                // could write to MySQL or Kafka, or send an HTTP POST... implement as needed for the use case
                System.out.println(value.f1);
            }
        }
    });
}
 
Example 20
Source Project: Flink-CEPplus   Source File: KafkaTestEnvironmentImpl.java    License: Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic, KeyedSerializationSchema<T> serSchema, Properties props, FlinkKafkaPartitioner<T> partitioner) {
	return stream.addSink(new FlinkKafkaProducer<T>(
		topic,
		serSchema,
		props,
		Optional.ofNullable(partitioner),
		producerSemantic,
		FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
 
Example 21
public static void main(String[] args) throws Exception {
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().setGlobalJobParameters(parameterTool);
	DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
		parameterTool.getRequired("kafka.topic"),
		new SimpleStringSchema(),
		parameterTool.getProperties()));

	rawMessageStream.print();

	env.execute();
}
 
Example 22
Source Project: flink   Source File: KafkaTestEnvironmentImpl.java    License: Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic, KeyedSerializationSchema<T> serSchema, Properties props, FlinkKafkaPartitioner<T> partitioner) {
	return stream.addSink(new FlinkKafkaProducer011<>(
		topic,
		serSchema,
		props,
		Optional.ofNullable(partitioner),
		producerSemantic,
		FlinkKafkaProducer011.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
 
Example 23
/**
 * Creates a DataStream where elements are grouped in windows based on the specified windowing strategy.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #createForIterable(PipelineOptions, PCollection, KeyedStream)}
 * is that this method assumes that a combiner function is provided
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 * A combiner helps increase the processing speed and, in most cases, reduces the per-window state.
 *
 * @param options            the general job configuration options.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream, it is assumed to already be grouped by key.
 * @param combiner           the combiner to be used.
 * @param outputKvCoder      the coder for the output key/value pairs.
 */
public static <K, VIN, VACC, VOUT> DataStream<WindowedValue<KV<K, VOUT>>> create(
		PipelineOptions options,
		PCollection input,
		KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey,
		Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner,
		KvCoder<K, VOUT> outputKvCoder) {
	Preconditions.checkNotNull(options);

	KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
	FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper<>(options,
			input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, combiner);

	Coder<WindowedValue<KV<K, VOUT>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
			outputKvCoder,
			input.getWindowingStrategy().getWindowFn().windowCoder());

	CoderTypeInformation<WindowedValue<KV<K, VOUT>>> outputTypeInfo =
			new CoderTypeInformation<>(windowedOutputElemCoder);

	DataStream<WindowedValue<KV<K, VOUT>>> groupedByKeyAndWindow = groupedStreamByKey
			.transform("GroupByWindowWithCombiner",
					new CoderTypeInformation<>(outputKvCoder),
					windower)
			.returns(outputTypeInfo);

	return groupedByKeyAndWindow;
}
 
Example 24
Source Project: flink   Source File: KafkaShuffleITCase.java    License: Apache License 2.0
private Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> testKafkaShuffleProducer(
		String topic,
		StreamExecutionEnvironment env,
		int numberOfPartitions,
		int producerParallelism,
		int numElementsPerProducer,
		TimeCharacteristic timeCharacteristic) throws Exception {
	createTestTopic(topic, numberOfPartitions, 1);

	env.setParallelism(producerParallelism);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(timeCharacteristic);

	DataStream<Tuple3<Integer, Long, Integer>> source =
		env.addSource(new KafkaSourceFunction(numElementsPerProducer, false)).setParallelism(producerParallelism);
	DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime) ?
		source.assignTimestampsAndWatermarks(new PunctuatedExtractor()).setParallelism(producerParallelism) : source;

	Properties properties = kafkaServer.getStandardProperties();
	Properties kafkaProperties = PropertiesUtil.flatten(properties);

	kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism));
	kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions));
	kafkaProperties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
	kafkaProperties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
	FlinkKafkaShuffle.writeKeyBy(input, topic, kafkaProperties, 0);

	env.execute("Write to " + topic);
	ImmutableMap.Builder<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results = ImmutableMap.builder();

	for (int p = 0; p < numberOfPartitions; p++) {
		results.put(p, kafkaServer.getAllRecordsFromTopic(kafkaProperties, topic, p, 5000));
	}

	deleteTestTopic(topic);

	return results.build();
}
 
Example 25
Source Project: flink   Source File: StreamingJobGraphGeneratorTest.java    License: Apache License 2.0
/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}.
 */
@Test
public void testShuffleModeBatch() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	// it can not be chained with BATCH shuffle mode
	JobVertex sourceVertex = verticesSorted.get(0);
	JobVertex mapVertex = verticesSorted.get(1);

	// BATCH shuffle mode is translated into BLOCKING result partition
	assertEquals(ResultPartitionType.BLOCKING,
		sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.BLOCKING,
		mapVertex.getProducedDataSets().get(0).getResultType());
}
 
Example 26
Source Project: bahir-flink   Source File: SiddhiCEP.java    License: Apache License 2.0
/**
 * Register a stream with a unique <code>streamId</code>, a source <code>dataStream</code> and schema fields,
 * and select the registered stream as the initial stream to connect to the Siddhi runtime.
 *
 * @see #registerStream(String, DataStream, String...)
 * @see #from(String)
 */
public <T> SiddhiStream.SingleSiddhiStream<T> from(String streamId, DataStream<T> dataStream, String... fieldNames) {
    Preconditions.checkNotNull(streamId,"streamId");
    Preconditions.checkNotNull(dataStream,"dataStream");
    Preconditions.checkNotNull(fieldNames,"fieldNames");
    this.registerStream(streamId, dataStream, fieldNames);
    return new SiddhiStream.SingleSiddhiStream<>(streamId, this);
}
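A hedged usage sketch of this instance method, assuming the SiddhiCEP environment factory from the same bahir-flink library; the stream id, field names, and tuple types are illustrative:

// Usage sketch: register a tuple stream under "inputStream" and run a Siddhi query on it.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
SiddhiCEP cep = SiddhiCEP.getSiddhiEnvironment(env);

DataStream<Tuple3<Integer, String, Double>> input =
    env.fromElements(Tuple3.of(1, "apple", 2.5), Tuple3.of(2, "pear", 1.5));

DataStream<Tuple2<String, Double>> output = cep
    .from("inputStream", input, "id", "name", "price")
    .cql("from inputStream select name, price insert into outputStream")
    .returns("outputStream");

output.print();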
 
Example 27
Source Project: flink-simple-tutorial   Source File: InsideDataSource.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();


        // add an array as the input data source
        String[] elementInput = new String[]{"hello Flink", "Second Line"};
        DataStream<String> text = env.fromElements(elementInput);

        // add a List collection as the input data source
        List<String> collectionInput = new ArrayList<>();
        collectionInput.add("hello Flink");
        DataStream<String> text2 = env.fromCollection(collectionInput);

        // add a socket as the input data source
        // 4 parameters -> (hostname: IP address, port, delimiter, maxRetry: maximum number of retries)
        DataStream<String> text3 = env.socketTextStream("localhost", 9999, "\n", 4);


        // add file-based sources
        // read a text file directly
        DataStream<String> text4 = env.readTextFile("/opt/history.log");
        // specify a CsvInputFormat and monitor the csv file (two processing modes), polling every 10 ms
        DataStream<String> text5 = env.readFile(new CsvInputFormat<String>(new Path("/opt/history.csv")) {
            @Override
            protected String fillRecord(String s, Object[] objects) {
                return null;
            }
        },"/opt/history.csv", FileProcessingMode.PROCESS_CONTINUOUSLY,10);

        text.print();

        env.execute("Inside DataSource Demo");
    }
 
Example 28
Source Project: flink   Source File: StreamingOperatorsITCase.java    License: Apache License 2.0
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();
	DataStream<Integer> input = env.fromElements(1, 2, 3);
	input.flatMap(new FlatMapFunction<Integer, Integer>() {
		@Override
		public void flatMap(Integer value, Collector<Integer> out) throws Exception {
			out.collect(value << 1);
		}
	});
	env.execute();
}
 
Example 29
Source Project: flink   Source File: IterateITCase.java    License: Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
	int numRetries = 5;
	int timeoutScale = 1;

	for (int numRetry = 0; numRetry < numRetries; numRetry++) {
		try {
			StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
			iterated = new boolean[parallelism];

			DataStream<Boolean> source = env.fromCollection(Collections.nCopies(parallelism * 2, false))
					.map(noOpBoolMap).name("ParallelizeMap");

			IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale);

			DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap);

			iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

			iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink());

			env.execute();

			for (boolean iter : iterated) {
				assertTrue(iter);
			}

			break; // success
		} catch (Throwable t) {
			LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t);

			if (numRetry >= numRetries - 1) {
				throw t;
			} else {
				timeoutScale *= 2;
			}
		}
	}
}
 
Example 30
Source Project: Alink   Source File: DataStreamConversionUtil.java    License: Apache License 2.0
/**
 * Convert the given DataStream to a Table with the specified colNames.
 *
 * @param session  the MLEnvironment used to convert the DataStream to a Table.
 * @param data     the DataStream to convert.
 * @param colNames the specified colNames.
 * @return the converted Table.
 */
public static Table toTable(MLEnvironment session, DataStream <Row> data, String[] colNames) {
	if (null == colNames || colNames.length == 0) {
		return session.getStreamTableEnvironment().fromDataStream(data);
	} else {
		StringBuilder sbd = new StringBuilder();
		sbd.append(colNames[0]);
		for (int i = 1; i < colNames.length; i++) {
			sbd.append(",").append(colNames[i]);
		}
		return session.getStreamTableEnvironment().fromDataStream(data, sbd.toString());
	}
}
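A hedged usage sketch of the helper above; obtaining the session via MLEnvironmentFactory.getDefault() is an assumption, and the row contents and column names are illustrative:

// Usage sketch: convert a DataStream<Row> into a Table with named columns.
MLEnvironment session = MLEnvironmentFactory.getDefault();
StreamExecutionEnvironment env = session.getStreamExecutionEnvironment();

DataStream<Row> rows = env.fromCollection(
    Arrays.asList(Row.of(1L, "apple"), Row.of(2L, "banana")),
    new RowTypeInfo(Types.LONG, Types.STRING));

// Passing a null or empty array keeps the default f0, f1, ... column names.
Table table = DataStreamConversionUtil.toTable(session, rows, new String[]{"id", "name"});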