org.apache.flink.streaming.api.datastream.DataStream Java Examples

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream. Each example notes the project and source file it was taken from.
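Before the project-specific examples, here is a minimal, self-contained sketch of the common pattern they all follow: obtain a StreamExecutionEnvironment, create a DataStream, transform it, attach a sink, and call execute(). It is not taken from any of the projects below; the class name and element values are illustrative.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class DataStreamHelloWorld {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// build a small stream from a few in-memory elements
		DataStream<String> words = env.fromElements("hello", "flink", "datastream");

		// transform each element and attach a print sink
		words.map((MapFunction<String, String>) String::toUpperCase)
				.print();

		env.execute("DataStream hello world");
	}
}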
Example #1
Source File: DataStreamTest.java    From flink with Apache License 2.0
/**
 * Ensure that WatermarkStrategy is easy to use in the API, without superfluous generics.
 */
@Test
public void testErgonomicWatermarkStrategy() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> input = env.fromElements("bonjour");

	// a single call like this needs no explicit generic type; it is inferred
	input.assignTimestampsAndWatermarks(
			WatermarkStrategy
					.forBoundedOutOfOrderness(Duration.ofMillis(10)));

	// as soon as you have a chain of methods the first call needs to specify the generic type
	input.assignTimestampsAndWatermarks(
			WatermarkStrategy
					.<String>forBoundedOutOfOrderness(Duration.ofMillis(10))
					.withTimestampAssigner((event, timestamp) -> 42L));
}
 
Example #2
Source File: Kafka011Example.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
			.addSource(
				new FlinkKafkaConsumer011<>(
					parameterTool.getRequired("input-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
			.keyBy("word")
			.map(new RollingAdditionMapper());

	input.addSink(
			new FlinkKafkaProducer011<>(
					parameterTool.getRequired("output-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties()));

	env.execute("Kafka 0.11 Example");
}
 
Example #3
Source File: BucketingSinkFaultToleranceITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
public void testProgram(StreamExecutionEnvironment env) {
	assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

	env.enableCheckpointing(20);
	env.setParallelism(12);
	env.disableOperatorChaining();

	DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

	DataStream<String> mapped = stream
			.map(new OnceFailingIdentityMapper(NUM_STRINGS));

	BucketingSink<String> sink = new BucketingSink<String>(outPath)
			.setBucketer(new BasePathBucketer<String>())
			.setBatchSize(10000)
			.setValidLengthPrefix("")
			.setPartPrefix(PART_PREFIX)
			.setPendingPrefix("")
			.setPendingSuffix(PENDING_SUFFIX)
			.setInProgressSuffix(IN_PROGRESS_SUFFIX);

	mapped.addSink(sink);

}
 
Example #4
Source File: ReadTextFile.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set the parallelism to 1
    env.setParallelism(1);

    // use a text file as the data source
    DataStream<String> textDataStream = env.readTextFile("file:///Users/zhaoqin/temp/202003/14/README.txt", "UTF-8");

    // count the words and print the result
    textDataStream
            .flatMap(new Splitter())
            .keyBy(0)
            .sum(1)
            .print();

    env.execute("API DataSource demo : readTextFile");
}
 
Example #5
Source File: Union.java    From da-streamingledger with Apache License 2.0
/**
 * Union differently typed {@link DataStream}s into single {@code DataStream}.
 *
 * <p>The resulting {@code DataStream} is of type {@link TaggedElement} where
 * {@link TaggedElement#getDataStreamTag()} corresponds to the list position of the source {@code DataStream} in
 * {@code inputs} that produced that element, and {@link TaggedElement#getElement()} is the element produced.
 *
 * @param inputs the input data streams to union.
 * @return a {@code DataStream} that corresponds to the union of all the input {@link DataStream}s
 */
public static DataStream<TaggedElement> apply(List<DataStream<?>> inputs) {
    checkArgument(!inputs.isEmpty(), "union requires at least one input data stream.");

    List<DataStream<TaggedElement>> taggedInputs = tagInputStreams(inputs);
    if (taggedInputs.size() == 1) {
        return taggedInputs.get(0);
    }
    DataStream<TaggedElement> first = taggedInputs.get(0);
    List<DataStream<TaggedElement>> restList = taggedInputs.subList(1, taggedInputs.size());

    @SuppressWarnings({"unchecked", "raw"})
    DataStream<TaggedElement>[] restArray = (DataStream<TaggedElement>[]) new DataStream[restList.size()];
    DataStream<TaggedElement>[] rest = restList.toArray(restArray);
    return first.union(rest);
}
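A possible call site for the method above, as a hedged sketch: it assumes the method lives on the Union class named in the source file, an existing StreamExecutionEnvironment env, and the usual java.util imports; the stream contents are illustrative.

    // two differently typed input streams
    DataStream<String> words = env.fromElements("a", "b", "c");
    DataStream<Integer> counts = env.fromElements(1, 2, 3);

    // tag 0 -> words, tag 1 -> counts, matching their positions in the input list
    List<DataStream<?>> inputs = Arrays.asList(words, counts);
    DataStream<TaggedElement> merged = Union.apply(inputs);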
 
Example #6
Source File: AbstractNonKeyedOperatorRestoreTestBase.java    From flink with Apache License 2.0
@Override
public void createMigrationJob(StreamExecutionEnvironment env) {
	/**
	 * Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
	 */
	DataStream<Integer> source = createSource(env, ExecutionMode.MIGRATE);

	SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.MIGRATE, source);
	first.startNewChain();

	SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.MIGRATE, first);
	second.startNewChain();

	SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);

	SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.MIGRATE, stateless);
}
 
Example #7
Source File: GroupedProcessingTimeWindowExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);

		DataStream<Tuple2<Long, Long>> stream = env.addSource(new DataSource());

		stream
			.keyBy(0)
			.timeWindow(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
			.reduce(new SummingReducer())

			// alternative: use an apply function which does not pre-aggregate
//			.keyBy(new FirstFieldKeyExtractor<Tuple2<Long, Long>, Long>())
//			.window(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
//			.apply(new SummingWindowFunction())

			.addSink(new SinkFunction<Tuple2<Long, Long>>() {
				@Override
				public void invoke(Tuple2<Long, Long> value) {
				}
			});

		env.execute();
	}
 
Example #8
Source File: StreamWordCountExample.java    From toolbox with Apache License 2.0
public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<String, Integer>> dataStream = env
                .fromElements("Who's there?",
                        "I think I hear them. Stand, ho! Who's there?")
                //.socketTextStream("localhost", 9999)
                .flatMap(new Splitter())
                .keyBy(0)
                .sum(1);

        dataStream.print();

        env.execute();

        //env.execute("Socket Stream WordCount");
    }
 
Example #9
Source File: IntervalJoinITCase.java    From flink with Apache License 2.0
@Test(expected = UnsupportedTimeCharacteristicException.class)
public void testExecutionFailsInProcessingTime() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	env.setParallelism(1);

	DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1));
	DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1));

	streamOne.keyBy(new Tuple2KeyExtractor())
		.intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
		.between(Time.milliseconds(0), Time.milliseconds(0))
		.process(new ProcessJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
			@Override
			public void processElement(Tuple2<String, Integer> left,
				Tuple2<String, Integer> right, Context ctx,
				Collector<String> out) throws Exception {
				out.collect(left + ":" + right);
			}
		});
}
 
Example #10
Source File: CollectITCase.java    From flink with Apache License 2.0
@Test
public void testCollect() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	final long n = 10;
	DataStream<Long> stream = env.generateSequence(1, n);

	long i = 1;
	for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
		long x = it.next();
		assertEquals("received wrong element", i, x);
		i++;
	}

	assertEquals("received wrong number of elements", n + 1, i);
}
 
Example #11
Source File: WindowTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.reduce(new DummyReducer());

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #12
Source File: StreamTaskTimerITCase.java    From flink with Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOperatorChainedToSource() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(timeCharacteristic);
	env.setParallelism(1);

	DataStream<String> source = env.addSource(new InfiniteTestSource());

	source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));

	try {
		env.execute("Timer test");
	} catch (JobExecutionException e) {
		verifyJobExecutionException(e);
	}
}
 
Example #13
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0
@Test
public void testUnboundedTupleSourceAndReturnTuple() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple4<Integer, String, Double, Long>> input = env
        .addSource(new RandomTupleSource(5).closeDelay(1500)).keyBy(1);

    DataStream<Tuple4<Long, Integer, String, Double>> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
 
Example #14
Source File: FlinkPulsarITest.java    From pulsar-flink with Apache License 2.0
@Test
public void testStartFromSpecific() throws Exception {
    String topic = newTopic();
    List<MessageId> mids = sendTypedMessages(topic, SchemaType.INT32, Arrays.asList(
            //  0,   1,   2, 3, 4, 5,  6,  7,  8
            -20, -21, -22, 1, 2, 3, 10, 11, 12), Optional.empty());

    Map<String, Set<Integer>> expectedData = new HashMap<>();
    expectedData.put(topic, new HashSet<>(Arrays.asList(2, 3, 10, 11, 12)));

    Map<String, MessageId> offset = new HashMap<>();
    offset.put(topic, mids.get(3));

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableSysoutLogging();
    see.setParallelism(1);

    Properties sourceProps = sourceProperties();
    sourceProps.setProperty(TOPIC_SINGLE_OPTION_KEY, topic);
    DataStream stream = see.addSource(
            new FlinkPulsarRowSource(serviceUrl, adminUrl, sourceProps).setStartFromSpecificOffsets(offset));
    stream.flatMap(new CheckAllMessageExist(expectedData, 5)).setParallelism(1);

    TestUtils.tryExecute(see, "start from specific");
}
 
Example #15
Source File: KafkaTestEnvironmentImpl.java    From Flink-CEPplus with Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic, KeyedSerializationSchema<T> serSchema, Properties props, FlinkKafkaPartitioner<T> partitioner) {
	return stream.addSink(new FlinkKafkaProducer<T>(
		topic,
		serSchema,
		props,
		Optional.ofNullable(partitioner),
		producerSemantic,
		FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
 
Example #16
Source File: KafkaTopicValidator.java    From yahoo-streaming-benchmark with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().setGlobalJobParameters(parameterTool);
	DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
		parameterTool.getRequired("kafka.topic"),
		new SimpleStringSchema(),
		parameterTool.getProperties()));

	rawMessageStream.print();

	env.execute();
}
 
Example #17
Source File: KafkaTestEnvironmentImpl.java    From flink with Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic, KeyedSerializationSchema<T> serSchema, Properties props, FlinkKafkaPartitioner<T> partitioner) {
	return stream.addSink(new FlinkKafkaProducer011<>(
		topic,
		serSchema,
		props,
		Optional.ofNullable(partitioner),
		producerSemantic,
		FlinkKafkaProducer011.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
 
Example #18
Source File: MyRetractStreamTableSink.java    From flink-learning with Apache License 2.0
@Override
public DataStreamSink<?> consumeDataStream(DataStream<Tuple2<Boolean, Row>> dataStream) {
    return dataStream.addSink(new SinkFunction<Tuple2<Boolean, Row>>() {
        @Override
        public void invoke(Tuple2<Boolean, Row> value, Context context) throws Exception {
            // custom sink logic
            // f0 == true : a new row is being inserted
            // f0 == false: an old row is being retracted (deleted)
            if (value.f0) {
                // could write to MySQL or Kafka, or send an HTTP POST... implement as the use case requires
                System.out.println(value.f1);
            }
        }
    });
}
 
Example #19
Source File: KafkaTableSourceBase.java    From flink with Apache License 2.0
/**
 * NOTE: This method is for internal use only for defining a TableSource.
 *       Do not use it in Table API programs.
 */
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {

	DeserializationSchema<Row> deserializationSchema = getDeserializationSchema();
	// Version-specific Kafka consumer
	FlinkKafkaConsumerBase<Row> kafkaConsumer = getKafkaConsumer(topic, properties, deserializationSchema);
	return env.addSource(kafkaConsumer).name(explainSource());
}
 
Example #20
Source File: DataStreamConversionUtil.java    From Alink with Apache License 2.0
/**
 * Convert the given DataStream to a Table with the specified column names.
 *
 * @param session  the MLEnvironment used to convert the DataStream to a Table.
 * @param data     the DataStream to convert.
 * @param colNames the specified column names.
 * @return the converted Table.
 */
public static Table toTable(MLEnvironment session, DataStream <Row> data, String[] colNames) {
	if (null == colNames || colNames.length == 0) {
		return session.getStreamTableEnvironment().fromDataStream(data);
	} else {
		StringBuilder sbd = new StringBuilder();
		sbd.append(colNames[0]);
		for (int i = 1; i < colNames.length; i++) {
			sbd.append(",").append(colNames[i]);
		}
		return session.getStreamTableEnvironment().fromDataStream(data, sbd.toString());
	}
}
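A possible call site for the utility above, as a hedged sketch: mlEnv and rowStream are illustrative placeholders for an existing MLEnvironment and a two-column DataStream<Row>.

	// name the two columns of the incoming rows explicitly
	Table table = DataStreamConversionUtil.toTable(mlEnv, rowStream, new String[]{"id", "score"});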
 
Example #21
Source File: KafkaShuffleITCase.java    From flink with Apache License 2.0
private Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> testKafkaShuffleProducer(
		String topic,
		StreamExecutionEnvironment env,
		int numberOfPartitions,
		int producerParallelism,
		int numElementsPerProducer,
		TimeCharacteristic timeCharacteristic) throws Exception {
	createTestTopic(topic, numberOfPartitions, 1);

	env.setParallelism(producerParallelism);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(timeCharacteristic);

	DataStream<Tuple3<Integer, Long, Integer>> source =
		env.addSource(new KafkaSourceFunction(numElementsPerProducer, false)).setParallelism(producerParallelism);
	DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime) ?
		source.assignTimestampsAndWatermarks(new PunctuatedExtractor()).setParallelism(producerParallelism) : source;

	Properties properties = kafkaServer.getStandardProperties();
	Properties kafkaProperties = PropertiesUtil.flatten(properties);

	kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism));
	kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions));
	kafkaProperties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
	kafkaProperties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
	FlinkKafkaShuffle.writeKeyBy(input, topic, kafkaProperties, 0);

	env.execute("Write to " + topic);
	ImmutableMap.Builder<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results = ImmutableMap.builder();

	for (int p = 0; p < numberOfPartitions; p++) {
		results.put(p, kafkaServer.getAllRecordsFromTopic(kafkaProperties, topic, p, 5000));
	}

	deleteTestTopic(topic);

	return results.build();
}
 
Example #22
Source File: AllWindowTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testFoldWithProcessAllWindowFunctionEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window = source
			.windowAll(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.fold(new Tuple3<>("", "", 0), new DummyFolder(), new ProcessAllWindowFunction<Tuple3<String, String, Integer>, Tuple2<String, Integer>, TimeWindow>() {
				private static final long serialVersionUID = 1L;
				@Override
				public void process(
						Context ctx,
						Iterable<Tuple3<String, String, Integer>> values,
						Collector<Tuple2<String, Integer>> out) throws Exception {
					for (Tuple3<String, String, Integer> in : values) {
						out.collect(new Tuple2<>(in.f0, in.f2));
					}
				}
			});

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
			(OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof FoldingStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #23
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}.
 */
@Test
public void testShuffleModeBatch() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	// the source cannot be chained with the map under BATCH shuffle mode
	JobVertex sourceVertex = verticesSorted.get(0);
	JobVertex mapVertex = verticesSorted.get(1);

	// BATCH shuffle mode is translated into BLOCKING result partition
	assertEquals(ResultPartitionType.BLOCKING,
		sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.BLOCKING,
		mapVertex.getProducedDataSets().get(0).getResultType());
}
 
Example #24
Source File: WindowTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testProcessWithCustomTrigger() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.trigger(CountTrigger.of(1))
			.process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void process(String key,
						Context ctx,
						Iterable<Tuple2<String, Integer>> values,
						Collector<Tuple2<String, Integer>> out) throws Exception {
					for (Tuple2<String, Integer> in : values) {
						out.collect(in);
					}
				}
			});

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof CountTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #25
Source File: SiddhiCEP.java    From bahir-flink with Apache License 2.0
/**
 * Register a stream with a unique <code>streamId</code>, a source <code>dataStream</code> and its schema fields,
 * and select the registered stream as the initial stream to connect to the Siddhi Runtime.
 *
 * @see #registerStream(String, DataStream, String...)
 * @see #from(String)
 */
public <T> SiddhiStream.SingleSiddhiStream<T> from(String streamId, DataStream<T> dataStream, String... fieldNames) {
    Preconditions.checkNotNull(streamId,"streamId");
    Preconditions.checkNotNull(dataStream,"dataStream");
    Preconditions.checkNotNull(fieldNames,"fieldNames");
    this.registerStream(streamId, dataStream, fieldNames);
    return new SiddhiStream.SingleSiddhiStream<>(streamId, this);
}
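A hedged usage sketch of the method above, reusing the Tuple4 stream shape from Example #13; the SiddhiCEP instance cep, the stream id, and the field names are illustrative.

    // register the input stream under "inputStream" and use it as the initial stream
    SiddhiStream.SingleSiddhiStream<Tuple4<Integer, String, Double, Long>> start =
        cep.from("inputStream", input, "id", "name", "price", "timestamp");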
 
Example #26
Source File: InsideDataSource.java    From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();


        // use an array as the input source
        String[] elementInput = new String[]{"hello Flink", "Second Line"};
        DataStream<String> text = env.fromElements(elementInput);

        // use a List collection as the input source
        List<String> collectionInput = new ArrayList<>();
        collectionInput.add("hello Flink");
        DataStream<String> text2 = env.fromCollection(collectionInput);

        // use a socket as the input source
        // 4 parameters -> (hostname: IP address, port: port, delimiter: line delimiter, maxRetry: maximum number of retries)
        DataStream<String> text3 = env.socketTextStream("localhost", 9999, "\n", 4);


        // use files as the input source
        // read a text file directly
        DataStream<String> text4 = env.readTextFile("/opt/history.log");
        // specify a CsvInputFormat and monitor the csv file (FileProcessingMode offers two modes) with a 10 ms scan interval
        DataStream<String> text5 = env.readFile(new CsvInputFormat<String>(new Path("/opt/history.csv")) {
            @Override
            protected String fillRecord(String s, Object[] objects) {
                return null;
            }
        },"/opt/history.csv", FileProcessingMode.PROCESS_CONTINUOUSLY,10);

        text.print();

        env.execute("Inside DataSource Demo");
    }
 
Example #27
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
private JobGraph createJobGraphForManagedMemoryFractionTest(
	final List<ResourceSpec> resourceSpecs,
	@Nullable final List<Integer> managedMemoryWeights) throws Exception {

	final Method opMethod = getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {
		@Override
		public void run(SourceContext<Integer> ctx) {
		}

		@Override
		public void cancel() {
		}
	});
	opMethod.invoke(source, resourceSpecs.get(0));

	// CHAIN(source -> map1) in default slot sharing group
	final DataStream<Integer> map1 = source.map((MapFunction<Integer, Integer>) value -> value);
	opMethod.invoke(map1, resourceSpecs.get(1));

	// CHAIN(map2) in default slot sharing group
	final DataStream<Integer> map2 = map1.rebalance().map((MapFunction<Integer, Integer>) value -> value);
	opMethod.invoke(map2, resourceSpecs.get(2));

	// CHAIN(map3) in test slot sharing group
	final DataStream<Integer> map3 = map2.rebalance().map(value -> value).slotSharingGroup("test");
	opMethod.invoke(map3, resourceSpecs.get(3));

	if (managedMemoryWeights != null) {
		source.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(0));
		map1.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(1));
		map2.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(2));
		map3.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(3));
	}

	return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
 
Example #28
Source File: IterateITCase.java    From flink with Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
	int numRetries = 5;
	int timeoutScale = 1;

	for (int numRetry = 0; numRetry < numRetries; numRetry++) {
		try {
			StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
			iterated = new boolean[parallelism];

			DataStream<Boolean> source = env.fromCollection(Collections.nCopies(parallelism * 2, false))
					.map(noOpBoolMap).name("ParallelizeMap");

			IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale);

			DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap);

			iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

			iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink());

			env.execute();

			for (boolean iter : iterated) {
				assertTrue(iter);
			}

			break; // success
		} catch (Throwable t) {
			LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t);

			if (numRetry >= numRetries - 1) {
				throw t;
			} else {
				timeoutScale *= 2;
			}
		}
	}
}
 
Example #29
Source File: StreamingOperatorsITCase.java    From flink with Apache License 2.0
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();
	DataStream<Integer> input = env.fromElements(1, 2, 3);
	input.flatMap(new FlatMapFunction<Integer, Integer>() {
		@Override
		public void flatMap(Integer value, Collector<Integer> out) throws Exception {
			out.collect(value << 1);
		}
	});
	env.execute();
}
 
Example #30
Source File: NumSeqSourceStreamOp.java    From Alink with Apache License 2.0
public NumSeqSourceStreamOp(long from, long to, String colName, double timePerSample, Params params) {
    super(params);

    DataStreamSource<Long> seq = MLEnvironmentFactory.get(getMLEnvironmentId()).getStreamExecutionEnvironment().generateSequence(from, to);
    DataStream<Long> data = seq.map(new transform(new Double[]{timePerSample}));

    this.setOutputTable(MLEnvironmentFactory.get(getMLEnvironmentId()).getStreamTableEnvironment().fromDataStream(data, colName));
}