Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream. They are extracted from open source projects; where available, the source project, file, and license are noted above each example.
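Before the examples themselves, here is a minimal sketch of the pattern most of them follow: obtain a StreamExecutionEnvironment, build a DataStream from a source, apply transformations, attach a sink, and call execute(). The class name and element values below are illustrative only.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class DataStreamSketch {
	public static void main(String[] args) throws Exception {
		// Every pipeline starts from a StreamExecutionEnvironment.
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Build a DataStream from an in-memory source and apply a transformation.
		DataStream<String> words = env.fromElements("hello", "flink");
		words.map(new MapFunction<String, String>() {
			@Override
			public String map(String value) {
				return value.toUpperCase();
			}
		}).print();

		// Nothing runs until execute() is called.
		env.execute("DataStream sketch");
	}
}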
Example 1
@Override
public void createMigrationJob(StreamExecutionEnvironment env) {
	/**
	 * Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
	 */
	DataStream<Integer> source = createSource(env, ExecutionMode.MIGRATE);

	SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.MIGRATE, source);
	first.startNewChain();

	SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.MIGRATE, first);
	second.startNewChain();

	SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);

	SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.MIGRATE, stateless);
}
 
Example 2
Source Project: Flink-CEPplus   Source File: Kafka011Example.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
			.addSource(
				new FlinkKafkaConsumer011<>(
					parameterTool.getRequired("input-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
			.keyBy("word")
			.map(new RollingAdditionMapper());

	input.addSink(
			new FlinkKafkaProducer011<>(
					parameterTool.getRequired("output-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties()));

	env.execute("Kafka 0.11 Example");
}
 
Example 3
Source Project: flink   Source File: IntervalJoinITCase.java    License: Apache License 2.0
@Test(expected = UnsupportedTimeCharacteristicException.class)
public void testExecutionFailsInProcessingTime() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	env.setParallelism(1);

	DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1));
	DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1));

	streamOne.keyBy(new Tuple2KeyExtractor())
		.intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
		.between(Time.milliseconds(0), Time.milliseconds(0))
		.process(new ProcessJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
			@Override
			public void processElement(Tuple2<String, Integer> left,
				Tuple2<String, Integer> right, Context ctx,
				Collector<String> out) throws Exception {
				out.collect(left + ":" + right);
			}
		});
}
 
Example 4
Source Project: flink   Source File: DataStreamTest.java    License: Apache License 2.0
/**
 * Ensure that WatermarkStrategy is easy to use in the API, without superfluous generics.
 */
@Test
public void testErgonomicWatermarkStrategy() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> input = env.fromElements("bonjour");

	// without a chain of methods, the call does not need an explicit generic type
	input.assignTimestampsAndWatermarks(
			WatermarkStrategy
					.forBoundedOutOfOrderness(Duration.ofMillis(10)));

	// as soon as you have a chain of methods the first call needs to specify the generic type
	input.assignTimestampsAndWatermarks(
			WatermarkStrategy
					.<String>forBoundedOutOfOrderness(Duration.ofMillis(10))
					.withTimestampAssigner((event, timestamp) -> 42L));
}
 
Example 5
Source Project: Flink-CEPplus   Source File: WindowTranslationTest.java    License: Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.reduce(new DummyReducer());

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example 6
Source Project: bahir-flink   Source File: SiddhiCEPITCase.java    License: Apache License 2.0
@Test
public void testUnboundedTupleSourceAndReturnTuple() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple4<Integer, String, Double, Long>> input = env
        .addSource(new RandomTupleSource(5).closeDelay(1500)).keyBy(1);

    DataStream<Tuple4<Long, Integer, String, Double>> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
 
Example 7
Source Project: pulsar-flink   Source File: FlinkPulsarITest.java    License: Apache License 2.0
@Test
public void testStartFromSpecific() throws Exception {
    String topic = newTopic();
    List<MessageId> mids = sendTypedMessages(topic, SchemaType.INT32, Arrays.asList(
            //  0,   1,   2, 3, 4, 5,  6,  7,  8
            -20, -21, -22, 1, 2, 3, 10, 11, 12), Optional.empty());

    Map<String, Set<Integer>> expectedData = new HashMap<>();
    expectedData.put(topic, new HashSet<>(Arrays.asList(2, 3, 10, 11, 12)));

    Map<String, MessageId> offset = new HashMap<>();
    offset.put(topic, mids.get(3));

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableSysoutLogging();
    see.setParallelism(1);

    Properties sourceProps = sourceProperties();
    sourceProps.setProperty(TOPIC_SINGLE_OPTION_KEY, topic);
    DataStream stream = see.addSource(
            new FlinkPulsarRowSource(serviceUrl, adminUrl, sourceProps).setStartFromSpecificOffsets(offset));
    stream.flatMap(new CheckAllMessageExist(expectedData, 5)).setParallelism(1);

    TestUtils.tryExecute(see, "start from specific");
}
 
Example 8
@Override
public void testProgram(StreamExecutionEnvironment env) {
	assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

	env.enableCheckpointing(20);
	env.setParallelism(12);
	env.disableOperatorChaining();

	DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

	DataStream<String> mapped = stream
			.map(new OnceFailingIdentityMapper(NUM_STRINGS));

	BucketingSink<String> sink = new BucketingSink<String>(outPath)
			.setBucketer(new BasePathBucketer<String>())
			.setBatchSize(10000)
			.setValidLengthPrefix("")
			.setPartPrefix(PART_PREFIX)
			.setPendingPrefix("")
			.setPendingSuffix(PENDING_SUFFIX)
			.setInProgressSuffix(IN_PROGRESS_SUFFIX);

	mapped.addSink(sink);

}
 
Example 9
Source Project: flink   Source File: StreamTaskTimerITCase.java    License: Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOperatorChainedToSource() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(timeCharacteristic);
	env.setParallelism(1);

	DataStream<String> source = env.addSource(new InfiniteTestSource());

	source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));

	try {
		env.execute("Timer test");
	} catch (JobExecutionException e) {
		verifyJobExecutionException(e);
	}
}
 
Example 10
Source Project: flink   Source File: CollectITCase.java    License: Apache License 2.0
@Test
public void testCollect() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	final long n = 10;
	DataStream<Long> stream = env.generateSequence(1, n);

	long i = 1;
	for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
		long x = it.next();
		assertEquals("received wrong element", i, x);
		i++;
	}

	assertEquals("received wrong number of elements", n + 1, i);
}
 
Example 11
Source Project: flink   Source File: GroupedProcessingTimeWindowExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);

		DataStream<Tuple2<Long, Long>> stream = env.addSource(new DataSource());

		stream
			.keyBy(0)
			.timeWindow(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
			.reduce(new SummingReducer())

			// alternative: use an apply function which does not pre-aggregate
//			.keyBy(new FirstFieldKeyExtractor<Tuple2<Long, Long>, Long>())
//			.window(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
//			.apply(new SummingWindowFunction())

			.addSink(new SinkFunction<Tuple2<Long, Long>>() {
				@Override
				public void invoke(Tuple2<Long, Long> value) {
				}
			});

		env.execute();
	}
 
Example 12
Source Project: toolbox   Source File: StreamWordCountExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<String, Integer>> dataStream = env
                .fromElements("Who's there?",
                        "I think I hear them. Stand, ho! Who's there?")
                //.socketTextStream("localhost", 9999)
                .flatMap(new Splitter())
                .keyBy(0)
                .sum(1);

        dataStream.print();

        env.execute();

        //env.execute("Socket Stream WordCount");
    }
 
Example 13
Source Project: da-streamingledger   Source File: Union.java    License: Apache License 2.0
/**
 * Unions differently typed {@link DataStream}s into a single {@code DataStream}.
 *
 * <p>The resulting {@code DataStream} is of type {@link TaggedElement} where
 * {@link TaggedElement#getDataStreamTag()} corresponds to the list position of the source {@code DataStream} in
 * {@code inputs} that produced that element, and {@link TaggedElement#getElement()} is the element produced.
 *
 * @param inputs the input data streams to union.
 * @return a {@code DataStream} that corresponds to the union of all the input {@link DataStream}s
 */
public static DataStream<TaggedElement> apply(List<DataStream<?>> inputs) {
    checkArgument(!inputs.isEmpty(), "union requires at least one input data stream.");

    List<DataStream<TaggedElement>> taggedInputs = tagInputStreams(inputs);
    if (taggedInputs.size() == 1) {
        return taggedInputs.get(0);
    }
    DataStream<TaggedElement> first = taggedInputs.get(0);
    List<DataStream<TaggedElement>> restList = taggedInputs.subList(1, taggedInputs.size());

    @SuppressWarnings({"unchecked", "raw"})
    DataStream<TaggedElement>[] restArray = (DataStream<TaggedElement>[]) new DataStream[restList.size()];
    DataStream<TaggedElement>[] rest = restList.toArray(restArray);
    return first.union(rest);
}
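As a hedged usage sketch (not taken from the da-streamingledger project), the helper above could be invoked as follows; the stream names and element values are illustrative:

// Usage sketch: tag indices follow list order, so TaggedElement#getDataStreamTag()
// is 0 for elements coming from 'names' and 1 for elements coming from 'ids'.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> names = env.fromElements("alice", "bob");
DataStream<Long> ids = env.fromElements(1L, 2L);

List<DataStream<?>> inputs = new ArrayList<>();
inputs.add(names);
inputs.add(ids);

DataStream<TaggedElement> unioned = Union.apply(inputs);
unioned.print();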
 
Example 14
Source Project: blog_demos   Source File: ReadTextFile.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set the parallelism to 1
    env.setParallelism(1);

    // use a txt file as the data source
    DataStream<String> textDataStream = env.readTextFile("file:///Users/zhaoqin/temp/202003/14/README.txt", "UTF-8");

    // count the words and print the result
    textDataStream
            .flatMap(new Splitter())
            .keyBy(0)
            .sum(1)
            .print();

    env.execute("API DataSource demo : readTextFile");
}
 
Example 15
Source Project: Flink-CEPplus   Source File: WindowTranslationTest.java    License: Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testProcessWithCustomTrigger() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.trigger(CountTrigger.of(1))
			.process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void process(String key,
						Context ctx,
						Iterable<Tuple2<String, Integer>> values,
						Collector<Tuple2<String, Integer>> out) throws Exception {
					for (Tuple2<String, Integer> in : values) {
						out.collect(in);
					}
				}
			});

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof CountTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example 16
Source Project: flink   Source File: StreamTableEnvironmentImpl.java    License: Apache License 2.0
@Override
public <T> Table fromDataStream(DataStream<T> dataStream, String fields) {
	List<Expression> expressions = ExpressionParser.parseExpressionList(fields);
	JavaDataStreamQueryOperation<T> queryOperation = asQueryOperation(
		dataStream,
		Optional.of(expressions));

	return createTable(queryOperation);
}
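For context, a hedged sketch of how the public fromDataStream API that this method implements is typically called from user code; the field names and element values are illustrative, and the String-based field expression is the older overload shown above:

// Usage sketch: expose a DataStream as a Table with renamed fields.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

DataStream<Tuple2<String, Integer>> stream = env.fromElements(Tuple2.of("hello", 1));
Table table = tableEnv.fromDataStream(stream, "word, frequency");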
 
Example 17
Source Project: flink   Source File: WindowTranslationTest.java    License: Apache License 2.0
@Test
public void testAggregateWithProcessWindowFunctionEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple3<String, String, Integer>> source = env.fromElements(
		Tuple3.of("hello", "hallo", 1),
		Tuple3.of("hello", "hallo", 2));

	DataStream<String> window = source
			.keyBy(new Tuple3KeySelector())
			.window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.aggregate(new DummyAggregationFunction(), new TestProcessWindowFunction());

	final OneInputTransformation<Tuple3<String, String, Integer>, String> transform =
		(OneInputTransformation<Tuple3<String, String, Integer>, String>) window.getTransformation();

	final OneInputStreamOperator<Tuple3<String, String, Integer>, String> operator = transform.getOperator();

	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator =
		(WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator;

	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor);

	processElementAndEnsureOutput(
			operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1));
}
 
Example 18
Source Project: flink   Source File: FlinkKafkaProducer010.java    License: Apache License 2.0
private FlinkKafkaProducer010Configuration(
		DataStreamSink<T> originalSink,
		DataStream<T> inputStream,
		FlinkKafkaProducer010<T> producer) {
	//noinspection unchecked
	super(inputStream, originalSink.getTransformation().getOperator());
	this.transformation = originalSink.getTransformation();
	this.producer = producer;
}
 
Example 19
Source Project: flink-learning   Source File: MyRetractStreamTableSink.java    License: Apache License 2.0
@Override
public DataStreamSink<?> consumeDataStream(DataStream<Tuple2<Boolean, Row>> dataStream) {
    return dataStream.addSink(new SinkFunction<Tuple2<Boolean, Row>>() {
        @Override
        public void invoke(Tuple2<Boolean, Row> value, Context context) throws Exception {
            // custom sink
            // f0 == true : insert a new record
            // f0 == false: retract (delete) an old record
            if (value.f0) {
                // could write to MySQL or Kafka, or send an HTTP POST... implement as needed for the use case
                System.out.println(value.f1);
            }
        }
    });
}
 
Example 20
Source Project: Flink-CEPplus   Source File: KafkaTestEnvironmentImpl.java    License: Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic, KeyedSerializationSchema<T> serSchema, Properties props, FlinkKafkaPartitioner<T> partitioner) {
	return stream.addSink(new FlinkKafkaProducer<T>(
		topic,
		serSchema,
		props,
		Optional.ofNullable(partitioner),
		producerSemantic,
		FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
 
Example 21
public static void main(String[] args) throws Exception {
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().setGlobalJobParameters(parameterTool);
	DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
		parameterTool.getRequired("kafka.topic"),
		new SimpleStringSchema(),
		parameterTool.getProperties()));

	rawMessageStream.print();

	env.execute();
}
 
Example 22
Source Project: flink   Source File: KafkaTestEnvironmentImpl.java    License: Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic, KeyedSerializationSchema<T> serSchema, Properties props, FlinkKafkaPartitioner<T> partitioner) {
	return stream.addSink(new FlinkKafkaProducer011<>(
		topic,
		serSchema,
		props,
		Optional.ofNullable(partitioner),
		producerSemantic,
		FlinkKafkaProducer011.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
 
Example 23
/**
 * Creates a DataStream where elements are grouped in windows based on the specified windowing strategy.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #createForIterable(PipelineOptions, PCollection, KeyedStream)}
 * is that this method assumes that a combiner function is provided
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 * A combiner helps increase the processing speed and, in most cases, reduces the per-window state.
 *
 * @param options            the general job configuration options.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream, it is assumed to already be grouped by key.
 * @param combiner           the combiner to be used.
 * @param outputKvCoder      the coder for the output key/value pairs.
 */
public static <K, VIN, VACC, VOUT> DataStream<WindowedValue<KV<K, VOUT>>> create(
		PipelineOptions options,
		PCollection input,
		KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey,
		Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner,
		KvCoder<K, VOUT> outputKvCoder) {
	Preconditions.checkNotNull(options);

	KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
	FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper<>(options,
			input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, combiner);

	Coder<WindowedValue<KV<K, VOUT>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
			outputKvCoder,
			input.getWindowingStrategy().getWindowFn().windowCoder());

	CoderTypeInformation<WindowedValue<KV<K, VOUT>>> outputTypeInfo =
			new CoderTypeInformation<>(windowedOutputElemCoder);

	DataStream<WindowedValue<KV<K, VOUT>>> groupedByKeyAndWindow = groupedStreamByKey
			.transform("GroupByWindowWithCombiner",
					new CoderTypeInformation<>(outputKvCoder),
					windower)
			.returns(outputTypeInfo);

	return groupedByKeyAndWindow;
}
 
Example 24
Source Project: flink   Source File: KafkaShuffleITCase.java    License: Apache License 2.0
private Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> testKafkaShuffleProducer(
		String topic,
		StreamExecutionEnvironment env,
		int numberOfPartitions,
		int producerParallelism,
		int numElementsPerProducer,
		TimeCharacteristic timeCharacteristic) throws Exception {
	createTestTopic(topic, numberOfPartitions, 1);

	env.setParallelism(producerParallelism);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(timeCharacteristic);

	DataStream<Tuple3<Integer, Long, Integer>> source =
		env.addSource(new KafkaSourceFunction(numElementsPerProducer, false)).setParallelism(producerParallelism);
	DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime) ?
		source.assignTimestampsAndWatermarks(new PunctuatedExtractor()).setParallelism(producerParallelism) : source;

	Properties properties = kafkaServer.getStandardProperties();
	Properties kafkaProperties = PropertiesUtil.flatten(properties);

	kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism));
	kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions));
	kafkaProperties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
	kafkaProperties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
	FlinkKafkaShuffle.writeKeyBy(input, topic, kafkaProperties, 0);

	env.execute("Write to " + topic);
	ImmutableMap.Builder<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results = ImmutableMap.builder();

	for (int p = 0; p < numberOfPartitions; p++) {
		results.put(p, kafkaServer.getAllRecordsFromTopic(kafkaProperties, topic, p, 5000));
	}

	deleteTestTopic(topic);

	return results.build();
}
 
Example 25
Source Project: flink   Source File: StreamingJobGraphGeneratorTest.java    License: Apache License 2.0
/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}.
 */
@Test
public void testShuffleModeBatch() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	// it can not be chained with BATCH shuffle mode
	JobVertex sourceVertex = verticesSorted.get(0);
	JobVertex mapVertex = verticesSorted.get(1);

	// BATCH shuffle mode is translated into BLOCKING result partition
	assertEquals(ResultPartitionType.BLOCKING,
		sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.BLOCKING,
		mapVertex.getProducedDataSets().get(0).getResultType());
}
 
Example 26
Source Project: bahir-flink   Source File: SiddhiCEP.java    License: Apache License 2.0
/**
 * Register a stream with a unique <code>streamId</code>, a source <code>dataStream</code> and schema fields,
 * and select the registered stream as the initial stream to connect to the Siddhi runtime.
 *
 * @see #registerStream(String, DataStream, String...)
 * @see #from(String)
 */
public <T> SiddhiStream.SingleSiddhiStream<T> from(String streamId, DataStream<T> dataStream, String... fieldNames) {
    Preconditions.checkNotNull(streamId,"streamId");
    Preconditions.checkNotNull(dataStream,"dataStream");
    Preconditions.checkNotNull(fieldNames,"fieldNames");
    this.registerStream(streamId, dataStream, fieldNames);
    return new SiddhiStream.SingleSiddhiStream<>(streamId, this);
}
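A hedged usage sketch of this instance method, assuming the SiddhiCEP environment factory from the same bahir-flink library; the stream id, field names, and tuple types are illustrative:

// Usage sketch: register a tuple stream under "inputStream" and run a Siddhi query on it.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
SiddhiCEP cep = SiddhiCEP.getSiddhiEnvironment(env);

DataStream<Tuple3<Integer, String, Double>> input =
    env.fromElements(Tuple3.of(1, "apple", 2.5), Tuple3.of(2, "pear", 1.5));

DataStream<Tuple2<String, Double>> output = cep
    .from("inputStream", input, "id", "name", "price")
    .cql("from inputStream select name, price insert into outputStream")
    .returns("outputStream");

output.print();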
 
Example 27
Source Project: flink-simple-tutorial   Source File: InsideDataSource.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();


        // add an array as the input data source
        String[] elementInput = new String[]{"hello Flink", "Second Line"};
        DataStream<String> text = env.fromElements(elementInput);

        // add a List collection as the input data source
        List<String> collectionInput = new ArrayList<>();
        collectionInput.add("hello Flink");
        DataStream<String> text2 = env.fromCollection(collectionInput);

        // add a socket as the input data source
        // 4 parameters -> (hostname: IP address, port, delimiter, maxRetry: maximum number of retries)
        DataStream<String> text3 = env.socketTextStream("localhost", 9999, "\n", 4);


        // add file-based sources
        // read a text file directly
        DataStream<String> text4 = env.readTextFile("/opt/history.log");
        // specify a CsvInputFormat and monitor the csv file (two processing modes), polling every 10 ms
        DataStream<String> text5 = env.readFile(new CsvInputFormat<String>(new Path("/opt/history.csv")) {
            @Override
            protected String fillRecord(String s, Object[] objects) {
                return null;
            }
        },"/opt/history.csv", FileProcessingMode.PROCESS_CONTINUOUSLY,10);

        text.print();

        env.execute("Inside DataSource Demo");
    }
 
Example 28
Source Project: flink   Source File: StreamingOperatorsITCase.java    License: Apache License 2.0
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();
	DataStream<Integer> input = env.fromElements(1, 2, 3);
	input.flatMap(new FlatMapFunction<Integer, Integer>() {
		@Override
		public void flatMap(Integer value, Collector<Integer> out) throws Exception {
			out.collect(value << 1);
		}
	});
	env.execute();
}
 
Example 29
Source Project: flink   Source File: IterateITCase.java    License: Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
	int numRetries = 5;
	int timeoutScale = 1;

	for (int numRetry = 0; numRetry < numRetries; numRetry++) {
		try {
			StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
			iterated = new boolean[parallelism];

			DataStream<Boolean> source = env.fromCollection(Collections.nCopies(parallelism * 2, false))
					.map(noOpBoolMap).name("ParallelizeMap");

			IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale);

			DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap);

			iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

			iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink());

			env.execute();

			for (boolean iter : iterated) {
				assertTrue(iter);
			}

			break; // success
		} catch (Throwable t) {
			LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t);

			if (numRetry >= numRetries - 1) {
				throw t;
			} else {
				timeoutScale *= 2;
			}
		}
	}
}
 
Example 30
Source Project: Alink   Source File: DataStreamConversionUtil.java    License: Apache License 2.0
/**
 * Convert the given DataStream to a Table with the specified colNames.
 *
 * @param session  the MLEnvironment used to convert the DataStream to a Table.
 * @param data     the DataStream to convert.
 * @param colNames the specified colNames.
 * @return the converted Table.
 */
public static Table toTable(MLEnvironment session, DataStream <Row> data, String[] colNames) {
	if (null == colNames || colNames.length == 0) {
		return session.getStreamTableEnvironment().fromDataStream(data);
	} else {
		StringBuilder sbd = new StringBuilder();
		sbd.append(colNames[0]);
		for (int i = 1; i < colNames.length; i++) {
			sbd.append(",").append(colNames[i]);
		}
		return session.getStreamTableEnvironment().fromDataStream(data, sbd.toString());
	}
}
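A hedged usage sketch of the helper above; obtaining the session via MLEnvironmentFactory.getDefault() is an assumption, and the row contents and column names are illustrative:

// Usage sketch: convert a DataStream<Row> into a Table with named columns.
MLEnvironment session = MLEnvironmentFactory.getDefault();
StreamExecutionEnvironment env = session.getStreamExecutionEnvironment();

DataStream<Row> rows = env.fromCollection(
    Arrays.asList(Row.of(1L, "apple"), Row.of(2L, "banana")),
    new RowTypeInfo(Types.LONG, Types.STRING));

// Passing a null or empty array keeps the default f0, f1, ... column names.
Table table = DataStreamConversionUtil.toTable(session, rows, new String[]{"id", "name"});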