Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setParallelism()
The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setParallelism().
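Before the individual examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the class name, values, and job name are made up for illustration). It shows the basic pattern the examples share: setParallelism(int) on the environment sets the default parallelism for all operators of the job, and a single operator can still override it with its own setParallelism call.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SetParallelismSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // default parallelism for every operator of this job
        env.setParallelism(2);

        DataStream<Integer> numbers = env.fromElements(1, 2, 3, 4, 5);

        numbers
            .map(n -> n * n)      // inherits the environment default (parallelism 2)
            .print()
            .setParallelism(1);   // per-operator override: the print sink runs single-threaded

        env.execute("setParallelism sketch");
    }
}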
Example 1
Source File: ConsumeFromDynamoDBStreams.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties dynamodbStreamsConsumerConfig = new Properties();
    final String streamName = pt.getRequired(DYNAMODB_STREAM_NAME);
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> dynamodbStreams = see.addSource(new FlinkDynamoDBStreamsConsumer<>(
            streamName,
            new SimpleStringSchema(),
            dynamodbStreamsConsumerConfig));

    dynamodbStreams.print();

    see.execute();
}
Example 2
Source File: SavepointReaderITTestBase.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testOperatorStateInputFormat() throws Exception {
    StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setParallelism(4);

    DataStream<Integer> data = streamEnv
        .addSource(new SavepointSource())
        .rebalance();

    data
        .connect(data.broadcast(broadcast))
        .process(new StatefulOperator(list, union, broadcast))
        .uid(UID)
        .addSink(new DiscardingSink<>());

    JobGraph jobGraph = streamEnv.getStreamGraph().getJobGraph();

    String savepoint = takeSavepoint(jobGraph);

    ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();

    verifyListState(savepoint, batchEnv);
    verifyUnionState(savepoint, batchEnv);
    verifyBroadcastState(savepoint, batchEnv);
}
Example 3
Source File: RedisSetUvExample.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
    env.setParallelism(2);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            UvExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromGroupOffsets();

    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
            .Builder().setHost("192.168.30.244").build();

    env.addSource(kafkaConsumer)
        .map(string -> {
            // deserialize the JSON payload
            UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                    string, UserVisitWebEvent.class);
            // build the Redis key in the format date_pageId, e.g. 20191026_0
            String redisKey = userVisitWebEvent.getDate() + "_"
                    + userVisitWebEvent.getPageId();
            return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
        })
        .returns(new TypeHint<Tuple2<String, String>>(){})
        .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

    env.execute("Redis Set UV Stat");
}
Example 4
Source File: SavepointReaderKeyedStateITCase.java From flink with Apache License 2.0 | 5 votes |
private void runKeyedState(StateBackend backend) throws Exception {
    StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setStateBackend(backend);
    streamEnv.setParallelism(4);

    streamEnv
        .addSource(new SavepointSource())
        .rebalance()
        .keyBy(id -> id.key)
        .process(new KeyedStatefulOperator())
        .uid(uid)
        .addSink(new DiscardingSink<>());

    JobGraph jobGraph = streamEnv.getStreamGraph().getJobGraph();

    String path = takeSavepoint(jobGraph);

    ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();

    ExistingSavepoint savepoint = Savepoint.load(batchEnv, path, backend);

    List<Pojo> results = savepoint
        .readKeyedState(uid, new Reader())
        .collect();

    Set<Pojo> expected = SavepointSource.getElements();

    Assert.assertEquals("Unexpected results from keyed state", expected, new HashSet<>(results));
}
Example 5
Source File: YARNITCase.java From flink with Apache License 2.0 | 5 votes |
private JobGraph getTestingJobGraph() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);

    env.addSource(new NoDataSource())
        .shuffle()
        .addSink(new DiscardingSink<>());

    return env.getStreamGraph().getJobGraph();
}
Example 6
Source File: FromCollection.java From blog_demos with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set parallelism to 1
    env.setParallelism(1);

    // create a List containing two Tuple2 elements
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2("aaa", 1));
    list.add(new Tuple2("bbb", 1));

    // create a DataStream from the List
    DataStream<Tuple2<String, Integer>> fromCollectionDataStream = env.fromCollection(list);

    // create a DataStream from individual Tuple2 elements
    DataStream<Tuple2<String, Integer>> fromElementDataStream = env.fromElements(
            new Tuple2("ccc", 1),
            new Tuple2("ddd", 1),
            new Tuple2("aaa", 1)
    );

    // merge the two DataStreams into one with union
    DataStream<Tuple2<String, Integer>> unionDataStream = fromCollectionDataStream.union(fromElementDataStream);

    // count the occurrences of each word
    unionDataStream
        .keyBy(0)
        .sum(1)
        .print();

    env.execute("API DataSource demo : collection");
}
Example 7
Source File: IntervalJoinITCase.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testBoundsCanBeExclusive() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(
        Tuple2.of("key", 0),
        Tuple2.of("key", 1),
        Tuple2.of("key", 2)
    ).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

    DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(
        Tuple2.of("key", 0),
        Tuple2.of("key", 1),
        Tuple2.of("key", 2)
    ).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

    streamOne.keyBy(new Tuple2KeyExtractor())
        .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
        .between(Time.milliseconds(0), Time.milliseconds(2))
        .upperBoundExclusive()
        .lowerBoundExclusive()
        .process(new CombineToStringJoinFunction())
        .addSink(new ResultSink());

    env.execute();

    expectInAnyOrder(
        "(key,0):(key,1)",
        "(key,1):(key,2)"
    );
}
Example 8
Source File: RichParrelSourceFunctionDemo.java From blog_demos with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set parallelism to 2
    env.setParallelism(2);

    DataStream<Student> dataStream = env.addSource(new MySQLParrelDataSource());
    dataStream.print();

    env.execute("Customize DataSource demo : RichParrelSourceFunction");
}
Example 9
Source File: IntervalJoinITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void testBoundsCanBeInclusive() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(
        Tuple2.of("key", 0),
        Tuple2.of("key", 1),
        Tuple2.of("key", 2)
    ).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

    DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(
        Tuple2.of("key", 0),
        Tuple2.of("key", 1),
        Tuple2.of("key", 2)
    ).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

    streamOne.keyBy(new Tuple2KeyExtractor())
        .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
        .between(Time.milliseconds(0), Time.milliseconds(2))
        .process(new CombineToStringJoinFunction())
        .addSink(new ResultSink());

    env.execute();

    expectInAnyOrder(
        "(key,0):(key,0)",
        "(key,0):(key,1)",
        "(key,0):(key,2)",
        "(key,1):(key,1)",
        "(key,1):(key,2)",
        "(key,2):(key,2)"
    );
}
Example 10
Source File: JdbcITCase.java From flink with Apache License 2.0 | 5 votes |
@Test
@Ignore
public void testInsert() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setRestartStrategy(new RestartStrategies.NoRestartStrategyConfiguration());
    env.setParallelism(1);
    env.fromElements(TEST_DATA)
        .addSink(JdbcSink.sink(
            String.format(INSERT_TEMPLATE, INPUT_TABLE),
            (ps, t) -> {
                ps.setInt(1, t.id);
                ps.setString(2, t.title);
                ps.setString(3, t.author);
                if (t.price == null) {
                    ps.setNull(4, Types.DOUBLE);
                } else {
                    ps.setDouble(4, t.price);
                }
                ps.setInt(5, t.qty);
            },
            new JdbcConnectionOptionsBuilder()
                .withUrl(getDbMetadata().getUrl())
                .withDriverName(getDbMetadata().getDriverClass())
                .build()));
    env.execute();

    assertEquals(Arrays.asList(TEST_DATA), selectBooks());
}
Example 11
Source File: ReinterpretDataStreamAsKeyedStreamITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * This test checks that reinterpreting a data stream as a keyed stream works as expected. The test consists of
 * two jobs. The first job materializes a keyBy into files, one file per partition. The second job opens the
 * files created by the first job as sources (doing the correct assignment of files to partitions) and
 * reinterprets the sources as keyed, because we know they have been partitioned in a keyBy by the first job.
 */
@Test
public void testReinterpretAsKeyedStream() throws Exception {

    final int maxParallelism = 8;
    final int numEventsPerInstance = 100;
    final int parallelism = 3;
    final int numTotalEvents = numEventsPerInstance * parallelism;
    final int numUniqueKeys = 100;

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.setMaxParallelism(maxParallelism);
    env.setParallelism(parallelism);
    env.enableCheckpointing(100);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));

    final List<File> partitionFiles = new ArrayList<>(parallelism);

    for (int i = 0; i < parallelism; ++i) {
        File partitionFile = temporaryFolder.newFile();
        partitionFiles.add(i, partitionFile);
    }

    env.addSource(new RandomTupleSource(numEventsPerInstance, numUniqueKeys))
        .keyBy(0)
        .addSink(new ToPartitionFileSink(partitionFiles));

    env.execute();

    DataStreamUtils.reinterpretAsKeyedStream(
            env.addSource(new FromPartitionFileSource(partitionFiles)),
            (KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
            TypeInformation.of(Integer.class))
        .timeWindow(Time.seconds(1)) // test that also timers and aggregated state work as expected
        .reduce((ReduceFunction<Tuple2<Integer, Integer>>) (value1, value2) ->
                new Tuple2<>(value1.f0, value1.f1 + value2.f1))
        .addSink(new ValidatingSink(numTotalEvents)).setParallelism(1);

    env.execute();
}
Example 12
Source File: FlinkSQLDistinctExample.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String ddlSource = "CREATE TABLE user_behavior (\n" +
            " user_id BIGINT,\n" +
            " item_id BIGINT,\n" +
            " category_id BIGINT,\n" +
            " behavior STRING,\n" +
            " ts TIMESTAMP(3)\n" +
            ") WITH (\n" +
            " 'connector.type' = 'kafka',\n" +
            " 'connector.version' = '0.11',\n" +
            " 'connector.topic' = 'user_behavior',\n" +
            " 'connector.startup-mode' = 'latest-offset',\n" +
            " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" +
            " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" +
            " 'format.type' = 'json'\n" +
            ")";

    String countSql = "select user_id, count(user_id) from user_behavior group by user_id";

    blinkStreamTableEnv.sqlUpdate(ddlSource);
    Table countTable = blinkStreamTableEnv.sqlQuery(countSql);
    blinkStreamTableEnv.toRetractStream(countTable, Row.class).print();

    String distinctSql = "select distinct(user_id) from user_behavior";
    Table distinctTable = blinkStreamTableEnv.sqlQuery(distinctSql);
    blinkStreamTableEnv.toRetractStream(distinctTable, Row.class).print("==");

    blinkStreamTableEnv.execute("Blink Stream SQL count/distinct demo");
}
Example 13
Source File: TimestampITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * This tests whether timestamps are properly extracted in the timestamp
 * extractor and whether watermarks are also correctly forwarded from this with the auto watermark
 * interval.
 */
@Test
public void testTimestampExtractorWithAutoInterval() throws Exception {
    final int numElements = 10;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(10);
    env.setParallelism(1);
    env.getConfig().disableSysoutLogging();

    DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {
        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            int index = 1;
            while (index <= numElements) {
                ctx.collect(index);
                latch.await();
                index++;
            }
        }

        @Override
        public void cancel() {}
    });

    DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks(
            new AscendingTimestampExtractor<Integer>() {
                @Override
                public long extractAscendingTimestamp(Integer element) {
                    return element;
                }
            });

    extractOp
        .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
        .transform("Timestamp Check", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator());

    // verify that extractor picks up source parallelism
    Assert.assertEquals(extractOp.getTransformation().getParallelism(), source1.getTransformation().getParallelism());

    env.execute();

    // verify that we get NUM_ELEMENTS watermarks
    for (int j = 0; j < numElements; j++) {
        if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) {
            long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp();
            Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]);
        }
    }

    // the input is finite, so it should have a MAX Watermark
    assertEquals(Watermark.MAX_WATERMARK,
            CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1));
}
Example 14
Source File: PartitionerITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void partitionerTest() {

    TestListResultSink<Tuple2<Integer, String>> hashPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> customPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> broadcastPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> forwardPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> rebalancePartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> globalPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple1<String>> src = env.fromCollection(
            INPUT.stream().map(Tuple1::of).collect(Collectors.toList()));

    // partition by hash
    src
        .keyBy(0)
        .map(new SubtaskIndexAssigner())
        .addSink(hashPartitionResultSink);

    // partition custom
    DataStream<Tuple2<Integer, String>> partitionCustom = src
        .partitionCustom(new Partitioner<String>() {
            @Override
            public int partition(String key, int numPartitions) {
                if (key.equals("c")) {
                    return 2;
                } else {
                    return 0;
                }
            }
        }, 0)
        .map(new SubtaskIndexAssigner());
    partitionCustom.addSink(customPartitionResultSink);

    // partition broadcast
    src.broadcast().map(new SubtaskIndexAssigner()).addSink(broadcastPartitionResultSink);

    // partition rebalance
    src.rebalance().map(new SubtaskIndexAssigner()).addSink(rebalancePartitionResultSink);

    // partition forward
    src.map(new MapFunction<Tuple1<String>, Tuple1<String>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Tuple1<String> map(Tuple1<String> value) throws Exception {
                return value;
            }
        })
        .forward()
        .map(new SubtaskIndexAssigner())
        .addSink(forwardPartitionResultSink);

    // partition global
    src.global().map(new SubtaskIndexAssigner()).addSink(globalPartitionResultSink);

    try {
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    List<Tuple2<Integer, String>> hashPartitionResult = hashPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> customPartitionResult = customPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> broadcastPartitionResult = broadcastPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> forwardPartitionResult = forwardPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> rebalancePartitionResult = rebalancePartitionResultSink.getResult();
    List<Tuple2<Integer, String>> globalPartitionResult = globalPartitionResultSink.getResult();

    verifyHashPartitioning(hashPartitionResult);
    verifyCustomPartitioning(customPartitionResult);
    verifyBroadcastPartitioning(broadcastPartitionResult);
    verifyRebalancePartitioning(forwardPartitionResult);
    verifyRebalancePartitioning(rebalancePartitionResult);
    verifyGlobalPartitioning(globalPartitionResult);
}
Example 15
Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public void runKeyValueTest() throws Exception {
    final String topic = "keyvaluetest";
    createTestTopic(topic, 1, 1);
    final int elementCount = 5000;

    // ----------- Write some data into Kafka -------------------

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    DataStream<Tuple2<Long, PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<Long, PojoValue>>() {
        @Override
        public void run(SourceContext<Tuple2<Long, PojoValue>> ctx) throws Exception {
            Random rnd = new Random(1337);
            for (long i = 0; i < elementCount; i++) {
                PojoValue pojo = new PojoValue();
                pojo.when = new Date(rnd.nextLong());
                pojo.lon = rnd.nextLong();
                pojo.lat = i;
                // make every second key null to ensure proper "null" serialization
                Long key = (i % 2 == 0) ? null : i;
                ctx.collect(new Tuple2<>(key, pojo));
            }
        }

        @Override
        public void cancel() {
        }
    });

    KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema =
            new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "3");
    kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
    env.execute("Write KV to Kafka");

    // ----------- Read the data again -------------------

    env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    KafkaDeserializationSchema<Tuple2<Long, PojoValue>> readSchema =
            new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));

    fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
        long counter = 0;

        @Override
        public void flatMap(Tuple2<Long, PojoValue> value, Collector<Object> out) throws Exception {
            // the elements should be in order.
            Assert.assertTrue("Wrong value " + value.f1.lat, value.f1.lat == counter);
            if (value.f1.lat % 2 == 0) {
                assertNull("key was not null", value.f0);
            } else {
                Assert.assertTrue("Wrong value " + value.f0, value.f0 == counter);
            }
            counter++;
            if (counter == elementCount) {
                // we got the right number of elements
                throw new SuccessException();
            }
        }
    });

    tryExecute(env, "Read KV from Kafka");

    deleteTestTopic(topic);
}
Example 16
Source File: WindowCheckpointingITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testAggregatingTumblingProcessingTimeWindow() {
    final int numElements = 3000;

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(timeCharacteristic);
        env.getConfig().setAutoWatermarkInterval(10);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.getConfig().disableSysoutLogging();

        SinkValidatorUpdaterAndChecker updaterAndChecker =
                new SinkValidatorUpdaterAndChecker(numElements, 1);
        env
            .addSource(new FailingSource(new Generator(), numElements, timeCharacteristic))
            .map(new MapFunction<Tuple2<Long, IntType>, Tuple2<Long, IntType>>() {
                @Override
                public Tuple2<Long, IntType> map(Tuple2<Long, IntType> value) {
                    value.f1.value = 1;
                    return value;
                }
            })
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(100, MILLISECONDS))
            .reduce(new ReduceFunction<Tuple2<Long, IntType>>() {
                @Override
                public Tuple2<Long, IntType> reduce(
                        Tuple2<Long, IntType> a,
                        Tuple2<Long, IntType> b) {
                    return new Tuple2<>(a.f0, new IntType(1));
                }
            })
            .addSink(new ValidatingSink<>(updaterAndChecker, updaterAndChecker, timeCharacteristic))
            .setParallelism(1);

        tryExecute(env, "Aggregating Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 17
Source File: CoGroupJoinITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testSelfJoin() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple3<String, String, Integer>> source1 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple3.of("a", "x", 0));
            ctx.collect(Tuple3.of("a", "y", 1));
            ctx.collect(Tuple3.of("a", "z", 2));

            ctx.collect(Tuple3.of("b", "u", 3));
            ctx.collect(Tuple3.of("b", "w", 5));

            ctx.collect(Tuple3.of("a", "i", 6));
            ctx.collect(Tuple3.of("a", "j", 7));
            ctx.collect(Tuple3.of("a", "k", 8));

            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

    source1.join(source1)
        .where(new Tuple3KeyExtractor())
        .equalTo(new Tuple3KeyExtractor())
        .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
        .apply(new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {
            @Override
            public String join(Tuple3<String, String, Integer> first, Tuple3<String, String, Integer> second) throws Exception {
                return first + ":" + second;
            }
        })
        .addSink(new SinkFunction<String>() {
            @Override
            public void invoke(String value) throws Exception {
                testResults.add(value);
            }
        });

    env.execute("Self-Join Test");

    List<String> expectedResult = Arrays.asList(
            "(a,x,0):(a,x,0)",
            "(a,x,0):(a,y,1)",
            "(a,x,0):(a,z,2)",
            "(a,y,1):(a,x,0)",
            "(a,y,1):(a,y,1)",
            "(a,y,1):(a,z,2)",
            "(a,z,2):(a,x,0)",
            "(a,z,2):(a,y,1)",
            "(a,z,2):(a,z,2)",
            "(b,u,3):(b,u,3)",
            "(b,u,3):(b,w,5)",
            "(b,w,5):(b,u,3)",
            "(b,w,5):(b,w,5)",
            "(a,i,6):(a,i,6)",
            "(a,i,6):(a,j,7)",
            "(a,i,6):(a,k,8)",
            "(a,j,7):(a,i,6)",
            "(a,j,7):(a,j,7)",
            "(a,j,7):(a,k,8)",
            "(a,k,8):(a,i,6)",
            "(a,k,8):(a,j,7)",
            "(a,k,8):(a,k,8)");

    Collections.sort(expectedResult);
    Collections.sort(testResults);

    Assert.assertEquals(expectedResult, testResults);
}
Example 18
Source File: EventTimeAllWindowCheckpointingITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testPreAggregatedTumblingTimeWindow() {
    final int numElementsPerKey = 3000;
    final int windowSize = 100;
    final int numKeys = 1;

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.getConfig().disableSysoutLogging();

        env
            .addSource(new FailingSource(new EventTimeWindowCheckpointingITCase.KeyedEventTimeGenerator(numKeys, windowSize), numElementsPerKey))
            .rebalance()
            .timeWindowAll(Time.of(windowSize, MILLISECONDS))
            .reduce(
                new ReduceFunction<Tuple2<Long, IntType>>() {
                    @Override
                    public Tuple2<Long, IntType> reduce(
                            Tuple2<Long, IntType> a,
                            Tuple2<Long, IntType> b) {
                        return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
                    }
                },
                new RichAllWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, TimeWindow>() {

                    private boolean open = false;

                    @Override
                    public void open(Configuration parameters) {
                        assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
                        open = true;
                    }

                    @Override
                    public void apply(
                            TimeWindow window,
                            Iterable<Tuple2<Long, IntType>> input,
                            Collector<Tuple4<Long, Long, Long, IntType>> out) {

                        // validate that the function has been opened properly
                        assertTrue(open);

                        for (Tuple2<Long, IntType> in: input) {
                            out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
                        }
                    }
                })
            .addSink(new ValidatingSink<>(
                new EventTimeWindowCheckpointingITCase.SinkValidatorUpdateFun(numElementsPerKey),
                new EventTimeWindowCheckpointingITCase.SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSize)))
            .setParallelism(1);

        env.execute("Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: Kafka010ITCase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

    final String topic = "tstopic";
    createTestTopic(topic, 3, 1);

    // ---------- Produce an event time stream into Kafka -------------------

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
        private static final long serialVersionUID = -2255105836471289626L;
        boolean running = true;

        @Override
        public void run(SourceContext<Long> ctx) throws Exception {
            long i = 0;
            while (running) {
                ctx.collectWithTimestamp(i, i * 2);
                if (i++ == 1000L) {
                    running = false;
                }
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
    FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(
            streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps,
            new FlinkKafkaPartitioner<Long>() {
                private static final long serialVersionUID = -6730989584364230617L;

                @Override
                public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
                    return (int) (next % 3);
                }
            });
    prod.setParallelism(3);
    prod.setWriteTimestampToKafka(true);

    env.execute("Produce some");

    // ---------- Consume stream from Kafka -------------------

    env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
    kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
        private static final long serialVersionUID = -4834111073247835189L;

        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
            if (lastElement % 10 == 0) {
                return new Watermark(lastElement);
            }
            return null;
        }

        @Override
        public long extractTimestamp(Long element, long previousElementTimestamp) {
            return previousElementTimestamp;
        }
    });

    DataStream<Long> stream = env.addSource(kafkaSource);
    GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
    stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

    env.execute("Consume again");

    deleteTestTopic(topic);
}
Example 20
Source File: EventTimeWindowCheckpointingITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testSlidingTimeWindow() {
    final int numElementsPerKey = numElementsPerKey();
    final int windowSize = windowSize();
    final int windowSlide = windowSlide();
    final int numKeys = numKeys();

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setMaxParallelism(2 * PARALLELISM);
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.setStateBackend(this.stateBackend);
        env.getConfig().setUseSnapshotCompression(true);

        env
            .addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
            .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                private boolean open = false;

                @Override
                public void open(Configuration parameters) {
                    assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                    open = true;
                }

                @Override
                public void apply(
                        Tuple tuple,
                        TimeWindow window,
                        Iterable<Tuple2<Long, IntType>> values,
                        Collector<Tuple4<Long, Long, Long, IntType>> out) {

                    // validate that the function has been opened properly
                    assertTrue(open);

                    int sum = 0;
                    long key = -1;

                    for (Tuple2<Long, IntType> value : values) {
                        sum += value.f1.value;
                        key = value.f0;
                    }

                    final Tuple4<Long, Long, Long, IntType> output =
                            new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum));
                    out.collect(output);
                }
            })
            .addSink(new ValidatingSink<>(
                new SinkValidatorUpdateFun(numElementsPerKey),
                new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

        env.execute("Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}