Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setParallelism()

The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setParallelism(). They are drawn from open source projects; the source file and license are listed above each example.
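Before the project examples, here is a minimal, self-contained sketch of the call in isolation. This is only an orientation sketch, not code from any of the projects below; the element values, map function, class name, and job name are placeholders. setParallelism() on the environment sets the default parallelism for every operator of the job, while setParallelism() on an individual operator or sink overrides that default for that operator only.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SetParallelismSketch {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Default parallelism for all operators of this job that do not set their own.
		env.setParallelism(2);

		env.fromElements(1, 2, 3)
			.map(new MapFunction<Integer, Integer>() {
				@Override
				public Integer map(Integer value) {
					return value * 10; // runs with the environment default parallelism of 2
				}
			})
			.print()
			.setParallelism(1); // per-operator override: print from a single task

		env.execute("setParallelism sketch");
	}
}
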
Example 1
Source File: ConsumeFromDynamoDBStreams.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	Properties dynamodbStreamsConsumerConfig = new Properties();
	final String streamName = pt.getRequired(DYNAMODB_STREAM_NAME);
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> dynamodbStreams = see.addSource(new FlinkDynamoDBStreamsConsumer<>(
			streamName,
			new SimpleStringSchema(),
			dynamodbStreamsConsumerConfig));

	dynamodbStreams.print();

	see.execute();
}
 
Example 2
Source File: SavepointReaderITTestBase.java    From flink with Apache License 2.0
@Test
public void testOperatorStateInputFormat() throws Exception {
	StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	streamEnv.setParallelism(4);

	DataStream<Integer> data = streamEnv
		.addSource(new SavepointSource())
		.rebalance();

	data
		.connect(data.broadcast(broadcast))
		.process(new StatefulOperator(list, union, broadcast))
		.uid(UID)
		.addSink(new DiscardingSink<>());

	JobGraph jobGraph = streamEnv.getStreamGraph().getJobGraph();

	String savepoint = takeSavepoint(jobGraph);

	ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();

	verifyListState(savepoint, batchEnv);

	verifyUnionState(savepoint, batchEnv);

	verifyBroadcastState(savepoint, batchEnv);
}
 
Example 3
Source File: RedisSetUvExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // Deserialize the JSON payload
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // Build the Redis key in the format date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example 4
Source File: SavepointReaderKeyedStateITCase.java    From flink with Apache License 2.0
private void runKeyedState(StateBackend backend) throws Exception {
	StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	streamEnv.setStateBackend(backend);
	streamEnv.setParallelism(4);

	streamEnv
		.addSource(new SavepointSource())
		.rebalance()
		.keyBy(id -> id.key)
		.process(new KeyedStatefulOperator())
		.uid(uid)
		.addSink(new DiscardingSink<>());

	JobGraph jobGraph = streamEnv.getStreamGraph().getJobGraph();

	String path = takeSavepoint(jobGraph);

	ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
	ExistingSavepoint savepoint = Savepoint.load(batchEnv, path, backend);

	List<Pojo> results = savepoint
		.readKeyedState(uid, new Reader())
		.collect();

	Set<Pojo> expected = SavepointSource.getElements();

	Assert.assertEquals("Unexpected results from keyed state", expected, new HashSet<>(results));
}
 
Example 5
Source File: YARNITCase.java    From flink with Apache License 2.0
private JobGraph getTestingJobGraph() {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(2);

	env.addSource(new NoDataSource())
		.shuffle()
		.addSink(new DiscardingSink<>());

	return env.getStreamGraph().getJobGraph();
}
 
Example 6
Source File: FromCollection.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Set the parallelism to 1
    env.setParallelism(1);

    // Create a List containing two Tuple2 elements
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2<>("aaa", 1));
    list.add(new Tuple2<>("bbb", 1));

    // Create a DataStream from the List
    DataStream<Tuple2<String, Integer>> fromCollectionDataStream = env.fromCollection(list);

    // Create a DataStream from individual Tuple2 elements
    DataStream<Tuple2<String, Integer>> fromElementDataStream = env.fromElements(
            new Tuple2<>("ccc", 1),
            new Tuple2<>("ddd", 1),
            new Tuple2<>("aaa", 1)
    );

    // Merge the two DataStreams into one with union
    DataStream<Tuple2<String, Integer>> unionDataStream = fromCollectionDataStream.union(fromElementDataStream);

    // Count the occurrences of each word
    unionDataStream
            .keyBy(0)
            .sum(1)
            .print();

    env.execute("API DataSource demo : collection");
}
 
Example 7
Source File: IntervalJoinITCase.java    From flink with Apache License 2.0
@Test
public void testBoundsCanBeExclusive() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setParallelism(1);

	DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(
		Tuple2.of("key", 0),
		Tuple2.of("key", 1),
		Tuple2.of("key", 2)
	).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

	DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(
		Tuple2.of("key", 0),
		Tuple2.of("key", 1),
		Tuple2.of("key", 2)
	).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

	streamOne.keyBy(new Tuple2KeyExtractor())
		.intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
		.between(Time.milliseconds(0), Time.milliseconds(2))
		.upperBoundExclusive()
		.lowerBoundExclusive()
		.process(new CombineToStringJoinFunction())
		.addSink(new ResultSink());

	env.execute();

	expectInAnyOrder(
		"(key,0):(key,1)",
		"(key,1):(key,2)"
	);
}
 
Example 8
Source File: RichParrelSourceFunctionDemo.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // Set the parallelism to 2
    env.setParallelism(2);

    DataStream<Student> dataStream = env.addSource(new MySQLParrelDataSource());
    dataStream.print();

    env.execute("Customize DataSource demo : RichParrelSourceFunction");
}
 
Example 9
Source File: IntervalJoinITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testBoundsCanBeInclusive() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setParallelism(1);

	DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(
		Tuple2.of("key", 0),
		Tuple2.of("key", 1),
		Tuple2.of("key", 2)
	).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

	DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(
		Tuple2.of("key", 0),
		Tuple2.of("key", 1),
		Tuple2.of("key", 2)
	).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

	streamOne.keyBy(new Tuple2KeyExtractor())
		.intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
		.between(Time.milliseconds(0), Time.milliseconds(2))
		.process(new CombineToStringJoinFunction())
		.addSink(new ResultSink());

	env.execute();

	expectInAnyOrder(
		"(key,0):(key,0)",
		"(key,0):(key,1)",
		"(key,0):(key,2)",

		"(key,1):(key,1)",
		"(key,1):(key,2)",

		"(key,2):(key,2)"
	);
}
 
Example 10
Source File: JdbcITCase.java    From flink with Apache License 2.0
@Test
@Ignore
public void testInsert() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(new RestartStrategies.NoRestartStrategyConfiguration());
	env.setParallelism(1);
	env.fromElements(TEST_DATA)
		.addSink(JdbcSink.sink(
			String.format(INSERT_TEMPLATE, INPUT_TABLE),
			(ps, t) -> {
				ps.setInt(1, t.id);
				ps.setString(2, t.title);
				ps.setString(3, t.author);
				if (t.price == null) {
					ps.setNull(4, Types.DOUBLE);
				} else {
					ps.setDouble(4, t.price);
				}
				ps.setInt(5, t.qty);
			},
			new JdbcConnectionOptionsBuilder()
				.withUrl(getDbMetadata().getUrl())
				.withDriverName(getDbMetadata().getDriverClass())
				.build()));
	env.execute();

	assertEquals(Arrays.asList(TEST_DATA), selectBooks());
}
 
Example 11
Source File: ReinterpretDataStreamAsKeyedStreamITCase.java    From flink with Apache License 2.0
/**
 * This test checks that reinterpreting a data stream as a keyed stream works as expected. The test consists of
 * two jobs. The first job materializes a keyBy into files, one file per partition. The second job opens the
 * files created by the first job as sources (doing the correct assignment of files to partitions) and
 * reinterprets the sources as keyed, because we know they have been partitioned in a keyBy by the first job.
 */
@Test
public void testReinterpretAsKeyedStream() throws Exception {

	final int maxParallelism = 8;
	final int numEventsPerInstance = 100;
	final int parallelism = 3;
	final int numTotalEvents = numEventsPerInstance * parallelism;
	final int numUniqueKeys = 100;

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
	env.setMaxParallelism(maxParallelism);
	env.setParallelism(parallelism);
	env.enableCheckpointing(100);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));

	final List<File> partitionFiles = new ArrayList<>(parallelism);
	for (int i = 0; i < parallelism; ++i) {
		File partitionFile = temporaryFolder.newFile();
		partitionFiles.add(i, partitionFile);
	}

	env.addSource(new RandomTupleSource(numEventsPerInstance, numUniqueKeys))
		.keyBy(0)
		.addSink(new ToPartitionFileSink(partitionFiles));

	env.execute();

	DataStreamUtils.reinterpretAsKeyedStream(
		env.addSource(new FromPartitionFileSource(partitionFiles)),
		(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
		TypeInformation.of(Integer.class))
		.timeWindow(Time.seconds(1)) // test that also timers and aggregated state work as expected
		.reduce((ReduceFunction<Tuple2<Integer, Integer>>) (value1, value2) ->
			new Tuple2<>(value1.f0, value1.f1 + value2.f1))
		.addSink(new ValidatingSink(numTotalEvents)).setParallelism(1);

	env.execute();
}
 
Example 12
Source File: FlinkSQLDistinctExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String ddlSource = "CREATE TABLE user_behavior (\n" +
            "    user_id BIGINT,\n" +
            "    item_id BIGINT,\n" +
            "    category_id BIGINT,\n" +
            "    behavior STRING,\n" +
            "    ts TIMESTAMP(3)\n" +
            ") WITH (\n" +
            "    'connector.type' = 'kafka',\n" +
            "    'connector.version' = '0.11',\n" +
            "    'connector.topic' = 'user_behavior',\n" +
            "    'connector.startup-mode' = 'latest-offset',\n" +
            "    'connector.properties.zookeeper.connect' = 'localhost:2181',\n" +
            "    'connector.properties.bootstrap.servers' = 'localhost:9092',\n" +
            "    'format.type' = 'json'\n" +
            ")";

    String countSql = "select user_id, count(user_id) from user_behavior group by user_id";

    blinkStreamTableEnv.sqlUpdate(ddlSource);
    Table countTable = blinkStreamTableEnv.sqlQuery(countSql);
    blinkStreamTableEnv.toRetractStream(countTable, Row.class).print();

    String distinctSql = "select distinct(user_id) from user_behavior";
    Table distinctTable = blinkStreamTableEnv.sqlQuery(distinctSql);
    blinkStreamTableEnv.toRetractStream(distinctTable, Row.class).print("==");

    blinkStreamTableEnv.execute("Blink Stream SQL count/distinct demo");
}
 
Example 13
Source File: TimestampITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * This tests whether timestamps are properly extracted by the timestamp
 * extractor and whether watermarks are correctly forwarded from it when the automatic
 * watermark interval is used.
 */
@Test
public void testTimestampExtractorWithAutoInterval() throws Exception {
	final int numElements = 10;

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.getConfig().setAutoWatermarkInterval(10);
	env.setParallelism(1);
	env.getConfig().disableSysoutLogging();

	DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {
		@Override
		public void run(SourceContext<Integer> ctx) throws Exception {
			int index = 1;
			while (index <= numElements) {
				ctx.collect(index);
				latch.await();
				index++;
			}
		}

		@Override
		public void cancel() {}
	});

	DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks(
			new AscendingTimestampExtractor<Integer>() {
				@Override
				public long extractAscendingTimestamp(Integer element) {
					return element;
				}
			});

	extractOp
			.transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
			.transform("Timestamp Check",
					BasicTypeInfo.INT_TYPE_INFO,
					new TimestampCheckingOperator());

	// verify that extractor picks up source parallelism
	Assert.assertEquals(extractOp.getTransformation().getParallelism(), source1.getTransformation().getParallelism());

	env.execute();

	// verify that we get NUM_ELEMENTS watermarks
	for (int j = 0; j < numElements; j++) {
		if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) {
			long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp();
			Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]);
		}
	}

	// the input is finite, so it should have a MAX Watermark
	assertEquals(Watermark.MAX_WATERMARK,
			CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1));
}
 
Example 14
Source File: PartitionerITCase.java    From flink with Apache License 2.0
@Test
public void partitionerTest() {

	TestListResultSink<Tuple2<Integer, String>> hashPartitionResultSink =
			new TestListResultSink<Tuple2<Integer, String>>();
	TestListResultSink<Tuple2<Integer, String>> customPartitionResultSink =
			new TestListResultSink<Tuple2<Integer, String>>();
	TestListResultSink<Tuple2<Integer, String>> broadcastPartitionResultSink =
			new TestListResultSink<Tuple2<Integer, String>>();
	TestListResultSink<Tuple2<Integer, String>> forwardPartitionResultSink =
			new TestListResultSink<Tuple2<Integer, String>>();
	TestListResultSink<Tuple2<Integer, String>> rebalancePartitionResultSink =
			new TestListResultSink<Tuple2<Integer, String>>();
	TestListResultSink<Tuple2<Integer, String>> globalPartitionResultSink =
			new TestListResultSink<Tuple2<Integer, String>>();

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	DataStream<Tuple1<String>> src = env.fromCollection(
		INPUT.stream().map(Tuple1::of).collect(Collectors.toList()));

	// partition by hash
	src
			.keyBy(0)
			.map(new SubtaskIndexAssigner())
			.addSink(hashPartitionResultSink);

	// partition custom
	DataStream<Tuple2<Integer, String>> partitionCustom = src
			.partitionCustom(new Partitioner<String>() {
				@Override
				public int partition(String key, int numPartitions) {
					if (key.equals("c")) {
						return 2;
					} else {
						return 0;
					}
				}
			}, 0)
			.map(new SubtaskIndexAssigner());

	partitionCustom.addSink(customPartitionResultSink);

	// partition broadcast
	src.broadcast().map(new SubtaskIndexAssigner()).addSink(broadcastPartitionResultSink);

	// partition rebalance
	src.rebalance().map(new SubtaskIndexAssigner()).addSink(rebalancePartitionResultSink);

	// partition forward
	src.map(new MapFunction<Tuple1<String>, Tuple1<String>>() {
		private static final long serialVersionUID = 1L;
		@Override
		public Tuple1<String> map(Tuple1<String> value) throws Exception {
			return value;
		}
	})
			.forward()
			.map(new SubtaskIndexAssigner())
			.addSink(forwardPartitionResultSink);

	// partition global
	src.global().map(new SubtaskIndexAssigner()).addSink(globalPartitionResultSink);

	try {
		env.execute();
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}

	List<Tuple2<Integer, String>> hashPartitionResult = hashPartitionResultSink.getResult();
	List<Tuple2<Integer, String>> customPartitionResult = customPartitionResultSink.getResult();
	List<Tuple2<Integer, String>> broadcastPartitionResult = broadcastPartitionResultSink.getResult();
	List<Tuple2<Integer, String>> forwardPartitionResult = forwardPartitionResultSink.getResult();
	List<Tuple2<Integer, String>> rebalancePartitionResult = rebalancePartitionResultSink.getResult();
	List<Tuple2<Integer, String>> globalPartitionResult = globalPartitionResultSink.getResult();

	verifyHashPartitioning(hashPartitionResult);
	verifyCustomPartitioning(customPartitionResult);
	verifyBroadcastPartitioning(broadcastPartitionResult);
	verifyRebalancePartitioning(forwardPartitionResult);
	verifyRebalancePartitioning(rebalancePartitionResult);
	verifyGlobalPartitioning(globalPartitionResult);
}
 
Example 15
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0
public void runKeyValueTest() throws Exception {
	final String topic = "keyvaluetest";
	createTestTopic(topic, 1, 1);
	final int elementCount = 5000;

	// ----------- Write some data into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	DataStream<Tuple2<Long, PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<Long, PojoValue>>() {
		@Override
		public void run(SourceContext<Tuple2<Long, PojoValue>> ctx) throws Exception {
			Random rnd = new Random(1337);
			for (long i = 0; i < elementCount; i++) {
				PojoValue pojo = new PojoValue();
				pojo.when = new Date(rnd.nextLong());
				pojo.lon = rnd.nextLong();
				pojo.lat = i;
				// make every second key null to ensure proper "null" serialization
				Long key = (i % 2 == 0) ? null : i;
				ctx.collect(new Tuple2<>(key, pojo));
			}
		}

		@Override
		public void cancel() {
		}
	});

	KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "3");
	kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
	env.execute("Write KV to Kafka");

	// ----------- Read the data again -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	KafkaDeserializationSchema<Tuple2<Long, PojoValue>> readSchema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));
	fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
		long counter = 0;
		@Override
		public void flatMap(Tuple2<Long, PojoValue> value, Collector<Object> out) throws Exception {
			// the elements should be in order.
			Assert.assertTrue("Wrong value " + value.f1.lat, value.f1.lat == counter);
			if (value.f1.lat % 2 == 0) {
				assertNull("key was not null", value.f0);
			} else {
				Assert.assertTrue("Wrong value " + value.f0, value.f0 == counter);
			}
			counter++;
			if (counter == elementCount) {
				// we got the right number of elements
				throw new SuccessException();
			}
		}
	});

	tryExecute(env, "Read KV from Kafka");

	deleteTestTopic(topic);
}
 
Example 16
Source File: WindowCheckpointingITCase.java    From flink with Apache License 2.0
@Test
public void testAggregatingTumblingProcessingTimeWindow() {
	final int numElements = 3000;

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(timeCharacteristic);
		env.getConfig().setAutoWatermarkInterval(10);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();
		SinkValidatorUpdaterAndChecker updaterAndChecker =
			new SinkValidatorUpdaterAndChecker(numElements, 1);
		env
				.addSource(new FailingSource(new Generator(), numElements, timeCharacteristic))
				.map(new MapFunction<Tuple2<Long, IntType>, Tuple2<Long, IntType>>() {
					@Override
					public Tuple2<Long, IntType> map(Tuple2<Long, IntType> value) {
						value.f1.value = 1;
						return value;
					}
				})
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(100, MILLISECONDS))
				.reduce(new ReduceFunction<Tuple2<Long, IntType>>() {

					@Override
					public Tuple2<Long, IntType> reduce(
							Tuple2<Long, IntType> a,
							Tuple2<Long, IntType> b) {
						return new Tuple2<>(a.f0, new IntType(1));
					}
				})
			.addSink(new ValidatingSink<>(updaterAndChecker, updaterAndChecker, timeCharacteristic))
			.setParallelism(1);

		tryExecute(env, "Aggregating Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 17
Source File: CoGroupJoinITCase.java    From flink with Apache License 2.0
@Test
public void testSelfJoin() throws Exception {

	testResults = new ArrayList<>();

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setParallelism(1);

	DataStream<Tuple3<String, String, Integer>> source1 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
		private static final long serialVersionUID = 1L;

		@Override
		public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
			ctx.collect(Tuple3.of("a", "x", 0));
			ctx.collect(Tuple3.of("a", "y", 1));
			ctx.collect(Tuple3.of("a", "z", 2));

			ctx.collect(Tuple3.of("b", "u", 3));
			ctx.collect(Tuple3.of("b", "w", 5));

			ctx.collect(Tuple3.of("a", "i", 6));
			ctx.collect(Tuple3.of("a", "j", 7));
			ctx.collect(Tuple3.of("a", "k", 8));

			// source is finite, so it will have an implicit MAX watermark when it finishes
		}

		@Override
		public void cancel() {
		}
	}).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

	source1.join(source1)
			.where(new Tuple3KeyExtractor())
			.equalTo(new Tuple3KeyExtractor())
			.window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
			.apply(new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {
				@Override
				public String join(Tuple3<String, String, Integer> first, Tuple3<String, String, Integer> second) throws Exception {
					return first + ":" + second;
				}
			})
			.addSink(new SinkFunction<String>() {
				@Override
				public void invoke(String value) throws Exception {
					testResults.add(value);
				}
			});

	env.execute("Self-Join Test");

	List<String> expectedResult = Arrays.asList(
			"(a,x,0):(a,x,0)",
			"(a,x,0):(a,y,1)",
			"(a,x,0):(a,z,2)",
			"(a,y,1):(a,x,0)",
			"(a,y,1):(a,y,1)",
			"(a,y,1):(a,z,2)",
			"(a,z,2):(a,x,0)",
			"(a,z,2):(a,y,1)",
			"(a,z,2):(a,z,2)",
			"(b,u,3):(b,u,3)",
			"(b,u,3):(b,w,5)",
			"(b,w,5):(b,u,3)",
			"(b,w,5):(b,w,5)",
			"(a,i,6):(a,i,6)",
			"(a,i,6):(a,j,7)",
			"(a,i,6):(a,k,8)",
			"(a,j,7):(a,i,6)",
			"(a,j,7):(a,j,7)",
			"(a,j,7):(a,k,8)",
			"(a,k,8):(a,i,6)",
			"(a,k,8):(a,j,7)",
			"(a,k,8):(a,k,8)");

	Collections.sort(expectedResult);
	Collections.sort(testResults);

	Assert.assertEquals(expectedResult, testResults);
}
 
Example 18
Source File: EventTimeAllWindowCheckpointingITCase.java    From flink with Apache License 2.0
@Test
public void testPreAggregatedTumblingTimeWindow() {
	final int numElementsPerKey = 3000;
	final int windowSize = 100;
	final int numKeys = 1;

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();

		env
			.addSource(new FailingSource(new EventTimeWindowCheckpointingITCase.KeyedEventTimeGenerator(numKeys, windowSize), numElementsPerKey))
				.rebalance()
				.timeWindowAll(Time.of(windowSize, MILLISECONDS))
				.reduce(
						new ReduceFunction<Tuple2<Long, IntType>>() {

							@Override
							public Tuple2<Long, IntType> reduce(
									Tuple2<Long, IntType> a,
									Tuple2<Long, IntType> b) {

								return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
							}
						},
						new RichAllWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> input,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						for (Tuple2<Long, IntType> in: input) {
							out.collect(new Tuple4<>(in.f0,
									window.getStart(),
									window.getEnd(),
									in.f1));
						}
					}
				})
			.addSink(new ValidatingSink<>(
				new EventTimeWindowCheckpointingITCase.SinkValidatorUpdateFun(numElementsPerKey),
				new EventTimeWindowCheckpointingITCase.SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSize)))
			.setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 19
Source File: Kafka010ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.10-specific test ensuring timestamps are properly written to and read from Kafka.
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255105836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1000L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	});
	prod.setParallelism(3);
	prod.setWriteTimestampToKafka(true);
	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111073247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 10 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example 20
Source File: EventTimeWindowCheckpointingITCase.java    From flink with Apache License 2.0
@Test
public void testSlidingTimeWindow() {
	final int numElementsPerKey = numElementsPerKey();
	final int windowSize = windowSize();
	final int windowSlide = windowSlide();
	final int numKeys = numKeys();

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setMaxParallelism(2 * PARALLELISM);
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.setStateBackend(this.stateBackend);
		env.getConfig().setUseSnapshotCompression(true);

		env
				.addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
				.apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							Tuple tuple,
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> values,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						int sum = 0;
						long key = -1;

						for (Tuple2<Long, IntType> value : values) {
							sum += value.f1.value;
							key = value.f0;
						}
						final Tuple4<Long, Long, Long, IntType> output =
							new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum));
						out.collect(output);
					}
				})
			.addSink(new ValidatingSink<>(
				new SinkValidatorUpdateFun(numElementsPerKey),
				new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}