Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setParallelism()
The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setParallelism().
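Before the individual examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the class name, values, and job name are made up for illustration). It shows the basic pattern the examples share: setParallelism(int) on the environment sets the default parallelism for all operators of the job, and a single operator can still override it with its own setParallelism call.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SetParallelismSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // default parallelism for every operator of this job
        env.setParallelism(2);

        DataStream<Integer> numbers = env.fromElements(1, 2, 3, 4, 5);

        numbers
            .map(n -> n * n)      // inherits the environment default (parallelism 2)
            .print()
            .setParallelism(1);   // per-operator override: the print sink runs single-threaded

        env.execute("setParallelism sketch");
    }
}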
Example 1
Source File: ConsumeFromDynamoDBStreams.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties dynamodbStreamsConsumerConfig = new Properties();
    final String streamName = pt.getRequired(DYNAMODB_STREAM_NAME);
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> dynamodbStreams = see.addSource(new FlinkDynamoDBStreamsConsumer<>(
            streamName,
            new SimpleStringSchema(),
            dynamodbStreamsConsumerConfig));

    dynamodbStreams.print();

    see.execute();
}
Example 2
Source File: SavepointReaderITTestBase.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testOperatorStateInputFormat() throws Exception {
    StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setParallelism(4);

    DataStream<Integer> data = streamEnv
        .addSource(new SavepointSource())
        .rebalance();

    data
        .connect(data.broadcast(broadcast))
        .process(new StatefulOperator(list, union, broadcast))
        .uid(UID)
        .addSink(new DiscardingSink<>());

    JobGraph jobGraph = streamEnv.getStreamGraph().getJobGraph();

    String savepoint = takeSavepoint(jobGraph);

    ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();

    verifyListState(savepoint, batchEnv);
    verifyUnionState(savepoint, batchEnv);
    verifyBroadcastState(savepoint, batchEnv);
}
Example 3
Source File: RedisSetUvExample.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
    env.setParallelism(2);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            UvExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromGroupOffsets();

    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
            .Builder().setHost("192.168.30.244").build();

    env.addSource(kafkaConsumer)
        .map(string -> {
            // deserialize the JSON payload
            UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                    string, UserVisitWebEvent.class);
            // build the Redis key in the format date_pageId, e.g. 20191026_0
            String redisKey = userVisitWebEvent.getDate() + "_"
                    + userVisitWebEvent.getPageId();
            return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
        })
        .returns(new TypeHint<Tuple2<String, String>>(){})
        .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

    env.execute("Redis Set UV Stat");
}
Example 4
Source File: SavepointReaderKeyedStateITCase.java From flink with Apache License 2.0 | 5 votes |
private void runKeyedState(StateBackend backend) throws Exception {
    StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setStateBackend(backend);
    streamEnv.setParallelism(4);

    streamEnv
        .addSource(new SavepointSource())
        .rebalance()
        .keyBy(id -> id.key)
        .process(new KeyedStatefulOperator())
        .uid(uid)
        .addSink(new DiscardingSink<>());

    JobGraph jobGraph = streamEnv.getStreamGraph().getJobGraph();

    String path = takeSavepoint(jobGraph);

    ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();

    ExistingSavepoint savepoint = Savepoint.load(batchEnv, path, backend);

    List<Pojo> results = savepoint
        .readKeyedState(uid, new Reader())
        .collect();

    Set<Pojo> expected = SavepointSource.getElements();

    Assert.assertEquals("Unexpected results from keyed state", expected, new HashSet<>(results));
}
Example 5
Source File: YARNITCase.java From flink with Apache License 2.0 | 5 votes |
private JobGraph getTestingJobGraph() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);

    env.addSource(new NoDataSource())
        .shuffle()
        .addSink(new DiscardingSink<>());

    return env.getStreamGraph().getJobGraph();
}
Example 6
Source File: FromCollection.java From blog_demos with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set parallelism to 1
    env.setParallelism(1);

    // create a List containing two Tuple2 elements
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2("aaa", 1));
    list.add(new Tuple2("bbb", 1));

    // create a DataStream from the List
    DataStream<Tuple2<String, Integer>> fromCollectionDataStream = env.fromCollection(list);

    // create a DataStream from individual Tuple2 elements
    DataStream<Tuple2<String, Integer>> fromElementDataStream = env.fromElements(
            new Tuple2("ccc", 1),
            new Tuple2("ddd", 1),
            new Tuple2("aaa", 1)
    );

    // merge the two DataStreams into one with union
    DataStream<Tuple2<String, Integer>> unionDataStream = fromCollectionDataStream.union(fromElementDataStream);

    // count the occurrences of each word
    unionDataStream
        .keyBy(0)
        .sum(1)
        .print();

    env.execute("API DataSource demo : collection");
}
Example 7
Source File: IntervalJoinITCase.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testBoundsCanBeExclusive() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(
        Tuple2.of("key", 0),
        Tuple2.of("key", 1),
        Tuple2.of("key", 2)
    ).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

    DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(
        Tuple2.of("key", 0),
        Tuple2.of("key", 1),
        Tuple2.of("key", 2)
    ).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

    streamOne.keyBy(new Tuple2KeyExtractor())
        .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
        .between(Time.milliseconds(0), Time.milliseconds(2))
        .upperBoundExclusive()
        .lowerBoundExclusive()
        .process(new CombineToStringJoinFunction())
        .addSink(new ResultSink());

    env.execute();

    expectInAnyOrder(
        "(key,0):(key,1)",
        "(key,1):(key,2)"
    );
}
Example 8
Source File: RichParrelSourceFunctionDemo.java From blog_demos with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set parallelism to 2
    env.setParallelism(2);

    DataStream<Student> dataStream = env.addSource(new MySQLParrelDataSource());
    dataStream.print();

    env.execute("Customize DataSource demo : RichParrelSourceFunction");
}
Example 9
Source File: IntervalJoinITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void testBoundsCanBeInclusive() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(
        Tuple2.of("key", 0),
        Tuple2.of("key", 1),
        Tuple2.of("key", 2)
    ).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

    DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(
        Tuple2.of("key", 0),
        Tuple2.of("key", 1),
        Tuple2.of("key", 2)
    ).assignTimestampsAndWatermarks(new AscendingTuple2TimestampExtractor());

    streamOne.keyBy(new Tuple2KeyExtractor())
        .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
        .between(Time.milliseconds(0), Time.milliseconds(2))
        .process(new CombineToStringJoinFunction())
        .addSink(new ResultSink());

    env.execute();

    expectInAnyOrder(
        "(key,0):(key,0)",
        "(key,0):(key,1)",
        "(key,0):(key,2)",
        "(key,1):(key,1)",
        "(key,1):(key,2)",
        "(key,2):(key,2)"
    );
}
Example 10
Source File: JdbcITCase.java From flink with Apache License 2.0 | 5 votes |
@Test
@Ignore
public void testInsert() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setRestartStrategy(new RestartStrategies.NoRestartStrategyConfiguration());
    env.setParallelism(1);
    env.fromElements(TEST_DATA)
        .addSink(JdbcSink.sink(
            String.format(INSERT_TEMPLATE, INPUT_TABLE),
            (ps, t) -> {
                ps.setInt(1, t.id);
                ps.setString(2, t.title);
                ps.setString(3, t.author);
                if (t.price == null) {
                    ps.setNull(4, Types.DOUBLE);
                } else {
                    ps.setDouble(4, t.price);
                }
                ps.setInt(5, t.qty);
            },
            new JdbcConnectionOptionsBuilder()
                .withUrl(getDbMetadata().getUrl())
                .withDriverName(getDbMetadata().getDriverClass())
                .build()));
    env.execute();

    assertEquals(Arrays.asList(TEST_DATA), selectBooks());
}
Example 11
Source File: ReinterpretDataStreamAsKeyedStreamITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * This test checks that reinterpreting a data stream as a keyed stream works as expected. The test consists of
 * two jobs. The first job materializes a keyBy into files, one file per partition. The second job opens the
 * files created by the first job as sources (doing the correct assignment of files to partitions) and
 * reinterprets the sources as keyed, because we know they have been partitioned in a keyBy by the first job.
 */
@Test
public void testReinterpretAsKeyedStream() throws Exception {

    final int maxParallelism = 8;
    final int numEventsPerInstance = 100;
    final int parallelism = 3;
    final int numTotalEvents = numEventsPerInstance * parallelism;
    final int numUniqueKeys = 100;

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.setMaxParallelism(maxParallelism);
    env.setParallelism(parallelism);
    env.enableCheckpointing(100);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));

    final List<File> partitionFiles = new ArrayList<>(parallelism);

    for (int i = 0; i < parallelism; ++i) {
        File partitionFile = temporaryFolder.newFile();
        partitionFiles.add(i, partitionFile);
    }

    env.addSource(new RandomTupleSource(numEventsPerInstance, numUniqueKeys))
        .keyBy(0)
        .addSink(new ToPartitionFileSink(partitionFiles));

    env.execute();

    DataStreamUtils.reinterpretAsKeyedStream(
            env.addSource(new FromPartitionFileSource(partitionFiles)),
            (KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
            TypeInformation.of(Integer.class))
        .timeWindow(Time.seconds(1)) // test that also timers and aggregated state work as expected
        .reduce((ReduceFunction<Tuple2<Integer, Integer>>) (value1, value2) ->
                new Tuple2<>(value1.f0, value1.f1 + value2.f1))
        .addSink(new ValidatingSink(numTotalEvents)).setParallelism(1);

    env.execute();
}
Example 12
Source File: FlinkSQLDistinctExample.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String ddlSource = "CREATE TABLE user_behavior (\n" +
            " user_id BIGINT,\n" +
            " item_id BIGINT,\n" +
            " category_id BIGINT,\n" +
            " behavior STRING,\n" +
            " ts TIMESTAMP(3)\n" +
            ") WITH (\n" +
            " 'connector.type' = 'kafka',\n" +
            " 'connector.version' = '0.11',\n" +
            " 'connector.topic' = 'user_behavior',\n" +
            " 'connector.startup-mode' = 'latest-offset',\n" +
            " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" +
            " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" +
            " 'format.type' = 'json'\n" +
            ")";

    String countSql = "select user_id, count(user_id) from user_behavior group by user_id";

    blinkStreamTableEnv.sqlUpdate(ddlSource);
    Table countTable = blinkStreamTableEnv.sqlQuery(countSql);
    blinkStreamTableEnv.toRetractStream(countTable, Row.class).print();

    String distinctSql = "select distinct(user_id) from user_behavior";
    Table distinctTable = blinkStreamTableEnv.sqlQuery(distinctSql);
    blinkStreamTableEnv.toRetractStream(distinctTable, Row.class).print("==");

    blinkStreamTableEnv.execute("Blink Stream SQL count/distinct demo");
}
Example 13
Source File: TimestampITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * This tests whether timestamps are properly extracted in the timestamp
 * extractor and whether watermarks are also correctly forwarded from this with the auto watermark
 * interval.
 */
@Test
public void testTimestampExtractorWithAutoInterval() throws Exception {
    final int numElements = 10;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(10);
    env.setParallelism(1);
    env.getConfig().disableSysoutLogging();

    DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {
        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            int index = 1;
            while (index <= numElements) {
                ctx.collect(index);
                latch.await();
                index++;
            }
        }

        @Override
        public void cancel() {}
    });

    DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks(
            new AscendingTimestampExtractor<Integer>() {
                @Override
                public long extractAscendingTimestamp(Integer element) {
                    return element;
                }
            });

    extractOp
        .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
        .transform("Timestamp Check", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator());

    // verify that extractor picks up source parallelism
    Assert.assertEquals(extractOp.getTransformation().getParallelism(), source1.getTransformation().getParallelism());

    env.execute();

    // verify that we get NUM_ELEMENTS watermarks
    for (int j = 0; j < numElements; j++) {
        if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) {
            long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp();
            Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]);
        }
    }

    // the input is finite, so it should have a MAX Watermark
    assertEquals(Watermark.MAX_WATERMARK,
            CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1));
}
Example 14
Source File: PartitionerITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void partitionerTest() {

    TestListResultSink<Tuple2<Integer, String>> hashPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> customPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> broadcastPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> forwardPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> rebalancePartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> globalPartitionResultSink =
            new TestListResultSink<Tuple2<Integer, String>>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple1<String>> src = env.fromCollection(
            INPUT.stream().map(Tuple1::of).collect(Collectors.toList()));

    // partition by hash
    src
        .keyBy(0)
        .map(new SubtaskIndexAssigner())
        .addSink(hashPartitionResultSink);

    // partition custom
    DataStream<Tuple2<Integer, String>> partitionCustom = src
        .partitionCustom(new Partitioner<String>() {
            @Override
            public int partition(String key, int numPartitions) {
                if (key.equals("c")) {
                    return 2;
                } else {
                    return 0;
                }
            }
        }, 0)
        .map(new SubtaskIndexAssigner());
    partitionCustom.addSink(customPartitionResultSink);

    // partition broadcast
    src.broadcast().map(new SubtaskIndexAssigner()).addSink(broadcastPartitionResultSink);

    // partition rebalance
    src.rebalance().map(new SubtaskIndexAssigner()).addSink(rebalancePartitionResultSink);

    // partition forward
    src.map(new MapFunction<Tuple1<String>, Tuple1<String>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Tuple1<String> map(Tuple1<String> value) throws Exception {
                return value;
            }
        })
        .forward()
        .map(new SubtaskIndexAssigner())
        .addSink(forwardPartitionResultSink);

    // partition global
    src.global().map(new SubtaskIndexAssigner()).addSink(globalPartitionResultSink);

    try {
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    List<Tuple2<Integer, String>> hashPartitionResult = hashPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> customPartitionResult = customPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> broadcastPartitionResult = broadcastPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> forwardPartitionResult = forwardPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> rebalancePartitionResult = rebalancePartitionResultSink.getResult();
    List<Tuple2<Integer, String>> globalPartitionResult = globalPartitionResultSink.getResult();

    verifyHashPartitioning(hashPartitionResult);
    verifyCustomPartitioning(customPartitionResult);
    verifyBroadcastPartitioning(broadcastPartitionResult);
    verifyRebalancePartitioning(forwardPartitionResult);
    verifyRebalancePartitioning(rebalancePartitionResult);
    verifyGlobalPartitioning(globalPartitionResult);
}
Example 15
Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public void runKeyValueTest() throws Exception {
    final String topic = "keyvaluetest";
    createTestTopic(topic, 1, 1);
    final int elementCount = 5000;

    // ----------- Write some data into Kafka -------------------

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    DataStream<Tuple2<Long, PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<Long, PojoValue>>() {
        @Override
        public void run(SourceContext<Tuple2<Long, PojoValue>> ctx) throws Exception {
            Random rnd = new Random(1337);
            for (long i = 0; i < elementCount; i++) {
                PojoValue pojo = new PojoValue();
                pojo.when = new Date(rnd.nextLong());
                pojo.lon = rnd.nextLong();
                pojo.lat = i;
                // make every second key null to ensure proper "null" serialization
                Long key = (i % 2 == 0) ? null : i;
                ctx.collect(new Tuple2<>(key, pojo));
            }
        }

        @Override
        public void cancel() {
        }
    });

    KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema =
            new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "3");
    kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
    env.execute("Write KV to Kafka");

    // ----------- Read the data again -------------------

    env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();

    KafkaDeserializationSchema<Tuple2<Long, PojoValue>> readSchema =
            new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));

    fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
        long counter = 0;

        @Override
        public void flatMap(Tuple2<Long, PojoValue> value, Collector<Object> out) throws Exception {
            // the elements should be in order.
            Assert.assertTrue("Wrong value " + value.f1.lat, value.f1.lat == counter);
            if (value.f1.lat % 2 == 0) {
                assertNull("key was not null", value.f0);
            } else {
                Assert.assertTrue("Wrong value " + value.f0, value.f0 == counter);
            }
            counter++;
            if (counter == elementCount) {
                // we got the right number of elements
                throw new SuccessException();
            }
        }
    });

    tryExecute(env, "Read KV from Kafka");

    deleteTestTopic(topic);
}
Example 16
Source File: WindowCheckpointingITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testAggregatingTumblingProcessingTimeWindow() {
    final int numElements = 3000;

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(timeCharacteristic);
        env.getConfig().setAutoWatermarkInterval(10);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.getConfig().disableSysoutLogging();

        SinkValidatorUpdaterAndChecker updaterAndChecker =
                new SinkValidatorUpdaterAndChecker(numElements, 1);
        env
            .addSource(new FailingSource(new Generator(), numElements, timeCharacteristic))
            .map(new MapFunction<Tuple2<Long, IntType>, Tuple2<Long, IntType>>() {
                @Override
                public Tuple2<Long, IntType> map(Tuple2<Long, IntType> value) {
                    value.f1.value = 1;
                    return value;
                }
            })
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(100, MILLISECONDS))
            .reduce(new ReduceFunction<Tuple2<Long, IntType>>() {
                @Override
                public Tuple2<Long, IntType> reduce(
                        Tuple2<Long, IntType> a,
                        Tuple2<Long, IntType> b) {
                    return new Tuple2<>(a.f0, new IntType(1));
                }
            })
            .addSink(new ValidatingSink<>(updaterAndChecker, updaterAndChecker, timeCharacteristic))
            .setParallelism(1);

        tryExecute(env, "Aggregating Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 17
Source File: CoGroupJoinITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testSelfJoin() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple3<String, String, Integer>> source1 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple3.of("a", "x", 0));
            ctx.collect(Tuple3.of("a", "y", 1));
            ctx.collect(Tuple3.of("a", "z", 2));

            ctx.collect(Tuple3.of("b", "u", 3));
            ctx.collect(Tuple3.of("b", "w", 5));

            ctx.collect(Tuple3.of("a", "i", 6));
            ctx.collect(Tuple3.of("a", "j", 7));
            ctx.collect(Tuple3.of("a", "k", 8));

            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

    source1.join(source1)
        .where(new Tuple3KeyExtractor())
        .equalTo(new Tuple3KeyExtractor())
        .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
        .apply(new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {
            @Override
            public String join(Tuple3<String, String, Integer> first, Tuple3<String, String, Integer> second) throws Exception {
                return first + ":" + second;
            }
        })
        .addSink(new SinkFunction<String>() {
            @Override
            public void invoke(String value) throws Exception {
                testResults.add(value);
            }
        });

    env.execute("Self-Join Test");

    List<String> expectedResult = Arrays.asList(
            "(a,x,0):(a,x,0)",
            "(a,x,0):(a,y,1)",
            "(a,x,0):(a,z,2)",
            "(a,y,1):(a,x,0)",
            "(a,y,1):(a,y,1)",
            "(a,y,1):(a,z,2)",
            "(a,z,2):(a,x,0)",
            "(a,z,2):(a,y,1)",
            "(a,z,2):(a,z,2)",
            "(b,u,3):(b,u,3)",
            "(b,u,3):(b,w,5)",
            "(b,w,5):(b,u,3)",
            "(b,w,5):(b,w,5)",
            "(a,i,6):(a,i,6)",
            "(a,i,6):(a,j,7)",
            "(a,i,6):(a,k,8)",
            "(a,j,7):(a,i,6)",
            "(a,j,7):(a,j,7)",
            "(a,j,7):(a,k,8)",
            "(a,k,8):(a,i,6)",
            "(a,k,8):(a,j,7)",
            "(a,k,8):(a,k,8)");

    Collections.sort(expectedResult);
    Collections.sort(testResults);

    Assert.assertEquals(expectedResult, testResults);
}
Example 18
Source File: EventTimeAllWindowCheckpointingITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testPreAggregatedTumblingTimeWindow() {
    final int numElementsPerKey = 3000;
    final int windowSize = 100;
    final int numKeys = 1;

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.getConfig().disableSysoutLogging();

        env
            .addSource(new FailingSource(new EventTimeWindowCheckpointingITCase.KeyedEventTimeGenerator(numKeys, windowSize), numElementsPerKey))
            .rebalance()
            .timeWindowAll(Time.of(windowSize, MILLISECONDS))
            .reduce(
                new ReduceFunction<Tuple2<Long, IntType>>() {
                    @Override
                    public Tuple2<Long, IntType> reduce(
                            Tuple2<Long, IntType> a,
                            Tuple2<Long, IntType> b) {
                        return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
                    }
                },
                new RichAllWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, TimeWindow>() {

                    private boolean open = false;

                    @Override
                    public void open(Configuration parameters) {
                        assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
                        open = true;
                    }

                    @Override
                    public void apply(
                            TimeWindow window,
                            Iterable<Tuple2<Long, IntType>> input,
                            Collector<Tuple4<Long, Long, Long, IntType>> out) {

                        // validate that the function has been opened properly
                        assertTrue(open);

                        for (Tuple2<Long, IntType> in: input) {
                            out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
                        }
                    }
                })
            .addSink(new ValidatingSink<>(
                new EventTimeWindowCheckpointingITCase.SinkValidatorUpdateFun(numElementsPerKey),
                new EventTimeWindowCheckpointingITCase.SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSize)))
            .setParallelism(1);

        env.execute("Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: Kafka010ITCase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

    final String topic = "tstopic";
    createTestTopic(topic, 3, 1);

    // ---------- Produce an event time stream into Kafka -------------------

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
        private static final long serialVersionUID = -2255105836471289626L;
        boolean running = true;

        @Override
        public void run(SourceContext<Long> ctx) throws Exception {
            long i = 0;
            while (running) {
                ctx.collectWithTimestamp(i, i * 2);
                if (i++ == 1000L) {
                    running = false;
                }
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
    FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(
            streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps,
            new FlinkKafkaPartitioner<Long>() {
                private static final long serialVersionUID = -6730989584364230617L;

                @Override
                public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
                    return (int) (next % 3);
                }
            });
    prod.setParallelism(3);
    prod.setWriteTimestampToKafka(true);

    env.execute("Produce some");

    // ---------- Consume stream from Kafka -------------------

    env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
    kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
        private static final long serialVersionUID = -4834111073247835189L;

        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
            if (lastElement % 10 == 0) {
                return new Watermark(lastElement);
            }
            return null;
        }

        @Override
        public long extractTimestamp(Long element, long previousElementTimestamp) {
            return previousElementTimestamp;
        }
    });

    DataStream<Long> stream = env.addSource(kafkaSource);
    GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
    stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

    env.execute("Consume again");

    deleteTestTopic(topic);
}
Example 20
Source File: EventTimeWindowCheckpointingITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testSlidingTimeWindow() {
    final int numElementsPerKey = numElementsPerKey();
    final int windowSize = windowSize();
    final int windowSlide = windowSlide();
    final int numKeys = numKeys();

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setMaxParallelism(2 * PARALLELISM);
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.setStateBackend(this.stateBackend);
        env.getConfig().setUseSnapshotCompression(true);

        env
            .addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
            .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                private boolean open = false;

                @Override
                public void open(Configuration parameters) {
                    assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                    open = true;
                }

                @Override
                public void apply(
                        Tuple tuple,
                        TimeWindow window,
                        Iterable<Tuple2<Long, IntType>> values,
                        Collector<Tuple4<Long, Long, Long, IntType>> out) {

                    // validate that the function has been opened properly
                    assertTrue(open);

                    int sum = 0;
                    long key = -1;

                    for (Tuple2<Long, IntType> value : values) {
                        sum += value.f1.value;
                        key = value.f0;
                    }

                    final Tuple4<Long, Long, Long, IntType> output =
                            new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum));
                    out.collect(output);
                }
            })
            .addSink(new ValidatingSink<>(
                new SinkValidatorUpdateFun(numElementsPerKey),
                new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

        env.execute("Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}