Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#enableCheckpointing()

The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#enableCheckpointing(). Each example is taken from an open-source project; the source file and project are noted above it.
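Before the project examples, here is a minimal, self-contained sketch of the two common overloads: enableCheckpointing(long interval), which defaults to exactly-once semantics, and enableCheckpointing(long interval, CheckpointingMode mode). The pipeline below is a placeholder for illustration only, not taken from any of the projects that follow.

import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class EnableCheckpointingSketch {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Trigger a checkpoint every 10 seconds; the mode defaults to CheckpointingMode.EXACTLY_ONCE.
		env.enableCheckpointing(10_000L);

		// The second overload sets the mode explicitly, e.g. for at-least-once guarantees.
		env.enableCheckpointing(10_000L, CheckpointingMode.AT_LEAST_ONCE);

		// A trivial placeholder pipeline so the job has something to run.
		env.fromElements(1, 2, 3).print();
		env.execute("enableCheckpointing sketch");
	}
}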
Example 1
Source File: StreamingScalabilityAndLatency.java    From flink with Apache License 2.0
private static void runPartitioningProgram(int parallelism) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.getConfig().enableObjectReuse();

	env.setBufferTimeout(5L);
	env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);

	env
		.addSource(new TimeStampingSource())
		.map(new IdMapper<Tuple2<Long, Long>>())
		.keyBy(0)
		.addSink(new TimestampingSink());

	env.execute("Partitioning Program");
}
 
Example 2
Source File: TwoInputBenchmark.java    From flink-benchmarks with Apache License 2.0
@Benchmark
@OperationsPerInvocation(value = TwoInputBenchmark.ONE_IDLE_RECORDS_PER_INVOCATION)
public void twoInputOneIdleMapSink(FlinkEnvironmentContext context) throws Exception {

	StreamExecutionEnvironment env = context.env;
	env.enableCheckpointing(CHECKPOINT_INTERVAL_MS);
	env.setParallelism(1);

	QueuingLongSource.reset();
	DataStreamSource<Long> source1 = env.addSource(new QueuingLongSource(1, ONE_IDLE_RECORDS_PER_INVOCATION - 1));
	DataStreamSource<Long> source2 = env.addSource(new QueuingLongSource(2, 1));

	source1
			.connect(source2)
			.transform("custom operator", TypeInformation.of(Long.class), new MultiplyByTwoCoStreamMap())
			.addSink(new DiscardingSink<>());

	env.execute();
}
 
Example 3
Source File: RestartStrategyTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Checks that in a streaming use case where checkpointing is enabled, the number of execution
 * retries is set to 42, and the delay is set to 1337 ms, fixed-delay restarting is used.
 */
@Test
public void testFixedRestartingWhenCheckpointingAndExplicitExecutionRetriesNonZero() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setNumberOfExecutionRetries(42);
	env.getConfig().setExecutionRetryDelay(1337);

	env.fromElements(1).print();

	StreamGraph graph = env.getStreamGraph();
	JobGraph jobGraph = graph.getJobGraph();

	RestartStrategies.RestartStrategyConfiguration restartStrategy =
		jobGraph.getSerializedExecutionConfig().deserializeValue(getClass().getClassLoader()).getRestartStrategy();

	Assert.assertNotNull(restartStrategy);
	Assert.assertTrue(restartStrategy instanceof RestartStrategies.FixedDelayRestartStrategyConfiguration);
	Assert.assertEquals(42, ((RestartStrategies.FixedDelayRestartStrategyConfiguration) restartStrategy).getRestartAttempts());
	Assert.assertEquals(1337, ((RestartStrategies.FixedDelayRestartStrategyConfiguration) restartStrategy).getDelayBetweenAttemptsInterval().toMilliseconds());
}
 
Example 4
Source File: AvroStreamingFileSinkITCase.java    From flink with Apache License 2.0
@Test
public void testWriteAvroGeneric() throws Exception {
	File folder = TEMPORARY_FOLDER.newFolder();

	Schema schema = Address.getClassSchema();
	Collection<GenericRecord> data = new GenericTestDataCollection();

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	AvroWriterFactory<GenericRecord> avroWriterFactory = AvroWriters.forGenericRecord(schema);
	DataStream<GenericRecord> stream = env.addSource(
		new FiniteTestSource<>(data),
		new GenericRecordAvroTypeInfo(schema));
	stream.addSink(StreamingFileSink.forBulkFormat(
		Path.fromLocalFile(folder),
		avroWriterFactory).build());
	env.execute();

	validateResults(folder, new GenericDatumReader<>(schema), new ArrayList<>(data));
}
 
Example 5
Source File: ParquetStreamingFileSinkITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testWriteParquetAvroReflect() throws Exception {

	final File folder = TEMPORARY_FOLDER.newFolder();

	final List<Datum> data = Arrays.asList(
			new Datum("a", 1), new Datum("b", 2), new Datum("c", 3));

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	DataStream<Datum> stream = env.addSource(
			new FiniteTestSource<>(data), TypeInformation.of(Datum.class));

	stream.addSink(
			StreamingFileSink.forBulkFormat(
					Path.fromLocalFile(folder),
					ParquetAvroWriters.forReflectRecord(Datum.class))
					.build());

	env.execute();

	validateResults(folder, ReflectData.get(), data);
}
 
Example 6
Source File: RestartStrategyTest.java    From flink with Apache License 2.0
/**
 * Checks that in a streaming use case where checkpointing is enabled and the number
 * of execution retries is set to 0, restarting is deactivated.
 */
@Test
public void testNoRestartingWhenCheckpointingAndExplicitExecutionRetriesZero() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setNumberOfExecutionRetries(0);

	env.fromElements(1).print();

	StreamGraph graph = env.getStreamGraph();
	JobGraph jobGraph = graph.getJobGraph();

	RestartStrategies.RestartStrategyConfiguration restartStrategy =
		jobGraph.getSerializedExecutionConfig().deserializeValue(getClass().getClassLoader()).getRestartStrategy();

	Assert.assertNotNull(restartStrategy);
	Assert.assertTrue(restartStrategy instanceof RestartStrategies.NoRestartStrategyConfiguration);
}
 
Example 7
Source File: InputBenchmark.java    From flink-benchmarks with Apache License 2.0
@Benchmark
public void mapRebalanceMapSink(FlinkEnvironmentContext context) throws Exception {

	StreamExecutionEnvironment env = context.env;
	env.enableCheckpointing(CHECKPOINT_INTERVAL_MS);
	env.setParallelism(1);

	DataStreamSource<Long> source = env.addSource(new LongSource(RECORDS_PER_INVOCATION));
	source
		.map(new MultiplyByTwo())
		.rebalance()
		.map((Long in) -> in)
		.addSink(new DiscardingSink<>());

	env.execute();
}
 
Example 8
Source File: RescalingITCase.java    From flink with Apache License 2.0
private static JobGraph createJobGraphWithKeyedAndNonPartitionedOperatorState(
		int parallelism,
		int maxParallelism,
		int fixedParallelism,
		int numberKeys,
		int numberElements,
		boolean terminateAfterEmission,
		int checkpointingInterval) {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.getConfig().setMaxParallelism(maxParallelism);
	env.enableCheckpointing(checkpointingInterval);
	env.setRestartStrategy(RestartStrategies.noRestart());

	DataStream<Integer> input = env.addSource(new SubtaskIndexNonPartitionedStateSource(
			numberKeys,
			numberElements,
			terminateAfterEmission))
			.setParallelism(fixedParallelism)
			.keyBy(new KeySelector<Integer, Integer>() {
				private static final long serialVersionUID = -7952298871120320940L;

				@Override
				public Integer getKey(Integer value) throws Exception {
					return value;
				}
			});

	SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

	DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements));

	result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

	return env.getStreamGraph().getJobGraph();
}
 
Example 9
Source File: CheckpointedStreamingProgram.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	env.getConfig().disableSysoutLogging();
	env.enableCheckpointing(CHECKPOINT_INTERVALL);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 100L));
	env.disableOperatorChaining();

	DataStream<String> text = env.addSource(new SimpleStringGenerator());
	text.map(new StatefulMapper()).addSink(new NoOpSink());
	env.setParallelism(1);
	env.execute("Checkpointed Streaming Program");
}
 
Example 10
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    if (parameterTool.getNumberOfParameters() < 4) {
        System.out.println("Missing parameters!\n" +
                "Usage: flink run PubSub.jar --input-subscription <subscription> --input-topicName <topic> --output-topicName <output-topic> " +
                "--google-project <google project name> ");
        return;
    }

    String projectName = parameterTool.getRequired("stream.project.name");
    String inputTopicName = parameterTool.getRequired("stream.input.topicName");
    String subscriptionName = parameterTool.getRequired("stream.input.subscription");
    String outputTopicName = parameterTool.getRequired("stream.output.topicName");

    PubSubPublisherUtil pubSubPublisher = new PubSubPublisherUtil(projectName, inputTopicName);
    pubSubPublisher.publish(10);


    env.addSource(PubSubSource.newBuilder()
            .withDeserializationSchema(new IntegerSerializer())
            .withProjectName(projectName)
            .withSubscriptionName(subscriptionName)
            .withMessageRateLimit(1)
            .build())
            .map(Main::printAndReturn).disableChaining()
            .addSink(PubSubSink.newBuilder()
                    .withSerializationSchema(new IntegerSerializer())
                    .withProjectName(projectName)
                    .withTopicName(outputTopicName).build());
    env.enableCheckpointing(parameterTool.getLong(PropertiesConstants.STREAM_CHECKPOINT_INTERVAL, 1000L));
    env.execute("Flink connector gcp pubsub test");
}
 
Example 11
Source File: UnionListStateExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // checkpoint every 15 seconds
        env.enableCheckpointing(TimeUnit.SECONDS.toMillis(15));
        env.setParallelism(3);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // exactly-once checkpointing semantics
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UnionListStateUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>(
                // Kafka topic, with plain String deserialization
                UnionListStateUtil.topic, new SimpleStringSchema(), props);

        env.addSource(kafkaConsumer011)
                .uid(UnionListStateUtil.topic)
                .addSink(new MySink())
                .uid("MySink")
                .name("MySink");

        env.execute("Flink unionListState");
    }
 
Example 12
Source File: ContinuousFileProcessingCheckpointITCase.java    From flink with Apache License 2.0
@Override
public void testProgram(StreamExecutionEnvironment env) {

	// set the restart strategy.
	env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0));
	env.enableCheckpointing(10);

	// create and start the file-creating thread.
	fc = new FileCreator();
	fc.start();

	// create the monitoring source along with the necessary readers.
	TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	DataStream<String> inputStream = env.readFile(format, localFsURI,
		FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL);

	TestingSinkFunction sink = new TestingSinkFunction();

	inputStream.flatMap(new FlatMapFunction<String, String>() {
		@Override
		public void flatMap(String value, Collector<String> out) throws Exception {
			out.collect(value);
		}
	}).addSink(sink).setParallelism(1);
}
 
Example 13
Source File: Elasticsearch5SinkExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool parameterTool = ParameterTool.fromArgs(args);

		if (parameterTool.getNumberOfParameters() < 3) {
			System.out.println("Missing parameters!\n" +
				"Usage: --numRecords <numRecords> --index <index> --type <type>");
			return;
		}

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.enableCheckpointing(5000);

		DataStream<String> source = env.generateSequence(0, parameterTool.getInt("numRecords") - 1)
			.map(new MapFunction<Long, String>() {
				@Override
				public String map(Long value) throws Exception {
					return "message #" + value;
				}
			});

		Map<String, String> userConfig = new HashMap<>();
		userConfig.put("cluster.name", "elasticsearch");
		// This instructs the sink to emit after every element, otherwise they would be buffered
		userConfig.put(ElasticsearchSink.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");

		List<InetSocketAddress> transports = new ArrayList<>();
		transports.add(new InetSocketAddress(InetAddress.getByName("127.0.0.1"), 9300));

		source.addSink(new ElasticsearchSink<>(userConfig, transports, new ElasticsearchSinkFunction<String>() {
			@Override
			public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
				indexer.add(createIndexRequest(element, parameterTool));
			}
		}));

		env.execute("Elasticsearch5.x end to end sink test example");
	}
 
Example 14
Source File: RescalingITCase.java    From flink with Apache License 2.0
private static JobGraph createJobGraphWithOperatorState(
		int parallelism, int maxParallelism, OperatorCheckpointMethod checkpointMethod) {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.getConfig().setMaxParallelism(maxParallelism);
	env.enableCheckpointing(Long.MAX_VALUE);
	env.setRestartStrategy(RestartStrategies.noRestart());

	StateSourceBase.workStartedLatch = new CountDownLatch(parallelism);

	SourceFunction<Integer> src;

	switch (checkpointMethod) {
		case CHECKPOINTED_FUNCTION:
			src = new PartitionedStateSource(false);
			break;
		case CHECKPOINTED_FUNCTION_BROADCAST:
			src = new PartitionedStateSource(true);
			break;
		case LIST_CHECKPOINTED:
			src = new PartitionedStateSourceListCheckpointed();
			break;
		case NON_PARTITIONED:
			src = new NonPartitionedStateSource();
			break;
		default:
			throw new IllegalArgumentException();
	}

	DataStream<Integer> input = env.addSource(src);

	input.addSink(new DiscardingSink<Integer>());

	return env.getStreamGraph().getJobGraph();
}
 
Example 15
Source File: KeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // use RocksDBStateBackend as the state backend, with incremental checkpoints enabled
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // predefined options for spinning disk + memory; an SSD is strongly recommended for RocksDB
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // checkpoint every 10 minutes
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // configure checkpointing
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer configuration
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // deserialize the JSON payload
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }
 
Example 16
Source File: PeriodicStreamingJob.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool params = ParameterTool.fromArgs(args);
	String outputPath = params.getRequired("outputPath");
	int recordsPerSecond = params.getInt("recordsPerSecond", 10);
	int duration = params.getInt("durationInSecond", 60);
	int offset = params.getInt("offsetInSecond", 0);

	StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	sEnv.enableCheckpointing(4000);
	sEnv.getConfig().setAutoWatermarkInterval(1000);

	// execute a simple pass-through program.
	PeriodicSourceGenerator generator = new PeriodicSourceGenerator(
		recordsPerSecond, duration, offset);
	DataStream<Tuple> rows = sEnv.addSource(generator);

	DataStream<Tuple> result = rows
		.keyBy(1)
		.timeWindow(Time.seconds(5))
		.sum(0);

	result.writeAsText(outputPath + "/result.txt", FileSystem.WriteMode.OVERWRITE)
		.setParallelism(1);

	sEnv.execute();
}
 
Example 17
Source File: LegacyStatefulJobSavepointMigrationITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Manually run this to write binary snapshot data.
 */
@Test
@Ignore
public void writeSavepoint() throws Exception {

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (flinkGenerateSavepointBackendType) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(4);
	env.setMaxParallelism(4);

	env
		.addSource(new LegacyCheckpointedSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
		.flatMap(new LegacyCheckpointedFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
		.keyBy(0)
		.flatMap(new LegacyCheckpointedFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
		.keyBy(0)
		.flatMap(new KeyedStateSettingFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
		.keyBy(0)
		.transform(
			"custom_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new CheckpointedUdfOperator(new LegacyCheckpointedFlatMapWithKeyedState())).uid("LegacyCheckpointedOperator")
		.keyBy(0)
		.transform(
			"timely_stateful_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new TimelyStatefulOperator()).uid("TimelyStatefulOperator")
		.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>());

	executeAndSavepoint(
		env,
		"src/test/resources/" + getSavepointPath(flinkGenerateSavepointVersion, flinkGenerateSavepointBackendType),
		new Tuple2<>(AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
 
Example 18
Source File: EventTimeAllWindowCheckpointingITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testPreAggregatedTumblingTimeWindow() {
	final int numElementsPerKey = 3000;
	final int windowSize = 100;
	final int numKeys = 1;

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();

		env
			.addSource(new FailingSource(new EventTimeWindowCheckpointingITCase.KeyedEventTimeGenerator(numKeys, windowSize), numElementsPerKey))
				.rebalance()
				.timeWindowAll(Time.of(windowSize, MILLISECONDS))
				.reduce(
						new ReduceFunction<Tuple2<Long, IntType>>() {

							@Override
							public Tuple2<Long, IntType> reduce(
									Tuple2<Long, IntType> a,
									Tuple2<Long, IntType> b) {

								return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
							}
						},
						new RichAllWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> input,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						for (Tuple2<Long, IntType> in: input) {
							out.collect(new Tuple4<>(in.f0,
									window.getStart(),
									window.getEnd(),
									in.f1));
						}
					}
				})
			.addSink(new ValidatingSink<>(
				new EventTimeWindowCheckpointingITCase.SinkValidatorUpdateFun(numElementsPerKey),
				new EventTimeWindowCheckpointingITCase.SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSize)))
			.setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 19
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests proper consumption when there are more Flink sources than Kafka partitions, which means
 * that some Flink sources will read no partitions.
 */
public void runMultipleSourcesOnePartitionExactlyOnceTest() throws Exception {
	final String topic = "manyToOneTopic";
	final int numPartitions = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = numPartitions * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	final int parallelism = 8;

	createTestTopic(topic, numPartitions, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			numPartitions,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	// set the number of restarts to one: the failing mapper fails once, after which only successes follow.
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();
	env.setBufferTimeout(0);

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
		.addSource(kafkaSource)
		.map(new PartitionValidatingMapper(numPartitions, 1))
		.map(new FailingIdentityMapper<Integer>(failAfterElements))
		.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "multi-source-one-partitions exactly once test");

	deleteTestTopic(topic);
}
 
Example 20
Source File: EventTimeAllWindowCheckpointingITCase.java    From flink with Apache License 2.0
@Test
public void testPreAggregatedFoldingTumblingTimeWindow() {
	final int numElementsPerKey = 3000;
	final int windowSize = 100;
	final int numKeys = 1;

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();

		env
			.addSource(new FailingSource(new EventTimeWindowCheckpointingITCase.KeyedEventTimeGenerator(numKeys, windowSize), numElementsPerKey))
				.rebalance()
				.timeWindowAll(Time.of(windowSize, MILLISECONDS))
				.fold(new Tuple4<>(0L, 0L, 0L, new IntType(0)),
						new FoldFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>>() {
							@Override
							public Tuple4<Long, Long, Long, IntType> fold(Tuple4<Long, Long, Long, IntType> accumulator,
									Tuple2<Long, IntType> value) throws Exception {
								accumulator.f0 = value.f0;
								accumulator.f3 = new IntType(accumulator.f3.value + value.f1.value);
								return accumulator;
							}
						},
						new RichAllWindowFunction<Tuple4<Long, Long, Long, IntType>, Tuple4<Long, Long, Long, IntType>, TimeWindow>() {

							private boolean open = false;

							@Override
							public void open(Configuration parameters) {
								assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
								open = true;
							}

							@Override
							public void apply(
									TimeWindow window,
									Iterable<Tuple4<Long, Long, Long, IntType>> input,
									Collector<Tuple4<Long, Long, Long, IntType>> out) {

								// validate that the function has been opened properly
								assertTrue(open);

								for (Tuple4<Long, Long, Long, IntType> in: input) {
									out.collect(new Tuple4<>(in.f0,
											window.getStart(),
											window.getEnd(),
											in.f3));
								}
							}
						})
			.addSink(new ValidatingSink<>(
				new EventTimeWindowCheckpointingITCase.SinkValidatorUpdateFun(numElementsPerKey),
				new EventTimeWindowCheckpointingITCase.SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSize)))
			.setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}