Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setRestartStrategy()

The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setRestartStrategy(). They are drawn from open-source projects; the originating project and source file are listed above each example.
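Before the project-specific examples, here is a minimal, self-contained sketch of the call itself. It uses only standard Flink APIs that also appear throughout this page (RestartStrategies from org.apache.flink.api.common.restartstrategy and Time from org.apache.flink.api.common.time); the attempt counts and delays are arbitrary illustrative values.

import java.util.concurrent.TimeUnit;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class RestartStrategySketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // retry at most 3 times, waiting 10 seconds between attempts
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
                3, Time.of(10, TimeUnit.SECONDS)));

        // the same strategy via the millisecond overload:
        // env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 10_000L));

        // or fail the job permanently on the first failure:
        // env.setRestartStrategy(RestartStrategies.noRestart());

        env.fromElements(1, 2, 3).print(); // placeholder topology
        env.execute("restart strategy sketch");
    }
}
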
Example 1
Source File: CassandraTupleWriteAheadSinkExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(1000);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
    env.setStateBackend(new FsStateBackend("file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend"));

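    // the write-ahead-log sink buffers incoming records per checkpoint and only
    // commits them to Cassandra once the enclosing checkpoint completes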
    CassandraSink<Tuple2<String, Integer>> sink = CassandraSink.addSink(env.addSource(new MySource()))
            .setQuery("INSERT INTO zhisheng.values (id, counter) values (?, ?);")
            .enableWriteAheadLog()
            .setClusterBuilder(new ClusterBuilder() {

                private static final long serialVersionUID = 2793938419775311824L;

                @Override
                public Cluster buildCluster(Cluster.Builder builder) {
                    return builder.addContactPoint("127.0.0.1").build();
                }
            })
            .build();

    sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello");

    env.execute();
}
 
Example 2
Source File: FixedDelayRestartStrategyMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create the stream execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));
    // restart every 5 seconds, for up to three attempts; if the job still is not up, stop for good
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000));

    env.addSource(new SourceFunction<Long>() {
        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (true) {
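                // deliberately emit null so the map below throws a NullPointerException,
                // exercising the fixed-delay restart strategy configured above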
                sourceContext.collect(null);
            }
        }
        @Override
        public void cancel() {
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    env.execute("zhisheng fixedDelay Restart Strategy example");
}

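The examples on this page only use the fixed-delay and no-restart strategies. The third built-in option, failure-rate restart, caps how many failures are tolerated within a measurement interval. Below is a minimal sketch using the same RestartStrategies/Time APIs as above; the helper name and all values are illustrative, not taken from any example here.

static void configureFailureRateRestart(StreamExecutionEnvironment env) {
    // tolerate at most 3 failures within any 5-minute interval, waiting
    // 10 seconds between restart attempts; one more failure inside the
    // interval fails the job permanently
    env.setRestartStrategy(RestartStrategies.failureRateRestart(
            3,                               // max failures per interval
            Time.of(5, TimeUnit.MINUTES),    // failure rate measurement interval
            Time.of(10, TimeUnit.SECONDS))); // delay between restart attempts
}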
 
Example 3
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0
private static void setupRestartStrategy(final StreamExecutionEnvironment env, final ParameterTool pt) {
	String restartStrategyConfig = pt.get(ENVIRONMENT_RESTART_STRATEGY.key());
	if (restartStrategyConfig != null) {
		RestartStrategies.RestartStrategyConfiguration restartStrategy;
		switch (restartStrategyConfig) {
			case "fixed_delay":
				restartStrategy = RestartStrategies.fixedDelayRestart(
					pt.getInt(
						ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.key(),
						ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.defaultValue()),
					pt.getLong(
						ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.key(),
						ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.defaultValue()));
				break;
			case "no_restart":
				restartStrategy = RestartStrategies.noRestart();
				break;
			default:
				throw new IllegalArgumentException("Unknown restart strategy: " + restartStrategyConfig);
		}
		env.setRestartStrategy(restartStrategy);
	}
}
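
For context, a caller would wire this helper up roughly as follows. The command-line flag in the comment is an assumption: the real flag is whatever ENVIRONMENT_RESTART_STRATEGY.key() resolves to in your Flink version, so treat that constant as the source of truth.

public static void main(String[] args) throws Exception {
	// e.g. flink run ... --environment.restart_strategy fixed_delay
	// (flag name assumed; see ENVIRONMENT_RESTART_STRATEGY.key())
	ParameterTool pt = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	setupRestartStrategy(env, pt);
	env.fromElements(1, 2, 3).print(); // placeholder topology
	env.execute();
}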
 
Example 4
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Test that ensures the KafkaConsumer fails properly if the topic doesn't exist
 * and a wrong broker was specified.
 *
 * @throws Exception
 */
public void runFailOnNoBrokerTest() throws Exception {
	try {
		Properties properties = new Properties();

		StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
		see.getConfig().disableSysoutLogging();
		see.setRestartStrategy(RestartStrategies.noRestart());
		see.setParallelism(1);

		// use wrong ports for the consumers
		properties.setProperty("bootstrap.servers", "localhost:80");
		properties.setProperty("group.id", "test");
		properties.setProperty("request.timeout.ms", "3000"); // let the test fail fast
		properties.setProperty("socket.timeout.ms", "3000");
		properties.setProperty("session.timeout.ms", "2000");
		properties.setProperty("fetch.max.wait.ms", "2000");
		properties.setProperty("heartbeat.interval.ms", "1000");
		properties.putAll(secureProps);
		FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer("doesntexist", new SimpleStringSchema(), properties);
		DataStream<String> stream = see.addSource(source);
		stream.print();
		see.execute("No broker test");
	} catch (JobExecutionException jee) {
		if (kafkaServer.getVersion().equals("0.9") ||
			kafkaServer.getVersion().equals("0.10") ||
			kafkaServer.getVersion().equals("0.11") ||
			kafkaServer.getVersion().equals("2.0")) {
			final Optional<TimeoutException> optionalTimeoutException = ExceptionUtils.findThrowable(jee, TimeoutException.class);
			assertTrue(optionalTimeoutException.isPresent());

			final TimeoutException timeoutException = optionalTimeoutException.get();
			assertEquals("Timeout expired while fetching topic metadata", timeoutException.getMessage());
		} else {
			final Optional<Throwable> optionalThrowable = ExceptionUtils.findThrowableWithMessage(jee, "Unable to retrieve any partitions");
			assertTrue(optionalThrowable.isPresent());
			assertTrue(optionalThrowable.get() instanceof RuntimeException);
		}
	}
}
 
Example 5
Source File: AbstractQueryableStateTestBase.java    From flink with Apache License 2.0
/**
 * Tests a simple value state queryable state instance. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The test succeeds after each subtask index is queried with
 * value numElements (the latest element updated the state).
 */
@Test
public void testValueState() throws Exception {
	final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
	final long numElements = 1024L;

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStateBackend(stateBackend);
	env.setParallelism(maxParallelism);
	// Very important, because cluster is shared between tests and we
	// don't explicitly check that all slots are available before
	// submitting.
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

	DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

	// Value state
	ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType());

	source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
		private static final long serialVersionUID = 7662520075515707428L;

		@Override
		public Integer getKey(Tuple2<Integer, Long> value) {
			return value.f0;
		}
	}).asQueryableState("hakuna", valueState);

	try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {

		final JobID jobId = autoCancellableJob.getJobId();
		final JobGraph jobGraph = autoCancellableJob.getJobGraph();

		clusterClient.setDetached(true);
		clusterClient.submitJob(jobGraph, AbstractQueryableStateTestBase.class.getClassLoader());

		executeValueQuery(deadline, client, jobId, "hakuna", valueState, numElements);
	}
}
 
Example 6
Source File: KeyedStateCheckpointingITCase.java    From flink with Apache License 2.0
protected void testProgramWithBackend(AbstractStateBackend stateBackend) throws Exception {
	assertEquals("Broken test setup", 0, (NUM_STRINGS / 2) % NUM_KEYS);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);
	env.enableCheckpointing(500);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 0L));

	env.setStateBackend(stateBackend);

	// compute when (randomly) the failure should happen
	final int failurePosMin = (int) (0.6 * NUM_STRINGS / PARALLELISM);
	final int failurePosMax = (int) (0.8 * NUM_STRINGS / PARALLELISM);
	final int failurePos = (new Random().nextInt(failurePosMax - failurePosMin) + failurePosMin);

	final DataStream<Integer> stream1 = env.addSource(
			new IntGeneratingSourceFunction(NUM_STRINGS / 2, NUM_STRINGS / 4));

	final DataStream<Integer> stream2 = env.addSource(
			new IntGeneratingSourceFunction(NUM_STRINGS / 2, NUM_STRINGS / 4));

	stream1.union(stream2)
			.keyBy(new IdentityKeySelector<Integer>())
			.map(new OnceFailingPartitionedSum(failurePos))
			.keyBy(0)
			.addSink(new CounterSink());

	env.execute();

	// verify that we counted exactly right
	assertEquals(NUM_KEYS, CounterSink.ALL_COUNTS.size());
	assertEquals(NUM_KEYS, OnceFailingPartitionedSum.ALL_SUMS.size());

	for (Entry<Integer, Long> sum : OnceFailingPartitionedSum.ALL_SUMS.entrySet()) {
		assertEquals((long) sum.getKey() * NUM_STRINGS / NUM_KEYS, sum.getValue().longValue());
	}
	for (long count : CounterSink.ALL_COUNTS.values()) {
		assertEquals(NUM_STRINGS / NUM_KEYS, count);
	}
}
 
Example 7
Source File: FlinkPulsarITest.java    From pulsar-flink with Apache License 2.0
@Test
public void testOne2OneExactlyOnce() throws Exception {
    String topic = newTopic();
    int parallelism = 5;
    int numElementsPerPartition = 1000;
    int totalElements = parallelism * numElementsPerPartition;
    int failAfterElements = numElementsPerPartition / 3;

    List<String> allTopicNames = new ArrayList<>();
    for (int i = 0; i < parallelism; i++) {
        allTopicNames.add(topic + "-partition-" + i);
    }

    createTopic(topic, parallelism, adminUrl);

    generateRandomizedIntegerSequence(
            StreamExecutionEnvironment.getExecutionEnvironment(), topic, parallelism, numElementsPerPartition, true);

    // run the topology that fails and recovers

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    env.getConfig().disableSysoutLogging();

    Properties sourceProps = sourceProperties();
    sourceProps.setProperty(TOPIC_MULTI_OPTION_KEY, StringUtils.join(allTopicNames, ','));

    env.addSource(new FlinkPulsarRowSource(serviceUrl, adminUrl, sourceProps).setStartFromEarliest())
            .map(new PartitionValidationMapper(parallelism, 1))
            .map(new FailingIdentityMapper<Row>(failAfterElements))
            .addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

    FailingIdentityMapper.failedBefore = false;
    TestUtils.tryExecute(env, "one to one exactly once test");
}
 
Example 8
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Tests the proper consumption when having more Flink sources than Kafka partitions, which means
 * that some Flink sources will read no partitions.
 */
public void runMultipleSourcesOnePartitionExactlyOnceTest() throws Exception {
	final String topic = "manyToOneTopic";
	final int numPartitions = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = numPartitions * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	final int parallelism = 8;

	createTestTopic(topic, numPartitions, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			numPartitions,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	// set the number of restarts to one. The failing mapper will fail once, then it's only success exceptions.
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();
	env.setBufferTimeout(0);

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
		.addSource(kafkaSource)
		.map(new PartitionValidatingMapper(numPartitions, 1))
		.map(new FailingIdentityMapper<Integer>(failAfterElements))
		.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "multi-source-one-partitions exactly once test");

	deleteTestTopic(topic);
}
 
Example 9
Source File: WindowCheckpointingITCase.java    From flink with Apache License 2.0
@Test
public void testTumblingProcessingTimeWindow() {
	final int numElements = 3000;

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(timeCharacteristic);
		env.getConfig().setAutoWatermarkInterval(10);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();

		SinkValidatorUpdaterAndChecker updaterAndChecker =
			new SinkValidatorUpdaterAndChecker(numElements, 1);

		env
				.addSource(new FailingSource(new Generator(), numElements, timeCharacteristic))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(100, MILLISECONDS))
				.apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple2<Long, IntType>, Tuple, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							Tuple tuple,
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> values,
							Collector<Tuple2<Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						for (Tuple2<Long, IntType> value : values) {
							assertEquals(value.f0.intValue(), value.f1.value);
							out.collect(new Tuple2<>(value.f0, new IntType(1)));
						}
					}
				})
			.addSink(new ValidatingSink<>(updaterAndChecker, updaterAndChecker, timeCharacteristic))
			.setParallelism(1);

		tryExecute(env, "Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 10
Source File: EventTimeWindowCheckpointingITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testPreAggregatedSlidingTimeWindow() {
	final int numElementsPerKey = numElementsPerKey();
	final int windowSize = windowSize();
	final int windowSlide = windowSlide();
	final int numKeys = numKeys();

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();
		env.setStateBackend(this.stateBackend);
		env.getConfig().setUseSnapshotCompression(true);

		env
				.addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
				.reduce(
						new ReduceFunction<Tuple2<Long, IntType>>() {

							@Override
							public Tuple2<Long, IntType> reduce(
									Tuple2<Long, IntType> a,
									Tuple2<Long, IntType> b) {

								// pre-aggregate by summing the per-key counts
								return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
							}
						},
						new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							Tuple tuple,
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> input,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						for (Tuple2<Long, IntType> in: input) {
							out.collect(new Tuple4<>(in.f0,
									window.getStart(),
									window.getEnd(),
									in.f1));
						}
					}
				})
				.addSink(new ValidatingSink<>(
					new SinkValidatorUpdateFun(numElementsPerKey),
					new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 11
Source File: StreamSQLTestProgram.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		String outputPath = params.getRequired("outputPath");

		StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
			3,
			Time.of(10, TimeUnit.SECONDS)
		));
		sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		sEnv.enableCheckpointing(4000);
		sEnv.getConfig().setAutoWatermarkInterval(1000);

		StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv);

		tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
		tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

		int overWindowSizeSeconds = 1;
		int tumbleWindowSizeSeconds = 10;

		String overQuery = String.format(
			"SELECT " +
			"  key, " +
			"  rowtime, " +
			"  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
			"FROM table1",
			overWindowSizeSeconds);

		String tumbleQuery = String.format(
			"SELECT " +
			"  key, " +
			"  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
			"  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
			"FROM (%s) " +
			"WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
			"GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
			tumbleWindowSizeSeconds,
			tumbleWindowSizeSeconds,
			overQuery,
			tumbleWindowSizeSeconds);

		String joinQuery = String.format(
			"SELECT " +
			"  t1.key, " +
			"  t2.rowtime AS rowtime, " +
			"  t2.correct," +
			"  t2.wStart " +
			"FROM table2 t1, (%s) t2 " +
			"WHERE " +
			"  t1.key = t2.key AND " +
			"  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
			tumbleQuery,
			tumbleWindowSizeSeconds);

		String finalAgg = String.format(
			"SELECT " +
			"  SUM(correct) AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
			"FROM (%s) " +
			"GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
			joinQuery);

		// get Table for SQL query
		Table result = tEnv.sqlQuery(finalAgg);
		// convert Table into append-only DataStream
		DataStream<Row> resultStream =
			tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

		final StreamingFileSink<Row> sink = StreamingFileSink
			.forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
				PrintStream out = new PrintStream(stream);
				out.println(element.toString());
			})
			.withBucketAssigner(new KeyBucketAssigner())
			.withRollingPolicy(OnCheckpointRollingPolicy.build())
			.build();

		resultStream
			// inject a KillMapper that forwards all records but terminates the first execution attempt
			.map(new KillMapper()).setParallelism(1)
			// add sink function
			.addSink(sink).setParallelism(1);

		sEnv.execute();
	}
 
Example 12
Source File: DataStreamAllroundTestJobFactory.java    From Flink-CEPplus with Apache License 2.0
public static void setupEnvironment(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {

		// set checkpointing semantics
		String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
		long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
		CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
			? CheckpointingMode.EXACTLY_ONCE
			: CheckpointingMode.AT_LEAST_ONCE;

		env.enableCheckpointing(checkpointInterval, checkpointingMode);

		// use event time
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		// parallelism
		env.setParallelism(pt.getInt(ENVIRONMENT_PARALLELISM.key(), ENVIRONMENT_PARALLELISM.defaultValue()));
		env.setMaxParallelism(pt.getInt(ENVIRONMENT_MAX_PARALLELISM.key(), ENVIRONMENT_MAX_PARALLELISM.defaultValue()));

		// restart strategy
		String restartStrategyConfig = pt.get(ENVIRONMENT_RESTART_STRATEGY.key());
		if (restartStrategyConfig != null) {
			RestartStrategies.RestartStrategyConfiguration restartStrategy;
			switch (restartStrategyConfig) {
				case "fixed_delay":
					restartStrategy = RestartStrategies.fixedDelayRestart(
						pt.getInt(
							ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.key(),
							ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.defaultValue()),
						pt.getLong(
							ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.key(),
							ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.defaultValue()));
					break;
				case "no_restart":
					restartStrategy = RestartStrategies.noRestart();
					break;
				default:
					throw new IllegalArgumentException("Unkown restart strategy: " + restartStrategyConfig);
			}
			env.setRestartStrategy(restartStrategy);
		}

		// state backend
		final String stateBackend = pt.get(
			STATE_BACKEND.key(),
			STATE_BACKEND.defaultValue());

		final String checkpointDir = pt.getRequired(STATE_BACKEND_CHECKPOINT_DIR.key());

		if ("file".equalsIgnoreCase(stateBackend)) {
			boolean asyncCheckpoints = pt.getBoolean(
				STATE_BACKEND_FILE_ASYNC.key(),
				STATE_BACKEND_FILE_ASYNC.defaultValue());

			env.setStateBackend((StateBackend) new FsStateBackend(checkpointDir, asyncCheckpoints));
		} else if ("rocks".equalsIgnoreCase(stateBackend)) {
			boolean incrementalCheckpoints = pt.getBoolean(
				STATE_BACKEND_ROCKS_INCREMENTAL.key(),
				STATE_BACKEND_ROCKS_INCREMENTAL.defaultValue());

			env.setStateBackend((StateBackend) new RocksDBStateBackend(checkpointDir, incrementalCheckpoints));
		} else {
			throw new IllegalArgumentException("Unknown backend requested: " + stateBackend);
		}

		boolean enableExternalizedCheckpoints = pt.getBoolean(
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

		if (enableExternalizedCheckpoints) {
			String cleanupModeConfig = pt.get(
				ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
				ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

			CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
			switch (cleanupModeConfig) {
				case "retain":
					cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
					break;
				case "delete":
					cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
					break;
				default:
					throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
			}

			env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);
		}

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(pt);
	}
 
Example 13
Source File: KafkaShortRetentionTestBase.java    From flink with Apache License 2.0
public void runAutoOffsetResetTest() throws Exception {
	final String topic = "auto-offset-reset-test";

	final int parallelism = 1;
	final int elementsPerPartition = 50000;

	Properties tprops = new Properties();
	tprops.setProperty("retention.ms", "250");
	kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately

	// ----------- add producer dataflow ----------

	DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<String> ctx) throws InterruptedException {
			int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
			int limit = cnt + elementsPerPartition;

			while (running && !stopProducer && cnt < limit) {
				ctx.collect("element-" + cnt);
				cnt++;
				Thread.sleep(10);
			}
			LOG.info("Stopping producer");
		}

		@Override
		public void cancel() {
			running = false;
		}
	});
	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	kafkaServer.produceIntoKafka(stream, topic, new SimpleStringSchema(), props, null);

	// ----------- add consumer dataflow ----------

	NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	tryExecute(env, "run auto offset reset test");

	kafkaServer.deleteTestTopic(topic);
}
 
Example 14
Source File: EventTimeWindowCheckpointingITCase.java    From flink with Apache License 2.0
public void doTestTumblingTimeWindowWithKVState(int maxParallelism) {
	final int numElementsPerKey = numElementsPerKey();
	final int windowSize = windowSize();
	final int numKeys = numKeys();

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(PARALLELISM);
		env.setMaxParallelism(maxParallelism);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.setStateBackend(this.stateBackend);
		env.getConfig().setUseSnapshotCompression(true);

		env
				.addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSize), numElementsPerKey))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(windowSize, MILLISECONDS))
				.apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

					private boolean open = false;

					private ValueState<Integer> count;

					@Override
					public void open(Configuration parameters) {
						assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
						count = getRuntimeContext().getState(
								new ValueStateDescriptor<>("count", Integer.class, 0));
					}

					@Override
					public void apply(
							Tuple tuple,
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> values,
							Collector<Tuple4<Long, Long, Long, IntType>> out) throws Exception {

						// the window count state starts with the key, so that we get
						// different count results for each key
						if (count.value() == 0) {
							count.update(tuple.<Long>getField(0).intValue());
						}

						// validate that the function has been opened properly
						assertTrue(open);

						count.update(count.value() + 1);
						out.collect(new Tuple4<>(tuple.<Long>getField(0), window.getStart(), window.getEnd(), new IntType(count.value())));
					}
				})
			.addSink(new ValidatingSink<>(
				new CountingSinkValidatorUpdateFun(),
				new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSize))).setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 15
Source File: KafkaProducerTestBase.java    From flink with Apache License 2.0
/**
 * This test configures the KafkaProducer to automatically flush the data
 * and fails the broker to check that records flushed since the last checkpoint are not duplicated.
 */
protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception {
	final String topic = (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") + sinksCount;
	final int partition = 0;
	final int numElements = 1000;
	final int failAfterElements = 333;

	for (int i = 0; i < sinksCount; i++) {
		createTestTopic(topic + i, 1, 1);
	}

	TypeInformationSerializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
	KeyedSerializationSchema<Integer> keyedSerializationSchema = new KeyedSerializationSchemaWrapper<>(schema);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();

	Properties properties = new Properties();
	properties.putAll(standardProps);
	properties.putAll(secureProps);

	// process exactly failAfterElements number of elements and then shutdown Kafka broker and fail application
	List<Integer> expectedElements = getIntegersSequence(numElements);

	DataStream<Integer> inputStream = env
		.addSource(new IntegerSource(numElements))
		.map(new FailingIdentityMapper<Integer>(failAfterElements));

	for (int i = 0; i < sinksCount; i++) {
		FlinkKafkaPartitioner<Integer> partitioner = new FlinkKafkaPartitioner<Integer>() {
			@Override
			public int partition(Integer record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
				return partition;
			}
		};

		if (regularSink) {
			StreamSink<Integer> kafkaSink = kafkaServer.getProducerSink(topic + i, keyedSerializationSchema, properties, partitioner);
			inputStream.addSink(kafkaSink.getUserFunction());
		} else {
			kafkaServer.produceIntoKafka(inputStream, topic + i, keyedSerializationSchema, properties, partitioner);
		}
	}

	FailingIdentityMapper.failedBefore = false;
	TestUtils.tryExecute(env, "Exactly once test");

	for (int i = 0; i < sinksCount; i++) {
		// assert that before failure we successfully snapshot/flushed all expected elements
		assertExactlyOnceForTopic(
			properties,
			topic + i,
			partition,
			expectedElements,
			KAFKA_READ_TIMEOUT);
		deleteTestTopic(topic + i);
	}
}
 
Example 16
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Tests the proper consumption when having fewer Flink sources than Kafka partitions, so
 * one Flink source will read multiple Kafka partitions.
 */
public void runOneSourceMultiplePartitionsExactlyOnceTest() throws Exception {
	final String topic = "oneToManyTopic";
	final int numPartitions = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = numPartitions * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	final int parallelism = 2;

	createTestTopic(topic, numPartitions, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			numPartitions,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(numPartitions, 3))
			.map(new FailingIdentityMapper<Integer>(failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "One-source-multi-partitions exactly once test");

	deleteTestTopic(topic);
}
 
Example 17
Source File: StatefulJobSavepointMigrationITCase.java    From flink with Apache License 2.0
@Test
public void testSavepoint() throws Exception {

	final int parallelism = 4;

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (testStateBackend) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setMaxParallelism(parallelism);

	SourceFunction<Tuple2<Long, Long>> nonParallelSource;
	SourceFunction<Tuple2<Long, Long>> parallelSource;
	RichFlatMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> flatMap;
	OneInputStreamOperator<Tuple2<Long, Long>, Tuple2<Long, Long>> timelyOperator;

	if (executionMode == ExecutionMode.PERFORM_SAVEPOINT) {
		nonParallelSource = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
		parallelSource = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
		flatMap = new CheckpointingKeyedStateFlatMap();
		timelyOperator = new CheckpointingTimelyStatefulOperator();
	} else if (executionMode == ExecutionMode.VERIFY_SAVEPOINT) {
		nonParallelSource = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
		parallelSource = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
		flatMap = new CheckingKeyedStateFlatMap();
		timelyOperator = new CheckingTimelyStatefulOperator();
	} else {
		throw new IllegalStateException("Unknown ExecutionMode " + executionMode);
	}

	env
		.addSource(nonParallelSource).uid("CheckpointingSource1")
		.keyBy(0)
		.flatMap(flatMap).startNewChain().uid("CheckpointingKeyedStateFlatMap1")
		.keyBy(0)
		.transform(
			"timely_stateful_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			timelyOperator).uid("CheckpointingTimelyStatefulOperator1")
		.addSink(new MigrationTestUtils.AccumulatorCountingSink<>());

	env
		.addSource(parallelSource).uid("CheckpointingSource2")
		.keyBy(0)
		.flatMap(flatMap).startNewChain().uid("CheckpointingKeyedStateFlatMap2")
		.keyBy(0)
		.transform(
			"timely_stateful_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			timelyOperator).uid("CheckpointingTimelyStatefulOperator2")
		.addSink(new MigrationTestUtils.AccumulatorCountingSink<>());

	if (executionMode == ExecutionMode.PERFORM_SAVEPOINT) {
		executeAndSavepoint(
			env,
			"src/test/resources/" + getSavepointPath(testMigrateVersion, testStateBackend),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
	} else {
		restoreAndExecute(
			env,
			getResourceFilename(getSavepointPath(testMigrateVersion, testStateBackend)),
			new Tuple2<>(MigrationTestUtils.CheckingNonParallelSourceWithListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 1),
			new Tuple2<>(MigrationTestUtils.CheckingParallelSourceWithUnionListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, parallelism),
			new Tuple2<>(CheckingKeyedStateFlatMap.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
			new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESS_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
			new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_EVENT_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
			new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESSING_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
	}
}
 
Example 18
Source File: EventTimeAllWindowCheckpointingITCase.java    From flink with Apache License 2.0
@Test
public void testPreAggregatedTumblingTimeWindow() {
	final int numElementsPerKey = 3000;
	final int windowSize = 100;
	final int numKeys = 1;

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));

		env
			.addSource(new FailingSource(new EventTimeWindowCheckpointingITCase.KeyedEventTimeGenerator(numKeys, windowSize), numElementsPerKey))
				.rebalance()
				.timeWindowAll(Time.of(windowSize, MILLISECONDS))
				.reduce(
						new ReduceFunction<Tuple2<Long, IntType>>() {

							@Override
							public Tuple2<Long, IntType> reduce(
									Tuple2<Long, IntType> a,
									Tuple2<Long, IntType> b) {

								return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
							}
						},
						new RichAllWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> input,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						for (Tuple2<Long, IntType> in: input) {
							out.collect(new Tuple4<>(in.f0,
									window.getStart(),
									window.getEnd(),
									in.f1));
						}
					}
				})
			.addSink(new ValidatingSink<>(
				new EventTimeWindowCheckpointingITCase.SinkValidatorUpdateFun(numElementsPerKey),
				new EventTimeWindowCheckpointingITCase.SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSize)))
			.setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 19
Source File: EventTimeWindowCheckpointingITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSlidingTimeWindow() {
	final int numElementsPerKey = numElementsPerKey();
	final int windowSize = windowSize();
	final int windowSlide = windowSlide();
	final int numKeys = numKeys();

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setMaxParallelism(2 * PARALLELISM);
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();
		env.setStateBackend(this.stateBackend);
		env.getConfig().setUseSnapshotCompression(true);

		env
				.addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
				.apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							Tuple tuple,
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> values,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						int sum = 0;
						long key = -1;

						for (Tuple2<Long, IntType> value : values) {
							sum += value.f1.value;
							key = value.f0;
						}
						final Tuple4<Long, Long, Long, IntType> output =
							new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum));
						out.collect(output);
					}
				})
			.addSink(new ValidatingSink<>(
				new SinkValidatorUpdateFun(numElementsPerKey),
				new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}