Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setStateBackend()

The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setStateBackend(). Each example comes from an open-source project; the source file and license are listed above each snippet.
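
Before the project examples, here is a minimal, self-contained sketch of the call; the checkpoint interval and path below are placeholder values, not taken from any of the projects:

import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SetStateBackendQuickStart {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // checkpoint every 10 seconds so the backend has snapshots to persist
        env.enableCheckpointing(10_000);

        // keep working state on the heap and write checkpoints to a
        // filesystem path (placeholder; use a durable path in production)
        env.setStateBackend(new FsStateBackend("file:///tmp/flink-checkpoints"));

        env.fromElements(1, 2, 3).print();
        env.execute("setStateBackend quick start");
    }
}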
Example 1
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0
private static void setupStateBackend(final StreamExecutionEnvironment env, final ParameterTool pt) throws IOException {
	final String stateBackend = pt.get(
		STATE_BACKEND.key(),
		STATE_BACKEND.defaultValue());

	final String checkpointDir = pt.getRequired(STATE_BACKEND_CHECKPOINT_DIR.key());

	if ("file".equalsIgnoreCase(stateBackend)) {
		boolean asyncCheckpoints = pt.getBoolean(
			STATE_BACKEND_FILE_ASYNC.key(),
			STATE_BACKEND_FILE_ASYNC.defaultValue());

		env.setStateBackend((StateBackend) new FsStateBackend(checkpointDir, asyncCheckpoints));
	} else if ("rocks".equalsIgnoreCase(stateBackend)) {
		boolean incrementalCheckpoints = pt.getBoolean(
			STATE_BACKEND_ROCKS_INCREMENTAL.key(),
			STATE_BACKEND_ROCKS_INCREMENTAL.defaultValue());

		env.setStateBackend((StateBackend) new RocksDBStateBackend(checkpointDir, incrementalCheckpoints));
	} else {
		throw new IllegalArgumentException("Unknown backend requested: " + stateBackend);
	}
}
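
A note on the (StateBackend) cast above: FsStateBackend and RocksDBStateBackend extend AbstractStateBackend, and StreamExecutionEnvironment also declares a deprecated setStateBackend(AbstractStateBackend) overload. Without the cast, overload resolution would pick the more specific, deprecated variant; the cast steers the compiler to the non-deprecated setStateBackend(StateBackend) one:

	// the cast selects the non-deprecated overload
	env.setStateBackend((StateBackend) new FsStateBackend(checkpointDir, asyncCheckpoints));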
 
Example 2
Source File: AbstractOperatorRestoreTestBase.java    From flink with Apache License 2.0
private JobGraph createJobGraph(ExecutionMode mode) {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStateBackend((StateBackend) new MemoryStateBackend());

	switch (mode) {
		case MIGRATE:
			createMigrationJob(env);
			break;
		case RESTORE:
			createRestoredJob(env);
			break;
	}

	return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
 
Example 3
Source File: CassandraTupleWriteAheadSinkExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(1000);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
    env.setStateBackend(new FsStateBackend("file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend"));

    CassandraSink<Tuple2<String, Integer>> sink = CassandraSink.addSink(env.addSource(new MySource()))
            .setQuery("INSERT INTO zhisheng.values (id, counter) values (?, ?);")
            .enableWriteAheadLog()
            .setClusterBuilder(new ClusterBuilder() {

                private static final long serialVersionUID = 2793938419775311824L;

                @Override
                public Cluster buildCluster(Cluster.Builder builder) {
                    return builder.addContactPoint("127.0.0.1").build();
                }
            })
            .build();

    sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello");

    env.execute();
}
 
Example 4
Source File: EnableCheckpointMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create the stream execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));
    env.setParallelism(1);

    env.addSource(new SourceFunction<Long>() {
        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (true) {
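                // presumably deliberate: emitting null makes the map below fail
                // with an NPE on unboxing, triggering the default restart strategy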
                sourceContext.collect(null);
            }
        }
        @Override
        public void cancel() {
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    // enable checkpointing; MemoryStateBackend holds checkpoints on the
    // JobManager heap (constructor argument is the max state size in bytes, ~500 MB)
    StateBackend stateBackend = new MemoryStateBackend(5 * 1024 * 1024 * 100);
    env.enableCheckpointing(10000);
    env.setStateBackend(stateBackend);

    env.execute("zhisheng default RestartStrategy enable checkpoint example");
}
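
For context: enabling checkpointing without configuring a restart strategy makes Flink fall back to a fixed-delay restart strategy with Integer.MAX_VALUE attempts, which is the behavior this example exercises by failing on the null elements. Spelled out explicitly, the fallback would look roughly like this (the delay value is illustrative, not Flink's exact default):

env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
        Integer.MAX_VALUE, // restart attempts
        1000L));           // delay between attempts in milliseconds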
 
Example 5
Source File: DataStreamStateTTLTestProgram.java    From Flink-CEPplus with Apache License 2.0
/**
 * Sets the state backend to a new {@link StubStateBackend} which has a {@link MonotonicTTLTimeProvider}.
 *
 * @param env The {@link StreamExecutionEnvironment} of the job.
 */
private static void setBackendWithCustomTTLTimeProvider(StreamExecutionEnvironment env) {
	final MonotonicTTLTimeProvider ttlTimeProvider = new MonotonicTTLTimeProvider();

	final StateBackend configuredBackend = env.getStateBackend();
	if (configuredBackend instanceof RocksDBStateBackend) {
		((RocksDBStateBackend) configuredBackend).enableTtlCompactionFilter();
	}
	final StateBackend stubBackend = new StubStateBackend(configuredBackend, ttlTimeProvider);
	env.setStateBackend(stubBackend);
}
 
Example 6
Source File: KeyedStateCheckpointingITCase.java    From flink with Apache License 2.0
protected void testProgramWithBackend(AbstractStateBackend stateBackend) throws Exception {
	assertEquals("Broken test setup", 0, (NUM_STRINGS / 2) % NUM_KEYS);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);
	env.enableCheckpointing(500);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 0L));

	env.setStateBackend(stateBackend);

	// compute when (randomly) the failure should happen
	final int failurePosMin = (int) (0.6 * NUM_STRINGS / PARALLELISM);
	final int failurePosMax = (int) (0.8 * NUM_STRINGS / PARALLELISM);
	final int failurePos = (new Random().nextInt(failurePosMax - failurePosMin) + failurePosMin);

	final DataStream<Integer> stream1 = env.addSource(
			new IntGeneratingSourceFunction(NUM_STRINGS / 2, NUM_STRINGS / 4));

	final DataStream<Integer> stream2 = env.addSource(
			new IntGeneratingSourceFunction(NUM_STRINGS / 2, NUM_STRINGS / 4));

	stream1.union(stream2)
			.keyBy(new IdentityKeySelector<Integer>())
			.map(new OnceFailingPartitionedSum(failurePos))
			.keyBy(0)
			.addSink(new CounterSink());

	env.execute();

	// verify that we counted exactly right
	assertEquals(NUM_KEYS, CounterSink.ALL_COUNTS.size());
	assertEquals(NUM_KEYS, OnceFailingPartitionedSum.ALL_SUMS.size());

	for (Entry<Integer, Long> sum : OnceFailingPartitionedSum.ALL_SUMS.entrySet()) {
		assertEquals((long) sum.getKey() * NUM_STRINGS / NUM_KEYS, sum.getValue().longValue());
	}
	for (long count : CounterSink.ALL_COUNTS.values()) {
		assertEquals(NUM_STRINGS / NUM_KEYS, count);
	}
}
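
In the full test class, this helper is presumably invoked once per backend variant; hypothetical callers (constructor arguments are illustrative) might look like:

	// tmpCheckpointDir stands for a hypothetical temporary directory (java.io.File)
	testProgramWithBackend(new MemoryStateBackend(16 * 1024 * 1024, true));
	testProgramWithBackend(new FsStateBackend(tmpCheckpointDir.toURI(), true));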
 
Example 7
Source File: TuningKeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
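        // enableIncrementalCheckpointing (above) and numberOfTransferingThreads (below)
        // are presumably constants defined elsewhere in the class and not shown here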
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize the JSON
            // hash the log's primary key id with murmur3_128 and use the resulting long as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }
 
Example 8
Source File: AbstractQueryableStateTestBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests a simple queryable value state instance. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The test succeeds once each subtask index has been queried with
 * value numElements (the latest element updated the state).
 */
@Test
public void testValueState() throws Exception {
	final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
	final long numElements = 1024L;

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStateBackend(stateBackend);
	env.setParallelism(maxParallelism);
	// Very important, because cluster is shared between tests and we
	// don't explicitly check that all slots are available before
	// submitting.
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

	DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

	// Value state
	ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType());

	source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
		private static final long serialVersionUID = 7662520075515707428L;

		@Override
		public Integer getKey(Tuple2<Integer, Long> value) {
			return value.f0;
		}
	}).asQueryableState("hakuna", valueState);

	try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {

		final JobID jobId = autoCancellableJob.getJobId();
		final JobGraph jobGraph = autoCancellableJob.getJobGraph();

		clusterClient.setDetached(true);
		clusterClient.submitJob(jobGraph, AbstractQueryableStateTestBase.class.getClassLoader());

		executeValueQuery(deadline, client, jobId, "hakuna", valueState, numElements);
	}
}
 
Example 9
Source File: EventTimeWindowCheckpointingITCase.java    From flink with Apache License 2.0
@Test
public void testPreAggregatedSlidingTimeWindow() {
	final int numElementsPerKey = numElementsPerKey();
	final int windowSize = windowSize();
	final int windowSlide = windowSlide();
	final int numKeys = numKeys();

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.setStateBackend(this.stateBackend);
		env.getConfig().setUseSnapshotCompression(true);

		env
				.addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
				.reduce(
						new ReduceFunction<Tuple2<Long, IntType>>() {

							@Override
							public Tuple2<Long, IntType> reduce(
									Tuple2<Long, IntType> a,
									Tuple2<Long, IntType> b) {

								// validate that the function has been opened properly
								return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
							}
						},
						new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							Tuple tuple,
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> input,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						for (Tuple2<Long, IntType> in: input) {
							out.collect(new Tuple4<>(in.f0,
									window.getStart(),
									window.getEnd(),
									in.f1));
						}
					}
				})
				.addSink(new ValidatingSink<>(
					new SinkValidatorUpdateFun(numElementsPerKey),
					new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 10
Source File: AbstractQueryableStateTestBase.java    From flink with Apache License 2.0
/**
 * Tests a simple queryable value state instance with a default value
 * set. Each source emits (subtaskIndex, 0)..(subtaskIndex, numElements)
 * tuples; every key is mapped to 1, but key 0 is queried, which should throw
 * an {@link UnknownKeyOrNamespaceException}.
 *
 * @throws UnknownKeyOrNamespaceException thrown when querying a non-existent key
 */
@Test(expected = UnknownKeyOrNamespaceException.class)
public void testValueStateDefault() throws Throwable {
	final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
	final long numElements = 1024L;

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStateBackend(stateBackend);
	env.setParallelism(maxParallelism);
	// Very important, because cluster is shared between tests and we
	// don't explicitly check that all slots are available before
	// submitting.
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

	DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

	ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>(
			"any", source.getType(), 	Tuple2.of(0, 1337L));

	// only expose key "1"
	QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(
			new KeySelector<Tuple2<Integer, Long>, Integer>() {
				private static final long serialVersionUID = 4509274556892655887L;

				@Override
				public Integer getKey(Tuple2<Integer, Long> value) {
					return 1;
				}
			}).asQueryableState("hakuna", valueState);

	try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {

		final JobID jobId = autoCancellableJob.getJobId();
		final JobGraph jobGraph = autoCancellableJob.getJobGraph();

		clusterClient.setDetached(true);
		clusterClient.submitJob(jobGraph, AbstractQueryableStateTestBase.class.getClassLoader());

		// Now query
		int key = 0;
		CompletableFuture<ValueState<Tuple2<Integer, Long>>> future = getKvState(
				deadline,
				client,
				jobId,
				queryableState.getQueryableStateName(),
				key,
				BasicTypeInfo.INT_TYPE_INFO,
				valueState,
				true,
				executor);

		try {
			future.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
		} catch (ExecutionException | CompletionException e) {
			// get() on a completedExceptionally future wraps the
			// exception in an ExecutionException.
			throw e.getCause();
		}
	}
}
 
Example 11
Source File: EventTimeWindowCheckpointingITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSlidingTimeWindow() {
	final int numElementsPerKey = numElementsPerKey();
	final int windowSize = windowSize();
	final int windowSlide = windowSlide();
	final int numKeys = numKeys();

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setMaxParallelism(2 * PARALLELISM);
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();
		env.setStateBackend(this.stateBackend);
		env.getConfig().setUseSnapshotCompression(true);

		env
				.addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
				.apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							Tuple tuple,
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> values,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						int sum = 0;
						long key = -1;

						for (Tuple2<Long, IntType> value : values) {
							sum += value.f1.value;
							key = value.f0;
						}
						final Tuple4<Long, Long, Long, IntType> output =
							new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum));
						out.collect(output);
					}
				})
			.addSink(new ValidatingSink<>(
				new SinkValidatorUpdateFun(numElementsPerKey),
				new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 12
Source File: TaskManagerProcessFailureStreamingRecoveryITCase.java    From flink with Apache License 2.0
@Override
public void testTaskManagerFailure(Configuration configuration, final File coordinateDir) throws Exception {

	final File tempCheckpointDir = tempFolder.newFolder();

	StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment(
		"localhost",
		1337, // not needed since we use ZooKeeper
		configuration);
	env.setParallelism(PARALLELISM);
	env.getConfig().disableSysoutLogging();
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
	env.enableCheckpointing(200);

	env.setStateBackend(new FsStateBackend(tempCheckpointDir.getAbsoluteFile().toURI()));

	DataStream<Long> result = env.addSource(new SleepyDurableGenerateSequence(coordinateDir, DATA_COUNT))
			// add a non-chained no-op map to test the chain state restore logic
			.map(new MapFunction<Long, Long>() {
				@Override
				public Long map(Long value) throws Exception {
					return value;
				}
			}).startNewChain()
					// populate the coordinate directory so we can proceed to TaskManager failure
			.map(new Mapper(coordinateDir));

	// write result to temporary file
	result.addSink(new CheckpointedSink(DATA_COUNT));

	try {
		// blocking call until execution is done
		env.execute();

		// TODO: Figure out why this fails when ran with other tests
		// Check whether checkpoints have been cleaned up properly
		// assertDirectoryEmpty(tempCheckpointDir);
	}
	finally {
		// clean up
		if (tempCheckpointDir.exists()) {
			FileUtils.deleteDirectory(tempCheckpointDir);
		}
	}
}
 
Example 13
Source File: StateMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(10000);
    env.setStateBackend(new MemoryStateBackend());

    env.addSource(new RichParallelSourceFunction<Tuple2<String, Long>>() {
        @Override
        public void run(SourceContext<Tuple2<String, Long>> sourceContext) throws Exception {
            while (true) {
                sourceContext.collect(new Tuple2<>(String.valueOf(System.currentTimeMillis()), System.currentTimeMillis()));
                Thread.sleep(10);
            }
        }

        @Override
        public void cancel() {

        }
    }).keyBy(0)
            .map(new RichMapFunction<Tuple2<String, Long>, Tuple2<String, Long>>() {

                private ValueState<Long> state;

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    state = getRuntimeContext().getState(
                            new ValueStateDescriptor<>("uvState",
                                    TypeInformation.of(new TypeHint<Long>() {
                                    })));
                }

                @Override
                public Tuple2<String, Long> map(Tuple2<String, Long> tuple2) throws Exception {
                    state.update(tuple2.f1);
                    return tuple2;
                }
            }).print();

    env.execute();
}
 
Example 14
Source File: TypeSerializerSnapshotMigrationITCase.java    From flink with Apache License 2.0
@Test
public void testSavepoint() throws Exception {
	final int parallelism = 1;

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (testStateBackend) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setMaxParallelism(parallelism);

	SourceFunction<Tuple2<Long, Long>> nonParallelSource =
		new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);

	env.addSource(nonParallelSource)
		.keyBy(0)
		.map(new TestMapFunction())
		.addSink(new MigrationTestUtils.AccumulatorCountingSink<>());

	if (executionMode == ExecutionMode.PERFORM_SAVEPOINT) {
		executeAndSavepoint(
			env,
			"src/test/resources/" + getSavepointPath(testMigrateVersion, testStateBackend),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
	} else {
		restoreAndExecute(
			env,
			getResourceFilename(getSavepointPath(testMigrateVersion, testStateBackend)),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
	}
}
 
Example 15
Source File: LegacyStatefulJobSavepointMigrationITCase.java    From flink with Apache License 2.0
@Test
public void testSavepointRestore() throws Exception {

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (testStateBackend) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(4);
	env.setMaxParallelism(4);

	env
		.addSource(new CheckingRestoringSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
		.flatMap(new CheckingRestoringFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
		.keyBy(0)
		.flatMap(new CheckingRestoringFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
		.keyBy(0)
		.flatMap(new CheckingKeyedStateFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
		.keyBy(0)
		.transform(
			"custom_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new CheckingRestoringUdfOperator(new CheckingRestoringFlatMapWithKeyedStateInOperator())).uid("LegacyCheckpointedOperator")
		.keyBy(0)
		.transform(
			"timely_stateful_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new CheckingTimelyStatefulOperator()).uid("TimelyStatefulOperator")
		.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>());

	restoreAndExecute(
		env,
		getResourceFilename(getSavepointPath(testMigrateVersion, testStateBackend)),
		new Tuple2<>(CheckingRestoringSource.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 1),
		new Tuple2<>(CheckingRestoringFlatMap.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingRestoringFlatMapWithKeyedState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingKeyedStateFlatMap.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingRestoringUdfOperator.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingRestoringFlatMapWithKeyedStateInOperator.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESS_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_EVENT_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESSING_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
 
Example 16
Source File: StickyAllocationAndLocalRecoveryTestJob.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool pt = ParameterTool.fromArgs(args);

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		env.setParallelism(pt.getInt("parallelism", 1));
		env.setMaxParallelism(pt.getInt("maxParallelism", pt.getInt("parallelism", 1)));
		env.enableCheckpointing(pt.getInt("checkpointInterval", 1000));
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, pt.getInt("restartDelay", 0)));
		if (pt.getBoolean("externalizedCheckpoints", false)) {
			env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		}

		String stateBackend = pt.get("stateBackend", "file");
		String checkpointDir = pt.getRequired("checkpointDir");

		boolean killJvmOnFail = pt.getBoolean("killJvmOnFail", false);

		if ("file".equals(stateBackend)) {
			boolean asyncCheckpoints = pt.getBoolean("asyncCheckpoints", true);
			env.setStateBackend(new FsStateBackend(checkpointDir, asyncCheckpoints));
		} else if ("rocks".equals(stateBackend)) {
			boolean incrementalCheckpoints = pt.getBoolean("incrementalCheckpoints", false);
			env.setStateBackend(new RocksDBStateBackend(checkpointDir, incrementalCheckpoints));
		} else {
			throw new IllegalArgumentException("Unknown backend: " + stateBackend);
		}

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(pt);

		// delay to throttle down the production of the source
		long delay = pt.getLong("delay", 0L);

		// the maximum number of attempts, before the job finishes with success
		int maxAttempts = pt.getInt("maxAttempts", 3);

		// size of one artificial value
		int valueSize = pt.getInt("valueSize", 10);

		env.addSource(new RandomLongSource(maxAttempts, delay))
			.keyBy((KeySelector<Long, Long>) aLong -> aLong)
			.flatMap(new StateCreatingFlatMap(valueSize, killJvmOnFail))
			.addSink(new PrintSinkFunction<>());

		env.execute("Sticky Allocation And Local Recovery Test");
	}
 
Example 17
Source File: NonKeyedJob.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	String savepointsPath = pt.getRequired("savepoint-path");

	Configuration config = new Configuration();
	config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointsPath);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(config);
	env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);
	env.setRestartStrategy(RestartStrategies.noRestart());

	env.setStateBackend(new MemoryStateBackend());

	// Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
	DataStream<Integer> source = createSource(env, ExecutionMode.GENERATE);

	SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.GENERATE, source);
	first.startNewChain();

	SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.GENERATE, first);
	second.startNewChain();

	SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);

	SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.GENERATE, stateless);

	env.execute("job");
}
 