org.apache.flink.contrib.streaming.state.RocksDBStateBackend Java Examples

The following examples show how to use org.apache.flink.contrib.streaming.state.RocksDBStateBackend. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CEPRescalingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a keyed test harness around the CEP operator and gives it a RocksDB state backend.
 *
 * @param maxParallelism maximum parallelism forwarded to the harness
 * @param taskParallelism task parallelism forwarded to the harness
 * @param subtaskIdx subtask index forwarded to the harness
 * @return the harness with its state backend already set (it is not opened here)
 * @throws Exception if creating the operator or the harness fails
 */
private KeyedOneInputStreamOperatorTestHarness<Integer, Event, Map<String, List<Event>>> getTestHarness(
		int maxParallelism,
		int taskParallelism,
		int subtaskIdx) throws Exception {

	final KeySelector<Event, Integer> byKey = new TestKeySelector();
	final KeyedOneInputStreamOperatorTestHarness<Integer, Event, Map<String, List<Event>>> testHarness =
		new KeyedOneInputStreamOperatorTestHarness<>(
			getKeyedCepOpearator(false, new NFAFactory()),
			byKey,
			BasicTypeInfo.INT_TYPE_INFO,
			maxParallelism,
			taskParallelism,
			subtaskIdx);

	// RocksDB backend constructed around a fresh MemoryStateBackend.
	final MemoryStateBackend wrappedBackend = new MemoryStateBackend();
	final RocksDBStateBackend rocksDbBackend = new RocksDBStateBackend(wrappedBackend);
	testHarness.setStateBackend(rocksDbBackend);

	return testHarness;
}
 
Example #2
Source File: EventTimeWindowCheckpointingITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares a RocksDB state backend on top of a file-system backend and stores it in
 * {@code this.stateBackend}; also sets the managed memory size on {@code config}.
 *
 * @param config configuration that receives the managed memory size
 * @param fileSizeThreshold threshold handed to the {@link FsStateBackend}
 * @param incrementalCheckpoints flag handed to the {@link RocksDBStateBackend} constructor
 * @throws IOException if a temporary folder cannot be created
 */
private void setupRocksDB(Configuration config, int fileSizeThreshold, boolean incrementalCheckpoints) throws IOException {
	// Configure the managed memory size as 64MB per slot for rocksDB state backend.
	final MemorySize managedMemory = MemorySize.ofMebiBytes(PARALLELISM / NUM_OF_TASK_MANAGERS * 64);
	config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, managedMemory);

	final String dbDir = tempFolder.newFolder().getAbsolutePath();
	final String backupDir = tempFolder.newFolder().getAbsolutePath();

	// The fs backend gets a small threshold on purpose: the test should exercise file
	// references rather than self-contained byte handles.
	final FsStateBackend fsBackend =
		new FsStateBackend(new Path("file://" + backupDir).toUri(), fileSizeThreshold);
	final RocksDBStateBackend rocksDbBackend = new RocksDBStateBackend(fsBackend, incrementalCheckpoints);
	rocksDbBackend.setDbStoragePath(dbDir);

	this.stateBackend = rocksDbBackend;
}
 
Example #3
Source File: CEPRescalingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a keyed test harness around the CEP operator and gives it a RocksDB state backend.
 *
 * @param maxParallelism maximum parallelism forwarded to the harness
 * @param taskParallelism task parallelism forwarded to the harness
 * @param subtaskIdx subtask index forwarded to the harness
 * @return the harness with its state backend already set (it is not opened here)
 * @throws Exception if creating the operator or the harness fails
 */
private KeyedOneInputStreamOperatorTestHarness<Integer, Event, Map<String, List<Event>>> getTestHarness(
		int maxParallelism,
		int taskParallelism,
		int subtaskIdx) throws Exception {

	final KeySelector<Event, Integer> byKey = new TestKeySelector();
	final KeyedOneInputStreamOperatorTestHarness<Integer, Event, Map<String, List<Event>>> testHarness =
		new KeyedOneInputStreamOperatorTestHarness<>(
			getKeyedCepOpearator(false, new NFAFactory()),
			byKey,
			BasicTypeInfo.INT_TYPE_INFO,
			maxParallelism,
			taskParallelism,
			subtaskIdx);

	// RocksDB backend constructed around a fresh MemoryStateBackend.
	final MemoryStateBackend wrappedBackend = new MemoryStateBackend();
	final RocksDBStateBackend rocksDbBackend = new RocksDBStateBackend(wrappedBackend);
	testHarness.setStateBackend(rocksDbBackend);

	return testHarness;
}
 
Example #4
Source File: RocksDBTtlStateTestBase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the RocksDB state backend used by the TTL tests, with the TTL compaction
 * filter option switched on.
 *
 * @param enableIncrementalCheckpointing passed through to the {@link RocksDBStateBackend} constructor
 * @return the configured backend, with its DB storage path pointing at a fresh temporary folder
 * @throws FlinkRuntimeException if the temporary folders cannot be created; the
 *     underlying {@link IOException} is attached as the cause
 */
StateBackend createStateBackend(TernaryBoolean enableIncrementalCheckpointing) {
	String dbPath;
	String checkpointPath;
	try {
		dbPath = tempFolder.newFolder().getAbsolutePath();
		checkpointPath = tempFolder.newFolder().toURI().toString();
	} catch (IOException e) {
		// Keep the original I/O failure as the cause so the reason is visible in the test output.
		throw new FlinkRuntimeException("Failed to init rocksdb test state backend", e);
	}
	RocksDBStateBackend backend = new RocksDBStateBackend(new FsStateBackend(checkpointPath), enableIncrementalCheckpointing);
	Configuration config = new Configuration();
	config.setBoolean(TTL_COMPACT_FILTER_ENABLED, true);
	// configure(...) returns a backend instance, so the DB path is set on the returned one.
	backend = backend.configure(config, Thread.currentThread().getContextClassLoader());
	backend.setDbStoragePath(dbPath);
	return backend;
}
 
Example #5
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Selects and installs a state backend on {@code env} based on the job parameters.
 *
 * <p>The backend name is compared case-insensitively against {@code "file"} and
 * {@code "rocks"}; the checkpoint directory parameter is read with {@code getRequired}.
 *
 * @param env environment that receives the state backend
 * @param pt job parameters
 * @throws IOException declared for the backend constructors
 * @throws IllegalArgumentException if the requested backend name is neither of the two
 */
private static void setupStateBackend(final StreamExecutionEnvironment env, final ParameterTool pt) throws IOException {
	final String backendName = pt.get(STATE_BACKEND.key(), STATE_BACKEND.defaultValue());
	final String checkpointDir = pt.getRequired(STATE_BACKEND_CHECKPOINT_DIR.key());

	final StateBackend selected;
	if ("file".equalsIgnoreCase(backendName)) {
		final boolean async = pt.getBoolean(
			STATE_BACKEND_FILE_ASYNC.key(),
			STATE_BACKEND_FILE_ASYNC.defaultValue());
		selected = new FsStateBackend(checkpointDir, async);
	} else if ("rocks".equalsIgnoreCase(backendName)) {
		final boolean incremental = pt.getBoolean(
			STATE_BACKEND_ROCKS_INCREMENTAL.key(),
			STATE_BACKEND_ROCKS_INCREMENTAL.defaultValue());
		selected = new RocksDBStateBackend(checkpointDir, incremental);
	} else {
		throw new IllegalArgumentException("Unknown backend requested: " + backendName);
	}

	env.setStateBackend(selected);
}
 
Example #6
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Selects and installs a state backend on {@code env} based on the job parameters.
 *
 * <p>The backend name is compared case-insensitively against {@code "file"} and
 * {@code "rocks"}; the checkpoint directory parameter is read with {@code getRequired}.
 *
 * @param env environment that receives the state backend
 * @param pt job parameters
 * @throws IOException declared for the backend constructors
 * @throws IllegalArgumentException if the requested backend name is neither of the two
 */
private static void setupStateBackend(final StreamExecutionEnvironment env, final ParameterTool pt) throws IOException {
	final String backendName = pt.get(STATE_BACKEND.key(), STATE_BACKEND.defaultValue());
	final String checkpointDir = pt.getRequired(STATE_BACKEND_CHECKPOINT_DIR.key());

	final StateBackend selected;
	if ("file".equalsIgnoreCase(backendName)) {
		final boolean async = pt.getBoolean(
			STATE_BACKEND_FILE_ASYNC.key(),
			STATE_BACKEND_FILE_ASYNC.defaultValue());
		selected = new FsStateBackend(checkpointDir, async);
	} else if ("rocks".equalsIgnoreCase(backendName)) {
		final boolean incremental = pt.getBoolean(
			STATE_BACKEND_ROCKS_INCREMENTAL.key(),
			STATE_BACKEND_ROCKS_INCREMENTAL.defaultValue());
		selected = new RocksDBStateBackend(checkpointDir, incremental);
	} else {
		throw new IllegalArgumentException("Unknown backend requested: " + backendName);
	}

	env.setStateBackend(selected);
}
 
Example #7
Source File: CEPRescalingTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a keyed test harness around the CEP operator and gives it a RocksDB state backend.
 *
 * @param maxParallelism maximum parallelism forwarded to the harness
 * @param taskParallelism task parallelism forwarded to the harness
 * @param subtaskIdx subtask index forwarded to the harness
 * @return the harness with its state backend already set (it is not opened here)
 * @throws Exception if creating the operator or the harness fails
 */
private KeyedOneInputStreamOperatorTestHarness<Integer, Event, Map<String, List<Event>>> getTestHarness(
		int maxParallelism,
		int taskParallelism,
		int subtaskIdx) throws Exception {

	final KeySelector<Event, Integer> byKey = new TestKeySelector();
	final KeyedOneInputStreamOperatorTestHarness<Integer, Event, Map<String, List<Event>>> testHarness =
		new KeyedOneInputStreamOperatorTestHarness<>(
			getKeyedCepOpearator(false, new NFAFactory()),
			byKey,
			BasicTypeInfo.INT_TYPE_INFO,
			maxParallelism,
			taskParallelism,
			subtaskIdx);

	// RocksDB backend constructed around a fresh MemoryStateBackend.
	final MemoryStateBackend wrappedBackend = new MemoryStateBackend();
	final RocksDBStateBackend rocksDbBackend = new RocksDBStateBackend(wrappedBackend);
	testHarness.setStateBackend(rocksDbBackend);

	return testHarness;
}
 
Example #8
Source File: RocksDBTtlStateTestBase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the RocksDB state backend used by the TTL tests, with the TTL compaction
 * filter option switched on.
 *
 * @param enableIncrementalCheckpointing passed through to the {@link RocksDBStateBackend} constructor
 * @return the configured backend, with its DB storage path pointing at a fresh temporary folder
 * @throws FlinkRuntimeException if the temporary folders cannot be created; the
 *     underlying {@link IOException} is attached as the cause
 */
StateBackend createStateBackend(TernaryBoolean enableIncrementalCheckpointing) {
	String dbPath;
	String checkpointPath;
	try {
		dbPath = tempFolder.newFolder().getAbsolutePath();
		checkpointPath = tempFolder.newFolder().toURI().toString();
	} catch (IOException e) {
		// Keep the original I/O failure as the cause so the reason is visible in the test output.
		throw new FlinkRuntimeException("Failed to init rocksdb test state backend", e);
	}
	RocksDBStateBackend backend = new RocksDBStateBackend(new FsStateBackend(checkpointPath), enableIncrementalCheckpointing);
	Configuration config = new Configuration();
	config.setBoolean(TTL_COMPACT_FILTER_ENABLED, true);
	// configure(...) returns a backend instance, so the DB path is set on the returned one.
	backend = backend.configure(config, Thread.currentThread().getContextClassLoader());
	backend.setDbStoragePath(dbPath);
	return backend;
}
 
Example #9
Source File: KVStateRequestSerializerRocksDBTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that list serialization and deserialization match when the list state is
 * created by a RocksDB keyed state backend.
 *
 * <p>The backend is disposed in a {@code finally} block so that it is released even
 * when the shared checks fail.
 *
 * @see KvStateRequestSerializerTest#testListSerialization()
 * KvStateRequestSerializerTest#testListSerialization() using the heap state back-end
 * test
 */
@Test
public void testListSerialization() throws Exception {
	final long key = 0L;

	// objects for RocksDB state list serialisation
	DBOptions dbOptions = PredefinedOptions.DEFAULT.createDBOptions();
	dbOptions.setCreateIfMissing(true);
	ExecutionConfig executionConfig = new ExecutionConfig();
	final RocksDBKeyedStateBackend<Long> rocksDbKeyedStateBackend =
		new RocksDBKeyedStateBackendBuilder<>(
			"no-op",
			ClassLoader.getSystemClassLoader(),
			temporaryFolder.getRoot(),
			dbOptions,
			stateName -> PredefinedOptions.DEFAULT.createColumnOptions(),
			mock(TaskKvStateRegistry.class),
			LongSerializer.INSTANCE,
			1,
			new KeyGroupRange(0, 0),
			executionConfig,
			TestLocalRecoveryConfig.disabled(),
			RocksDBStateBackend.PriorityQueueStateType.HEAP,
			TtlTimeProvider.DEFAULT,
			new UnregisteredMetricsGroup(),
			Collections.emptyList(),
			AbstractStateBackend.getCompressionDecorator(executionConfig),
			new CloseableRegistry()
		).build();

	try {
		rocksDbKeyedStateBackend.setCurrentKey(key);

		final InternalListState<Long, VoidNamespace, Long> listState = rocksDbKeyedStateBackend.createInternalState(VoidNamespaceSerializer.INSTANCE,
				new ListStateDescriptor<>("test", LongSerializer.INSTANCE));

		KvStateRequestSerializerTest.testListSerialization(key, listState);
	} finally {
		// Dispose the backend even if the checks above throw.
		rocksDbKeyedStateBackend.dispose();
	}
}
 
Example #10
Source File: KeyedStateCheckpointingITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the test program against a RocksDB backend created with the incremental
 * checkpoint flag set to {@code true}.
 */
@Test
public void testWithRocksDbBackendIncremental() throws Exception {
	final MemoryStateBackend wrappedBackend = new MemoryStateBackend(MAX_MEM_STATE_SIZE);
	final RocksDBStateBackend backend = new RocksDBStateBackend(wrappedBackend, true);

	final String dbDir = tmpFolder.newFolder().getAbsolutePath();
	backend.setDbStoragePath(dbDir);

	testProgramWithBackend(backend);
}
 
Example #11
Source File: RocksDBTtlStateTestBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the RocksDB state backend used by the TTL tests.
 *
 * @param enableIncrementalCheckpointing passed through to the {@link RocksDBStateBackend} constructor
 * @return the configured backend, with its DB storage path pointing at a fresh temporary folder
 * @throws FlinkRuntimeException if the temporary folders cannot be created; the
 *     underlying {@link IOException} is attached as the cause
 */
StateBackend createStateBackend(TernaryBoolean enableIncrementalCheckpointing) {
	String dbPath;
	String checkpointPath;
	try {
		dbPath = tempFolder.newFolder().getAbsolutePath();
		checkpointPath = tempFolder.newFolder().toURI().toString();
	} catch (IOException e) {
		// Keep the original I/O failure as the cause so the reason is visible in the test output.
		throw new FlinkRuntimeException("Failed to init rocksdb test state backend", e);
	}
	RocksDBStateBackend backend = new RocksDBStateBackend(new FsStateBackend(checkpointPath), enableIncrementalCheckpointing);
	Configuration config = new Configuration();
	// configure(...) returns a backend instance, so the DB path is set on the returned one.
	backend = backend.configure(config, Thread.currentThread().getContextClassLoader());
	backend.setDbStoragePath(dbPath);
	return backend;
}
 
Example #12
Source File: SavepointReaderKeyedStateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the keyed-state scenario twice: first with a {@link MemoryStateBackend}, then
 * with a {@link RocksDBStateBackend} built around a memory backend.
 */
@Test
public void testKeyedInputFormat() throws Exception {
	final MemoryStateBackend heapBackend = new MemoryStateBackend();
	runKeyedState(heapBackend);

	// The cluster has to be reset before the state backend in the
	// StreamEnvironment can be changed; without this the tests fail.
	miniClusterResource.after();
	miniClusterResource.before();

	final StateBackend wrappedBackend = new MemoryStateBackend();
	final RocksDBStateBackend rocksDbBackend = new RocksDBStateBackend(wrappedBackend);
	runKeyedState(rocksDbBackend);
}
 
Example #13
Source File: KeyedStateDeduplication.java    From flink-learning with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // Use RocksDBStateBackend as the state backend, with incremental checkpoints enabled.
        final RocksDBStateBackend backend =
                new RocksDBStateBackend("hdfs:///flink/checkpoints", true);
        backend.setNumberOfTransferingThreads(3);
        // Spinning-disk + memory profile; equipping RocksDB with an SSD is strongly recommended.
        backend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        backend.enableTtlCompactionFilter();
        env.setStateBackend(backend);

        // Checkpoint every 10 minutes.
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));

        // Checkpoint settings.
        final CheckpointConfig checkpointing = env.getCheckpointConfig();
        checkpointing.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointing.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointing.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointing.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer settings.
        final Properties consumerProps = new Properties();
        consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        final FlinkKafkaConsumerBase<String> source = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), consumerProps)
                .setStartFromGroupOffsets();

        env.addSource(source)
            // Deserialize the JSON record.
            .map(json -> GsonUtil.fromJson(json, UserVisitWebEvent.class))
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }
 
Example #14
Source File: KeyedStateBootstrapOperatorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates and opens a keyed test harness for {@code bootstrapOperator}, using a
 * RocksDB state backend rooted in a new temporary folder.
 *
 * @param bootstrapOperator operator under test
 * @param state state to initialize the harness with; skipped when {@code null}
 * @return the opened harness
 * @throws Exception if setting up or opening the harness fails
 */
private <T> KeyedOneInputStreamOperatorTestHarness<Long, Long, T> getHarness(OneInputStreamOperator<Long, T> bootstrapOperator, OperatorSubtaskState state) throws Exception {
	final KeyedOneInputStreamOperatorTestHarness<Long, Long, T> testHarness =
		new KeyedOneInputStreamOperatorTestHarness<>(bootstrapOperator, key -> key, Types.LONG, 128, 1, 0);

	final RocksDBStateBackend rocksDbBackend = new RocksDBStateBackend(folder.newFolder().toURI());
	testHarness.setStateBackend(rocksDbBackend);

	if (state != null) {
		testHarness.initializeState(state);
	}

	testHarness.open();
	return testHarness;
}
 
Example #15
Source File: StateBackendBenchmarkUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a RocksDB keyed state backend for {@code Long} keys with its directories
 * prepared under {@code rootDir}.
 *
 * @param rootDir parent directory handed to {@code prepareDirectory}
 * @return the built backend
 * @throws IOException if preparing the directories or building the backend fails
 */
private static RocksDBKeyedStateBackend<Long> createRocksDBKeyedStateBackend(File rootDir) throws IOException {
	final File localRecoveryDir = prepareDirectory(recoveryDirName, rootDir);
	final File rocksDbDir = prepareDirectory(dbDirName, rootDir);
	final ExecutionConfig execConfig = new ExecutionConfig();
	final RocksDBResourceContainer resources = new RocksDBResourceContainer();
	final LocalRecoveryConfig recoveryConfig = new LocalRecoveryConfig(
		false,
		new LocalRecoveryDirectoryProviderImpl(localRecoveryDir, new JobID(), new JobVertexID(), 0));

	final RocksDBKeyedStateBackendBuilder<Long> backendBuilder = new RocksDBKeyedStateBackendBuilder<>(
		"Test",
		Thread.currentThread().getContextClassLoader(),
		rocksDbDir,
		resources,
		stateName -> resources.getColumnOptions(),
		null,
		LongSerializer.INSTANCE,
		2,
		new KeyGroupRange(0, 1),
		execConfig,
		recoveryConfig,
		RocksDBStateBackend.PriorityQueueStateType.ROCKSDB,
		TtlTimeProvider.DEFAULT,
		new UnregisteredMetricsGroup(),
		Collections.emptyList(),
		AbstractStateBackend.getCompressionDecorator(execConfig),
		new CloseableRegistry());

	try {
		return backendBuilder.build();
	} catch (Exception buildFailure) {
		// The build failed, so close the resource container here before rethrowing.
		IOUtils.closeQuietly(resources);
		throw buildFailure;
	}
}
 
Example #16
Source File: KeyedStateCheckpointingITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the test program against a RocksDB backend created with the incremental
 * checkpoint flag set to {@code true}.
 */
@Test
public void testWithRocksDbBackendIncremental() throws Exception {
	final MemoryStateBackend wrappedBackend = new MemoryStateBackend(MAX_MEM_STATE_SIZE);
	final RocksDBStateBackend backend = new RocksDBStateBackend(wrappedBackend, true);

	final String dbDir = tmpFolder.newFolder().getAbsolutePath();
	backend.setDbStoragePath(dbDir);

	testProgramWithBackend(backend);
}
 
Example #17
Source File: KeyedStateCheckpointingITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the test program against a RocksDB backend created with the incremental
 * checkpoint flag set to {@code false}.
 */
@Test
public void testWithRocksDbBackendFull() throws Exception {
	final MemoryStateBackend wrappedBackend = new MemoryStateBackend(MAX_MEM_STATE_SIZE);
	final RocksDBStateBackend backend = new RocksDBStateBackend(wrappedBackend, false);

	final String dbDir = tmpFolder.newFolder().getAbsolutePath();
	backend.setDbStoragePath(dbDir);

	testProgramWithBackend(backend);
}
 
Example #18
Source File: DataStreamStateTTLTestProgram.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces the environment's state backend with a {@link StubStateBackend} that wraps the
 * currently configured backend and uses a {@link MonotonicTTLTimeProvider}.
 *
 * <p>If the configured backend is a {@link RocksDBStateBackend}, its TTL compaction filter
 * is enabled before it is wrapped.
 *
 * @param env The {@link StreamExecutionEnvironment} of the job.
 */
private static void setBackendWithCustomTTLTimeProvider(StreamExecutionEnvironment env) {
	final MonotonicTTLTimeProvider timeProvider = new MonotonicTTLTimeProvider();
	final StateBackend current = env.getStateBackend();

	if (current instanceof RocksDBStateBackend) {
		final RocksDBStateBackend rocksDbBackend = (RocksDBStateBackend) current;
		rocksDbBackend.enableTtlCompactionFilter();
	}

	final StateBackend wrapper = new StubStateBackend(current, timeProvider);
	env.setStateBackend(wrapper);
}
 
Example #19
Source File: EventTimeWindowCheckpointingITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares a RocksDB state backend on top of a file-system backend and stores it in
 * {@code this.stateBackend}.
 *
 * @param fileSizeThreshold threshold handed to the {@link FsStateBackend}
 * @param incrementalCheckpoints flag handed to the {@link RocksDBStateBackend} constructor
 * @throws IOException if a temporary folder cannot be created
 */
private void setupRocksDB(int fileSizeThreshold, boolean incrementalCheckpoints) throws IOException {
	final String dbDir = tempFolder.newFolder().getAbsolutePath();
	final String backupDir = tempFolder.newFolder().getAbsolutePath();

	// The fs backend gets a small threshold on purpose: the test should exercise file
	// references rather than self-contained byte handles.
	final FsStateBackend fsBackend =
		new FsStateBackend(new Path("file://" + backupDir).toUri(), fileSizeThreshold);
	final RocksDBStateBackend rocksDbBackend = new RocksDBStateBackend(fsBackend, incrementalCheckpoints);
	rocksDbBackend.setDbStoragePath(dbDir);

	this.stateBackend = rocksDbBackend;
}
 
Example #20
Source File: KeyedStateCheckpointingITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the test program against a RocksDB backend created with the incremental
 * checkpoint flag set to {@code true}.
 */
@Test
public void testWithRocksDbBackendIncremental() throws Exception {
	final MemoryStateBackend wrappedBackend = new MemoryStateBackend(MAX_MEM_STATE_SIZE);
	final RocksDBStateBackend backend = new RocksDBStateBackend(wrappedBackend, true);

	final String dbDir = tmpFolder.newFolder().getAbsolutePath();
	backend.setDbStoragePath(dbDir);

	testProgramWithBackend(backend);
}
 
Example #21
Source File: KeyedStateCheckpointingITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the test program against a RocksDB backend created with the incremental
 * checkpoint flag set to {@code false}.
 */
@Test
public void testWithRocksDbBackendFull() throws Exception {
	final MemoryStateBackend wrappedBackend = new MemoryStateBackend(MAX_MEM_STATE_SIZE);
	final RocksDBStateBackend backend = new RocksDBStateBackend(wrappedBackend, false);

	final String dbDir = tmpFolder.newFolder().getAbsolutePath();
	backend.setDbStoragePath(dbDir);

	testProgramWithBackend(backend);
}
 
Example #22
Source File: KeyedStateCheckpointingITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the test program against a RocksDB backend created with the incremental
 * checkpoint flag set to {@code false}.
 */
@Test
public void testWithRocksDbBackendFull() throws Exception {
	final MemoryStateBackend wrappedBackend = new MemoryStateBackend(MAX_MEM_STATE_SIZE);
	final RocksDBStateBackend backend = new RocksDBStateBackend(wrappedBackend, false);

	final String dbDir = tmpFolder.newFolder().getAbsolutePath();
	backend.setDbStoragePath(dbDir);

	testProgramWithBackend(backend);
}
 
Example #23
Source File: DataStreamStateTTLTestProgram.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces the environment's state backend with a {@link StubStateBackend} that wraps the
 * currently configured backend and uses a {@link MonotonicTTLTimeProvider}.
 *
 * <p>If the configured backend is a {@link RocksDBStateBackend}, its TTL compaction filter
 * is enabled before it is wrapped.
 *
 * @param env The {@link StreamExecutionEnvironment} of the job.
 */
private static void setBackendWithCustomTTLTimeProvider(StreamExecutionEnvironment env) {
	final MonotonicTTLTimeProvider timeProvider = new MonotonicTTLTimeProvider();
	final StateBackend current = env.getStateBackend();

	if (current instanceof RocksDBStateBackend) {
		final RocksDBStateBackend rocksDbBackend = (RocksDBStateBackend) current;
		rocksDbBackend.enableTtlCompactionFilter();
	}

	final StateBackend wrapper = new StubStateBackend(current, timeProvider);
	env.setStateBackend(wrapper);
}
 
Example #24
Source File: KVStateRequestSerializerRocksDBTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that list serialization and deserialization match when the list state is
 * created by a RocksDB keyed state backend.
 *
 * <p>The backend is disposed in a {@code finally} block so that it is released even
 * when the shared checks fail.
 *
 * @see KvStateRequestSerializerTest#testListSerialization()
 * KvStateRequestSerializerTest#testListSerialization() using the heap state back-end
 * test
 */
@Test
public void testListSerialization() throws Exception {
	final long key = 0L;

	// objects for RocksDB state list serialisation
	DBOptions dbOptions = PredefinedOptions.DEFAULT.createDBOptions();
	dbOptions.setCreateIfMissing(true);
	ExecutionConfig executionConfig = new ExecutionConfig();
	final RocksDBKeyedStateBackend<Long> rocksDbKeyedStateBackend =
		new RocksDBKeyedStateBackendBuilder<>(
			"no-op",
			ClassLoader.getSystemClassLoader(),
			temporaryFolder.getRoot(),
			dbOptions,
			stateName -> PredefinedOptions.DEFAULT.createColumnOptions(),
			mock(TaskKvStateRegistry.class),
			LongSerializer.INSTANCE,
			1,
			new KeyGroupRange(0, 0),
			executionConfig,
			TestLocalRecoveryConfig.disabled(),
			RocksDBStateBackend.PriorityQueueStateType.HEAP,
			TtlTimeProvider.DEFAULT,
			new UnregisteredMetricsGroup(),
			Collections.emptyList(),
			AbstractStateBackend.getCompressionDecorator(executionConfig),
			new CloseableRegistry()
		).build();

	try {
		rocksDbKeyedStateBackend.setCurrentKey(key);

		final InternalListState<Long, VoidNamespace, Long> listState = rocksDbKeyedStateBackend.createInternalState(VoidNamespaceSerializer.INSTANCE,
				new ListStateDescriptor<>("test", LongSerializer.INSTANCE));

		KvStateRequestSerializerTest.testListSerialization(key, listState);
	} finally {
		// Dispose the backend even if the checks above throw.
		rocksDbKeyedStateBackend.dispose();
	}
}
 
Example #25
Source File: KeyedStateDeduplication.java    From flink-learning with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // Use RocksDBStateBackend as the state backend, with incremental checkpoints enabled.
        final RocksDBStateBackend backend =
                new RocksDBStateBackend("hdfs:///flink/checkpoints", true);
        backend.setNumberOfTransferingThreads(3);
        // Spinning-disk + memory profile; equipping RocksDB with an SSD is strongly recommended.
        backend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        backend.enableTtlCompactionFilter();
        env.setStateBackend(backend);

        // Checkpoint every 10 minutes.
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));

        // Checkpoint settings.
        final CheckpointConfig checkpointing = env.getCheckpointConfig();
        checkpointing.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointing.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointing.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointing.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer settings.
        final Properties consumerProps = new Properties();
        consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        final FlinkKafkaConsumerBase<String> source = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), consumerProps)
                .setStartFromGroupOffsets();

        env.addSource(source)
            // Deserialize the JSON record.
            .map(json -> GsonUtil.fromJson(json, UserVisitWebEvent.class))
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }
 
Example #26
Source File: TuningKeyedStateDeduplication.java    From flink-learning with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        // RocksDB state backend; incremental mode and transfer thread count come from
        // values defined outside this method.
        final RocksDBStateBackend backend =
                new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        backend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        backend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        backend.enableTtlCompactionFilter();
        env.setStateBackend(backend);

        // Checkpoint settings.
        final CheckpointConfig checkpointing = env.getCheckpointConfig();
        checkpointing.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointing.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointing.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointing.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer settings.
        final Properties consumerProps = new Properties();
        consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        final FlinkKafkaConsumerBase<String> source = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), consumerProps)
                .setStartFromLatest();

        env.addSource(source)
            // Deserialize the JSON record.
            .map(json -> GsonUtil.fromJson(json, UserVisitWebEvent.class))
            // Hash the log's primary-key id with murmur3_128 and use the resulting long as the key.
            .keyBy((KeySelector<UserVisitWebEvent, Long>) event ->
                    Hashing.murmur3_128(5).hashUnencodedChars(event.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }
 
Example #27
Source File: Main.java    From flink-learning with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws Exception {
        final ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();

        // NOTE(review): the backend is created with an empty checkpoint URI; confirm that the
        // Flink version in use accepts an empty string here.
        final RocksDBStateBackend backend = new RocksDBStateBackend("");
        final ExistingSavepoint savepoint = Savepoint.load(batchEnv, "hdfs://path/", backend);

        // Read list, union and broadcast state of the operator with uid "zhisheng-uid".
        final DataSet<Integer> listState =
                savepoint.readListState("zhisheng-uid", "list-state", Types.INT);

        final DataSet<Integer> unionState =
                savepoint.readUnionState("zhisheng-uid", "union-state", Types.INT);

        final DataSet<Tuple2<Integer, Integer>> broadcastState =
                savepoint.readBroadcastState("zhisheng-uid", "broadcast-state", Types.INT, Types.INT);

        // Disabled variant that passes a custom serializer:
//        DataSet<Integer> listState = savepoint.readListState(
//                "zhisheng-uid", "list-state",
//                Types.INT, new MyCustomIntSerializer());
    }
 
Example #28
Source File: StatefulFunctionsSavepointCreator.java    From stateful-functions with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link StatefulFunctionsSavepointCreator}.
 *
 * @param maxParallelism max parallelism of the Stateful Functions application to be restored
 *     using the generated savepoint.
 */
public StatefulFunctionsSavepointCreator(int maxParallelism) {
  Preconditions.checkArgument(maxParallelism > 0);
  this.maxParallelism = maxParallelism;

  try {
    this.stateBackend = new RocksDBStateBackend("file:///tmp/ignored");
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  this.disableMultiplexState = false;
}
 
Example #29
Source File: StatefulFunctionsSavepointCreator.java    From flink-statefun with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link StatefulFunctionsSavepointCreator}.
 *
 * @param maxParallelism max parallelism of the Stateful Functions application to be restored
 *     using the generated savepoint.
 */
public StatefulFunctionsSavepointCreator(int maxParallelism) {
  Preconditions.checkArgument(maxParallelism > 0);
  this.maxParallelism = maxParallelism;

  try {
    this.stateBackend = new RocksDBStateBackend("file:///tmp/ignored");
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example #30
Source File: BravoTestPipeline.java    From bravo with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up a streaming environment for the test pipeline: source, the caller-supplied
 * transformation, and a collecting sink, with RocksDB as the state backend.
 *
 * @param parallelism parallelism set on the environment (source and sink are fixed to 1)
 * @param pipelinerBuilder function that turns the source stream into the stream to be collected
 * @return the configured environment; it is not executed here
 * @throws Exception if the checkpoint/savepoint directories or the backend cannot be set up
 */
private StreamExecutionEnvironment createJobGraph(int parallelism,
		Function<DataStream<String>, DataStream<String>> pipelinerBuilder) throws Exception {
	// Make sure both target directories exist before the job is defined.
	final Path checkpoints = getCheckpointDir();
	final Path savepoints = getSavepointDir();
	checkpoints.getFileSystem().mkdirs(checkpoints);
	savepoints.getFileSystem().mkdirs(savepoints);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();
	env.getCheckpointConfig().enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setBufferTimeout(0);
	env.setParallelism(parallelism);
	env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);

	final StateBackend rocksDbBackend = new RocksDBStateBackend(checkpoints.toString(), true);
	env.setStateBackend(rocksDbBackend);

	final DataStream<String> source = env
		.addSource(new TestPipelineSource())
		.uid("TestSource")
		.name("TestSource")
		.setParallelism(1);

	final DataStream<String> transformed = pipelinerBuilder.apply(source);
	transformed
		.addSink(new CollectingSink()).name("Output").uid("Output")
		.setParallelism(1);

	return env;
}