Java Code Examples for org.apache.flink.streaming.api.environment.CheckpointConfig

The following examples show how to use org.apache.flink.streaming.api.environment.CheckpointConfig. They are extracted from open source projects; the source project, author, file, and license are listed above each example.
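Before the individual examples, here is a minimal sketch of the configuration pattern most of them share: obtain the CheckpointConfig from the StreamExecutionEnvironment, then set the checkpointing mode, pacing, and externalized-checkpoint retention. The class and method names match the Flink 1.x APIs used in the examples below; the interval, pause, and timeout values are illustrative assumptions rather than recommendations.

import java.util.concurrent.TimeUnit;

import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CheckpointConfigSketch {

    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // checkpointing must be enabled before the other settings take effect;
        // the 1-minute interval is an assumed example value
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));

        CheckpointConfig config = env.getCheckpointConfig();
        // exactly-once semantics (also the default mode)
        config.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // retain externalized checkpoints on cancellation so the job can be restored later
        config.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // assumed pacing values: at least 30 s between checkpoints, 10-minute timeout
        config.setMinPauseBetweenCheckpoints(TimeUnit.SECONDS.toMillis(30));
        config.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(10));

        // a real job would add sources, transformations, and sinks here, then call env.execute(...)
    }
}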
Example #1
Source Project: flink   Author: flink-tpc-ds   File: CheckpointExceptionHandlerConfigurationTest.java    License: Apache License 2.0
@Test
public void testSetCheckpointConfig() {
	StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
	CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();

	// use the deprecated API to disable failing on checkpointing errors
	checkpointConfig.setFailOnCheckpointingErrors(false);
	Assert.assertFalse(checkpointConfig.isFailOnCheckpointingErrors());
	Assert.assertEquals(CheckpointFailureManager.UNLIMITED_TOLERABLE_FAILURE_NUMBER, checkpointConfig.getTolerableCheckpointFailureNumber());

	// use the new API to set the tolerable declined checkpoint number
	checkpointConfig.setTolerableCheckpointFailureNumber(5);
	Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());

	// once the tolerable declined checkpoint number is configured, the deprecated API no longer takes effect
	checkpointConfig.setFailOnCheckpointingErrors(true);
	Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());
}
 
Example #2
Source Project: flink   Author: apache   File: CheckpointExceptionHandlerConfigurationTest.java    License: Apache License 2.0
@Test
public void testSetCheckpointConfig() {
	StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
	CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();

	// use the deprecated API to disable failing on checkpointing errors
	checkpointConfig.setFailOnCheckpointingErrors(false);
	Assert.assertFalse(checkpointConfig.isFailOnCheckpointingErrors());
	Assert.assertEquals(CheckpointFailureManager.UNLIMITED_TOLERABLE_FAILURE_NUMBER, checkpointConfig.getTolerableCheckpointFailureNumber());

	// use the new API to set the tolerable declined checkpoint number
	checkpointConfig.setTolerableCheckpointFailureNumber(5);
	Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());

	// once the tolerable declined checkpoint number is configured, the deprecated API no longer takes effect
	checkpointConfig.setFailOnCheckpointingErrors(true);
	Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());
}
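
Both copies of the test above exercise the interaction between the deprecated setFailOnCheckpointingErrors flag and the newer setTolerableCheckpointFailureNumber API. For reference, here is a minimal fragment of the equivalent, purely non-deprecated configuration, reusing the classes already imported by the tests; the 0/UNLIMITED mapping follows directly from the assertions above:

CheckpointConfig checkpointConfig =
		StreamExecutionEnvironment.getExecutionEnvironment().getCheckpointConfig();

// equivalent of setFailOnCheckpointingErrors(true): fail the job on the first checkpoint failure
checkpointConfig.setTolerableCheckpointFailureNumber(0);

// equivalent of setFailOnCheckpointingErrors(false): tolerate any number of checkpoint failures
checkpointConfig.setTolerableCheckpointFailureNumber(
		CheckpointFailureManager.UNLIMITED_TOLERABLE_FAILURE_NUMBER);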
 
Example #3
Source Project: flink-learning   Author: zhisheng17   File: HyperLogLogUvExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // deserialize JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // build the Redis key in the format date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisPfaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example #4
Source Project: flink-learning   Author: zhisheng17   File: RedisSetUvExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // deserialize JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // build the Redis key in the format date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example #5
Source Project: flink-learning   Author: zhisheng17   File: TuningKeyedStateDeduplication.java    License: Apache License 2.0
public static void main(String[] args) throws Exception{

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize JSON
            // hash the log's primary key id with murmur3_128 and use the resulting long value as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }
 
Example #6
Source Project: flink-learning   Author: zhisheng17   File: KeyedStateDeduplication.java    License: Apache License 2.0
public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // use RocksDBStateBackend as the state backend and enable incremental checkpoints
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // spinning disk + memory profile; an SSD is strongly recommended for RocksDB
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // checkpoint every 10 minutes
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // configure checkpointing
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer configuration
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // deserialize JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }
 
Example #7
Source Project: flink-learning   Author: zhisheng17   File: UnionListStateExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // checkpoint every 15 seconds
        env.enableCheckpointing(TimeUnit.SECONDS.toMillis(15));
        env.setParallelism(3);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // exactly-once checkpoint semantics
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UnionListStateUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>(
                // kafka topic, String serialization schema
                UnionListStateUtil.topic, new SimpleStringSchema(), props);

        env.addSource(kafkaConsumer011)
                .uid(UnionListStateUtil.topic)
                .addSink(new MySink())
                .uid("MySink")
                .name("MySink");

        env.execute("Flink unionListState");
    }
 
Example #8
Source Project: flink   Author: flink-tpc-ds   File: RegionFailoverITCase.java    License: Apache License 2.0
private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();
		env.getConfig().disableSysoutLogging();

		// Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions, so this stream graph contains 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// another stream graph, totally disconnected from the one above.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS)).
			name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}
 
Example #9
Source Project: flink   Author: flink-tpc-ds   File: DataStreamAllroundTestJobFactory.java    License: Apache License 2.0
private static void setupCheckpointing(final StreamExecutionEnvironment env, final ParameterTool pt) {
	String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
	long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
	CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
		? CheckpointingMode.EXACTLY_ONCE
		: CheckpointingMode.AT_LEAST_ONCE;

	env.enableCheckpointing(checkpointInterval, checkpointingMode);

	boolean enableExternalizedCheckpoints = pt.getBoolean(
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

	if (enableExternalizedCheckpoints) {
		String cleanupModeConfig = pt.get(
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

		CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
		switch (cleanupModeConfig) {
			case "retain":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
				break;
			case "delete":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
				break;
			default:
				throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
		}
		env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);

		final int tolerableDeclinedCheckpointNumber = pt.getInt(
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.key(),
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.defaultValue());
		env.getCheckpointConfig().setTolerableCheckpointFailureNumber(tolerableDeclinedCheckpointNumber);
	}
}
 
Example #10
Source Project: flink   Author: flink-tpc-ds   File: StreamGraph.java    License: Apache License 2.0
public StreamGraph(ExecutionConfig executionConfig, CheckpointConfig checkpointConfig) {
	this.executionConfig = checkNotNull(executionConfig);
	this.checkpointConfig = checkNotNull(checkpointConfig);

	// create an empty new stream graph.
	clear();
}
 
Example #11
Source Project: flink   Author: flink-tpc-ds   File: CheckpointExceptionHandlerConfigurationTest.java    License: Apache License 2.0
@Test
public void testCheckpointConfigDefault() {
	StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
	CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();
	Assert.assertTrue(checkpointConfig.isFailOnCheckpointingErrors());
	Assert.assertEquals(0, checkpointConfig.getTolerableCheckpointFailureNumber());
}
 
Example #12
Source Project: flink-learning   Author: zhisheng17   File: MapStateUvExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize JSON
            .keyBy("date","pageId") // keyBy date and pageId
            .map(new RichMapFunction<UserVisitWebEvent, Tuple2<String, Long>>() {
                // stores the set of userIds seen for the current key
                private MapState<String,Boolean> userIdState;
                // stores the UV count for the current key
                private ValueState<Long> uvState;

                @Override
                public Tuple2<String, Long> map(UserVisitWebEvent userVisitWebEvent) throws Exception {
                    // initialize uvState
                    if(null == uvState.value()){
                        uvState.update(0L);
                    }
                    // if userIdState does not yet contain the current userId, the user has not visited this page today,
                    // so put the userId into userIdState and increment the UV count
                    if(!userIdState.contains(userVisitWebEvent.getUserId())){
                        userIdState.put(userVisitWebEvent.getUserId(),null);
                        uvState.update(uvState.value() + 1);
                    }
                    // build the Redis key in the format date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    System.out.println(redisKey + "   :::   " + uvState.value());
                    return Tuple2.of(redisKey, uvState.value());
                }

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    // recover userIdState from managed state
                    userIdState = getRuntimeContext().getMapState(
                            new MapStateDescriptor<>("userIdState",
                                    TypeInformation.of(new TypeHint<String>() {}),
                                    TypeInformation.of(new TypeHint<Boolean>() {})));
                    // recover uvState from managed state
                    uvState = getRuntimeContext().getState(
                            new ValueStateDescriptor<>("uvState",
                                    TypeInformation.of(new TypeHint<Long>() {})));
                }
            })
            .addSink(new RedisSink<>(conf, new RedisSetSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example #13
Source Project: flink   Author: apache   File: RegionFailoverITCase.java    License: Apache License 2.0
private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();

		// Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions, so this stream graph contains 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// another stream graph, totally disconnected from the one above.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS)).
			name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}
 
Example #14
Source Project: flink   Author: apache   File: DataStreamAllroundTestJobFactory.java    License: Apache License 2.0
private static void setupCheckpointing(final StreamExecutionEnvironment env, final ParameterTool pt) {
	String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
	long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
	CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
		? CheckpointingMode.EXACTLY_ONCE
		: CheckpointingMode.AT_LEAST_ONCE;

	env.enableCheckpointing(checkpointInterval, checkpointingMode);

	boolean enableExternalizedCheckpoints = pt.getBoolean(
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

	if (enableExternalizedCheckpoints) {
		String cleanupModeConfig = pt.get(
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

		CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
		switch (cleanupModeConfig) {
			case "retain":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
				break;
			case "delete":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
				break;
			default:
				throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
		}
		env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);

		final int tolerableDeclinedCheckpointNumber = pt.getInt(
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.key(),
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.defaultValue());
		env.getCheckpointConfig().setTolerableCheckpointFailureNumber(tolerableDeclinedCheckpointNumber);
	}
}
 
Example #15
Source Project: flink   Author: apache   File: StreamGraph.java    License: Apache License 2.0
public StreamGraph(ExecutionConfig executionConfig, CheckpointConfig checkpointConfig, SavepointRestoreSettings savepointRestoreSettings) {
	this.executionConfig = checkNotNull(executionConfig);
	this.checkpointConfig = checkNotNull(checkpointConfig);
	this.savepointRestoreSettings = checkNotNull(savepointRestoreSettings);

	// create an empty new stream graph.
	clear();
}
 
Example #16
Source Project: flink   Author: apache   File: StreamingJobGraphGenerator.java    License: Apache License 2.0
@SuppressWarnings("deprecation")
private void preValidate() {
	CheckpointConfig checkpointConfig = streamGraph.getCheckpointConfig();

	if (checkpointConfig.isCheckpointingEnabled()) {
		// temporarily forbid checkpointing for iterative jobs
		if (streamGraph.isIterative() && !checkpointConfig.isForceCheckpointing()) {
			throw new UnsupportedOperationException(
				"Checkpointing is currently not supported by default for iterative jobs, as we cannot guarantee exactly once semantics. "
					+ "State checkpoints happen normally, but records in-transit during the snapshot will be lost upon failure. "
					+ "\nThe user can force enable state checkpoints with the reduced guarantees by calling: env.enableCheckpointing(interval,true)");
		}

		ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
		for (StreamNode node : streamGraph.getStreamNodes()) {
			StreamOperatorFactory operatorFactory = node.getOperatorFactory();
			if (operatorFactory != null) {
				Class<?> operatorClass = operatorFactory.getStreamOperatorClass(classLoader);
				if (InputSelectable.class.isAssignableFrom(operatorClass)) {

					throw new UnsupportedOperationException(
						"Checkpointing is currently not supported for operators that implement InputSelectable:"
							+ operatorClass.getName());
				}
			}
		}
	}

	if (checkpointConfig.isUnalignedCheckpointsEnabled() && getCheckpointingMode(checkpointConfig) != CheckpointingMode.EXACTLY_ONCE) {
		LOG.warn("Unaligned checkpoints can only be used with checkpointing mode EXACTLY_ONCE");
		checkpointConfig.enableUnalignedCheckpoints(false);
	}
}
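
As the final guard in preValidate() shows, unaligned checkpoints are only honored together with exactly-once checkpointing. Here is a minimal sketch of enabling them on the user side, assuming a Flink version that exposes enableUnalignedCheckpoints on CheckpointConfig, as the generator code above does:

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// unaligned checkpoints require exactly-once mode; the 60 s interval is an assumed value
env.enableCheckpointing(60_000, CheckpointingMode.EXACTLY_ONCE);

CheckpointConfig config = env.getCheckpointConfig();
// under AT_LEAST_ONCE mode, preValidate() would log a warning and switch this back off
config.enableUnalignedCheckpoints(true);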
 
Example #17
Source Project: flink   Author: apache   File: StreamingJobGraphGenerator.java    License: Apache License 2.0
private CheckpointingMode getCheckpointingMode(CheckpointConfig checkpointConfig) {
	CheckpointingMode checkpointingMode = checkpointConfig.getCheckpointingMode();

	checkArgument(checkpointingMode == CheckpointingMode.EXACTLY_ONCE ||
		checkpointingMode == CheckpointingMode.AT_LEAST_ONCE, "Unexpected checkpointing mode.");

	if (checkpointConfig.isCheckpointingEnabled()) {
		return checkpointingMode;
	} else {
		// the "at-least-once" input handler is slightly cheaper (in the absence of checkpoints),
		// so we use that one if checkpointing is not enabled
		return CheckpointingMode.AT_LEAST_ONCE;
	}
}
 
Example #18
Source Project: flink   Author: apache   File: StreamingJobGraphGeneratorTest.java    License: Apache License 2.0
@Test
public void generatorForwardsSavepointRestoreSettings() {
	StreamGraph streamGraph = new StreamGraph(
			new ExecutionConfig(),
			new CheckpointConfig(),
			SavepointRestoreSettings.forPath("hello"));

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

	SavepointRestoreSettings savepointRestoreSettings = jobGraph.getSavepointRestoreSettings();
	assertThat(savepointRestoreSettings.getRestorePath(), is("hello"));
}
 
Example #19
Source Project: flink   Author: apache   File: StreamGraphGeneratorTest.java    License: Apache License 2.0
@Test
public void generatorForwardsSavepointRestoreSettings() {
	StreamGraphGenerator streamGraphGenerator =
			new StreamGraphGenerator(Collections.emptyList(),
			new ExecutionConfig(),
			new CheckpointConfig());

	streamGraphGenerator.setSavepointRestoreSettings(SavepointRestoreSettings.forPath("hello"));

	StreamGraph streamGraph = streamGraphGenerator.generate();
	assertThat(streamGraph.getSavepointRestoreSettings().getRestorePath(), is("hello"));
}
 
Example #20
Source Project: flink   Author: apache   File: CheckpointExceptionHandlerConfigurationTest.java    License: Apache License 2.0
@Test
public void testCheckpointConfigDefault() {
	StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
	CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();
	Assert.assertTrue(checkpointConfig.isFailOnCheckpointingErrors());
	Assert.assertEquals(0, checkpointConfig.getTolerableCheckpointFailureNumber());
}
 
Example #21
Source Project: Flink-CEPplus   Author: ljygz   File: DataStreamAllroundTestJobFactory.java    License: Apache License 2.0
public static void setupEnvironment(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {

		// set checkpointing semantics
		String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
		long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
		CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
			? CheckpointingMode.EXACTLY_ONCE
			: CheckpointingMode.AT_LEAST_ONCE;

		env.enableCheckpointing(checkpointInterval, checkpointingMode);

		// use event time
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		// parallelism
		env.setParallelism(pt.getInt(ENVIRONMENT_PARALLELISM.key(), ENVIRONMENT_PARALLELISM.defaultValue()));
		env.setMaxParallelism(pt.getInt(ENVIRONMENT_MAX_PARALLELISM.key(), ENVIRONMENT_MAX_PARALLELISM.defaultValue()));

		// restart strategy
		String restartStrategyConfig = pt.get(ENVIRONMENT_RESTART_STRATEGY.key());
		if (restartStrategyConfig != null) {
			RestartStrategies.RestartStrategyConfiguration restartStrategy;
			switch (restartStrategyConfig) {
				case "fixed_delay":
					restartStrategy = RestartStrategies.fixedDelayRestart(
						pt.getInt(
							ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.key(),
							ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.defaultValue()),
						pt.getLong(
							ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.key(),
							ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.defaultValue()));
					break;
				case "no_restart":
					restartStrategy = RestartStrategies.noRestart();
					break;
				default:
					throw new IllegalArgumentException("Unkown restart strategy: " + restartStrategyConfig);
			}
			env.setRestartStrategy(restartStrategy);
		}

		// state backend
		final String stateBackend = pt.get(
			STATE_BACKEND.key(),
			STATE_BACKEND.defaultValue());

		final String checkpointDir = pt.getRequired(STATE_BACKEND_CHECKPOINT_DIR.key());

		if ("file".equalsIgnoreCase(stateBackend)) {
			boolean asyncCheckpoints = pt.getBoolean(
				STATE_BACKEND_FILE_ASYNC.key(),
				STATE_BACKEND_FILE_ASYNC.defaultValue());

			env.setStateBackend((StateBackend) new FsStateBackend(checkpointDir, asyncCheckpoints));
		} else if ("rocks".equalsIgnoreCase(stateBackend)) {
			boolean incrementalCheckpoints = pt.getBoolean(
				STATE_BACKEND_ROCKS_INCREMENTAL.key(),
				STATE_BACKEND_ROCKS_INCREMENTAL.defaultValue());

			env.setStateBackend((StateBackend) new RocksDBStateBackend(checkpointDir, incrementalCheckpoints));
		} else {
			throw new IllegalArgumentException("Unknown backend requested: " + stateBackend);
		}

		boolean enableExternalizedCheckpoints = pt.getBoolean(
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

		if (enableExternalizedCheckpoints) {
			String cleanupModeConfig = pt.get(
				ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
				ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

			CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
			switch (cleanupModeConfig) {
				case "retain":
					cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
					break;
				case "delete":
					cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
					break;
				default:
					throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
			}

			env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);
		}

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(pt);
	}
 
Example #22
Source Project: Flink-CEPplus   Author: ljygz   File: StickyAllocationAndLocalRecoveryTestJob.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool pt = ParameterTool.fromArgs(args);

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		env.setParallelism(pt.getInt("parallelism", 1));
		env.setMaxParallelism(pt.getInt("maxParallelism", pt.getInt("parallelism", 1)));
		env.enableCheckpointing(pt.getInt("checkpointInterval", 1000));
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, pt.getInt("restartDelay", 0)));
		if (pt.getBoolean("externalizedCheckpoints", false)) {
			env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		}

		String stateBackend = pt.get("stateBackend", "file");
		String checkpointDir = pt.getRequired("checkpointDir");

		boolean killJvmOnFail = pt.getBoolean("killJvmOnFail", false);

		if ("file".equals(stateBackend)) {
			boolean asyncCheckpoints = pt.getBoolean("asyncCheckpoints", true);
			env.setStateBackend(new FsStateBackend(checkpointDir, asyncCheckpoints));
		} else if ("rocks".equals(stateBackend)) {
			boolean incrementalCheckpoints = pt.getBoolean("incrementalCheckpoints", false);
			env.setStateBackend(new RocksDBStateBackend(checkpointDir, incrementalCheckpoints));
		} else {
			throw new IllegalArgumentException("Unknown backend: " + stateBackend);
		}

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(pt);

		// delay to throttle down the production of the source
		long delay = pt.getLong("delay", 0L);

		// the maximum number of attempts, before the job finishes with success
		int maxAttempts = pt.getInt("maxAttempts", 3);

		// size of one artificial value
		int valueSize = pt.getInt("valueSize", 10);

		env.addSource(new RandomLongSource(maxAttempts, delay))
			.keyBy((KeySelector<Long, Long>) aLong -> aLong)
			.flatMap(new StateCreatingFlatMap(valueSize, killJvmOnFail))
			.addSink(new PrintSinkFunction<>());

		env.execute("Sticky Allocation And Local Recovery Test");
	}
 
Example #23
Source Project: Flink-CEPplus   Author: ljygz   File: StreamGraph.java    License: Apache License 2.0
public CheckpointConfig getCheckpointConfig() {
	return checkpointConfig;
}
 
Example #24
Source Project: flink-learning   Author: zhisheng17   File: PvStatLocalKeyByExactlyOnce.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // checkpoint once a minute
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // exactly-once checkpoint semantics
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, PvStatExactlyOnceKafkaUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumerBase<String> appKafkaConsumer = new FlinkKafkaConsumer011<>(
                // kafka topic, String serialization schema
                PvStatExactlyOnceKafkaUtil.topic, new SimpleStringSchema(), props).setStartFromLatest();


        env.addSource(appKafkaConsumer)
                .flatMap(new LocalKeyByFlatMap(10))
                // keyBy appId
                .keyBy((KeySelector<Tuple2<String, Long>, String>) appIdPv -> appIdPv.f0)
                .map(new RichMapFunction<Tuple2<String, Long>, Tuple2<String, Long>>() {
                    private ValueState<Long> pvState;
                    private long pv = 0;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        // initialize the state
                        pvState = getRuntimeContext().getState(
                                new ValueStateDescriptor<>("pvStat",
                                        TypeInformation.of(new TypeHint<Long>() {
                                        })));
                    }

                    @Override
                    public Tuple2<String, Long> map(Tuple2<String, Long> tuple2) throws Exception {
                        // read this app's PV value from state, add the newly received PV value, and update the state
                        if (null == pvState.value()) {
                            log.info("{} is new, PV is {}", tuple2.f0, tuple2.f1);
                            pv = tuple2.f1;
                        } else {
                            pv = pvState.value();
                            pv += tuple2.f1;
                            log.info("{} is old, PV is {}", tuple2.f0, pv);
                        }
                        pvState.update(pv);
                        tuple2.setField(pv, 1);
                        return tuple2;
                    }
                })
                .print();

        env.execute("Flink pv stat LocalKeyBy");
    }
 