org.apache.flink.streaming.api.CheckpointingMode Java Examples

The following examples show how to use org.apache.flink.streaming.api.CheckpointingMode. They are drawn from open-source projects; the source file and license are listed above each example.
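Before the project examples, here is a minimal, self-contained sketch of the enum's typical use: it is the second argument of StreamExecutionEnvironment#enableCheckpointing and selects between exactly-once and at-least-once barrier handling.

import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CheckpointingModeQuickStart {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// checkpoint every 10 seconds with exactly-once semantics (the default mode)
		env.enableCheckpointing(10_000L, CheckpointingMode.EXACTLY_ONCE);

		// alternatively, trade result consistency for lower latency:
		// env.enableCheckpointing(10_000L, CheckpointingMode.AT_LEAST_ONCE);

		env.fromElements(1, 2, 3).print();
		env.execute("CheckpointingMode quick start");
	}
}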
Example #1
Source File: StreamingScalabilityAndLatency.java    From flink with Apache License 2.0
private static void runPartitioningProgram(int parallelism) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.getConfig().enableObjectReuse();

	env.setBufferTimeout(5L);
	env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);

	env
		.addSource(new TimeStampingSource())
		.map(new IdMapper<Tuple2<Long, Long>>())
		.keyBy(0)
		.addSink(new TimestampingSink());

	env.execute("Partitioning Program");
}
 
Example #2
Source File: InputProcessorUtil.java    From flink with Apache License 2.0
public static CheckpointedInputGate createCheckpointedInputGate(
		AbstractInvokable toNotifyOnCheckpoint,
		CheckpointingMode checkpointMode,
		IOManager ioManager,
		InputGate inputGate,
		Configuration taskManagerConfig,
		String taskName) throws IOException {

	int pageSize = ConfigurationParserUtils.getPageSize(taskManagerConfig);

	BufferStorage bufferStorage = createBufferStorage(
		checkpointMode, ioManager, pageSize, taskManagerConfig, taskName);
	CheckpointBarrierHandler barrierHandler = createCheckpointBarrierHandler(
		checkpointMode, inputGate.getNumberOfInputChannels(), taskName, toNotifyOnCheckpoint);
	return new CheckpointedInputGate(inputGate, bufferStorage, barrierHandler);
}
 
Example #3
Source File: InputProcessorUtil.java    From flink with Apache License 2.0
private static CheckpointBarrierHandler createCheckpointBarrierHandler(
		CheckpointingMode checkpointMode,
		int numberOfInputChannels,
		String taskName,
		AbstractInvokable toNotifyOnCheckpoint) {
	switch (checkpointMode) {
		case EXACTLY_ONCE:
			return new CheckpointBarrierAligner(
				numberOfInputChannels,
				taskName,
				toNotifyOnCheckpoint);
		case AT_LEAST_ONCE:
			return new CheckpointBarrierTracker(numberOfInputChannels, toNotifyOnCheckpoint);
		default:
			throw new UnsupportedOperationException("Unrecognized Checkpointing Mode: " + checkpointMode);
	}
}
 
Example #4
Source File: StreamingScalabilityAndLatency.java    From Flink-CEPplus with Apache License 2.0
private static void runPartitioningProgram(int parallelism) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.getConfig().enableObjectReuse();

	env.setBufferTimeout(5L);
	env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);

	env
		.addSource(new TimeStampingSource())
		.map(new IdMapper<Tuple2<Long, Long>>())
		.keyBy(0)
		.addSink(new TimestampingSink());

	env.execute("Partitioning Program");
}
 
Example #5
Source File: AbstractOperatorRestoreTestBase.java    From flink with Apache License 2.0
private JobGraph createJobGraph(ExecutionMode mode) {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStateBackend((StateBackend) new MemoryStateBackend());

	switch (mode) {
		case MIGRATE:
			createMigrationJob(env);
			break;
		case RESTORE:
			createRestoredJob(env);
			break;
	}

	return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
 
Example #6
Source File: StreamOperator.java    From Alink with Apache License 2.0
public static void setCheckPointConf() {
	StreamExecutionEnvironment env = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment();

	// start a checkpoint every 30 min
	env.enableCheckpointing(1800 * 1000L);

	// advanced options:

	// set mode to exactly-once (this is the default)
	env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);

	// make sure 500 ms of progress happen between checkpoints
	env.getCheckpointConfig().setMinPauseBetweenCheckpoints(500);

	// checkpoints have to complete within 30 minutes, or they are discarded
	env.getCheckpointConfig().setCheckpointTimeout(1800 * 1000L);

	// allow only one checkpoint to be in progress at the same time
	env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
	// enable externalized checkpoints which are retained after job cancellation:
	// env.getCheckpointConfig().enableExternalizedCheckpoints(
	//     CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
}
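The cleanup policy passed to enableExternalizedCheckpoints decides what happens to the retained checkpoint when the job is cancelled. A short sketch of the two policies, continuing from the env above (see also the "retain"/"delete" switch in Example #13):

	// keep the checkpoint on cancellation (manual cleanup required later):
	env.getCheckpointConfig().enableExternalizedCheckpoints(
		CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
	// or delete it on cancellation, so it survives only job failures:
	// env.getCheckpointConfig().enableExternalizedCheckpoints(
	//     CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION);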
 
Example #7
Source File: BootstrapTransformation.java    From flink with Apache License 2.0
@VisibleForTesting
StreamConfig getConfig(OperatorID operatorID, StateBackend stateBackend, StreamOperator<TaggedOperatorSubtaskState> operator) {
	// Eagerly perform a deep copy of the configuration, otherwise it will result in undefined behavior
	// when deploying with multiple bootstrap transformations.
	Configuration deepCopy = new Configuration(dataSet.getExecutionEnvironment().getConfiguration());
	final StreamConfig config = new StreamConfig(deepCopy);
	config.setChainStart();
	config.setCheckpointingEnabled(true);
	config.setCheckpointMode(CheckpointingMode.EXACTLY_ONCE);

	if (keyType != null) {
		TypeSerializer<?> keySerializer = keyType.createSerializer(dataSet.getExecutionEnvironment().getConfig());

		config.setStateKeySerializer(keySerializer);
		config.setStatePartitioner(0, originalKeySelector);
	}

	config.setStreamOperator(operator);
	config.setOperatorName(operatorID.toHexString());
	config.setOperatorID(operatorID);
	config.setStateBackend(stateBackend);
	return config;
}
 
Example #8
Source File: UnionListStateExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // trigger a checkpoint every 15 seconds
        env.enableCheckpointing(TimeUnit.SECONDS.toMillis(15));
        env.setParallelism(3);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // checkpoint semantics: exactly-once
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UnionListStateUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>(
                // Kafka topic, with String deserialization
                UnionListStateUtil.topic, new SimpleStringSchema(), props);

        env.addSource(kafkaConsumer011)
                .uid(UnionListStateUtil.topic)
                .addSink(new MySink())
                .uid("MySink")
                .name("MySink");

        env.execute("Flink unionListState");
    }
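The MySink used above is not shown in this listing. As a hypothetical sketch of what a union-list-state sink can look like (the class and field names below are illustrative, not taken from the project): each subtask snapshots its local PV counter, and because union redistribution hands every subtask the full list on restore, only subtask 0 resumes the global total to avoid multiplying it by the parallelism.

import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

public class UnionListStatePvSink extends RichSinkFunction<String>
        implements CheckpointedFunction {

    private transient ListState<Long> unionPvState;
    private long localPv;

    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        unionPvState = context.getOperatorStateStore().getUnionListState(
                new ListStateDescriptor<>("pv-union-state", Long.class));
        // union redistribution: every subtask receives ALL checkpointed entries
        if (context.isRestored() && getRuntimeContext().getIndexOfThisSubtask() == 0) {
            for (Long pv : unionPvState.get()) {
                localPv += pv;
            }
        }
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        unionPvState.clear();
        unionPvState.add(localPv);
    }

    @Override
    public void invoke(String value, Context context) {
        localPv++;
    }
}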
 
Example #9
Source File: InputProcessorUtil.java    From flink with Apache License 2.0
private static BufferStorage createBufferStorage(
		CheckpointingMode checkpointMode,
		IOManager ioManager,
		int pageSize,
		Configuration taskManagerConfig,
		String taskName) throws IOException {
	switch (checkpointMode) {
		case EXACTLY_ONCE: {
			long maxAlign = taskManagerConfig.getLong(TaskManagerOptions.TASK_CHECKPOINT_ALIGNMENT_BYTES_LIMIT);
			if (!(maxAlign == -1 || maxAlign > 0)) {
				throw new IllegalConfigurationException(
					TaskManagerOptions.TASK_CHECKPOINT_ALIGNMENT_BYTES_LIMIT.key()
						+ " must be positive or -1 (infinite)");
			}

			if (taskManagerConfig.getBoolean(NettyShuffleEnvironmentOptions.NETWORK_CREDIT_MODEL)) {
				return new CachedBufferStorage(pageSize, maxAlign, taskName);
			} else {
				return new BufferSpiller(ioManager, pageSize, maxAlign, taskName);
			}
		}
		case AT_LEAST_ONCE:
			return new EmptyBufferStorage();
		default:
			throw new UnsupportedOperationException("Unrecognized Checkpointing Mode: " + checkpointMode);
	}
}
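A sketch of configuring the alignment limit that the EXACTLY_ONCE branch validates; the 64 MiB value is an arbitrary illustration, and -1 (the "infinite" case named in the error message) leaves alignment unbounded:

	Configuration taskManagerConfig = new Configuration();
	// cap the bytes buffered while aligning an exactly-once checkpoint
	taskManagerConfig.setLong(TaskManagerOptions.TASK_CHECKPOINT_ALIGNMENT_BYTES_LIMIT, 64L * 1024 * 1024);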
 
Example #10
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
@Test
public void testEnabledUnalignedCheckAndDisabledCheckpointing() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.fromElements(0).print();
	StreamGraph streamGraph = env.getStreamGraph();
	assertFalse("Checkpointing enabled", streamGraph.getCheckpointConfig().isCheckpointingEnabled());
	env.getCheckpointConfig().enableUnalignedCheckpoints(true);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	StreamConfig streamConfig = new StreamConfig(verticesSorted.get(0).getConfiguration());
	assertEquals(CheckpointingMode.AT_LEAST_ONCE, streamConfig.getCheckpointMode());
	assertFalse(streamConfig.isUnalignedCheckpointsEnabled());
}
 
Example #11
Source File: RegionFailoverITCase.java    From flink with Apache License 2.0
private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();
		env.getConfig().disableSysoutLogging();

		// Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions, so this stream graph consists of 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// another stream graph, completely disconnected from the one above.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS)).
			name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}
 
Example #12
Source File: BoundedStreamConfig.java    From flink with Apache License 2.0
public BoundedStreamConfig() {
	super(new Configuration());

	setChainStart();
	setCheckpointingEnabled(true);
	setCheckpointMode(CheckpointingMode.EXACTLY_ONCE);
}
 
Example #13
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0
private static void setupCheckpointing(final StreamExecutionEnvironment env, final ParameterTool pt) {
	String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
	long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
	CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
		? CheckpointingMode.EXACTLY_ONCE
		: CheckpointingMode.AT_LEAST_ONCE;

	env.enableCheckpointing(checkpointInterval, checkpointingMode);

	boolean enableExternalizedCheckpoints = pt.getBoolean(
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

	if (enableExternalizedCheckpoints) {
		String cleanupModeConfig = pt.get(
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

		CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
		switch (cleanupModeConfig) {
			case "retain":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
				break;
			case "delete":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
				break;
			default:
				throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
		}
		env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);

		final int tolerableDeclinedCheckpointNumber = pt.getInt(
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.key(),
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.defaultValue());
		env.getCheckpointConfig().setTolerableCheckpointFailureNumber(tolerableDeclinedCheckpointNumber);
	}
}
 
Example #14
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
@Test
public void testUnalignedCheckAndAtLeastOnce() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.fromElements(0).print();
	StreamGraph streamGraph = env.getStreamGraph();
	env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);
	env.getCheckpointConfig().enableUnalignedCheckpoints(true);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	StreamConfig streamConfig = new StreamConfig(verticesSorted.get(0).getConfiguration());
	assertEquals(CheckpointingMode.AT_LEAST_ONCE, streamConfig.getCheckpointMode());
	assertFalse(streamConfig.isUnalignedCheckpointsEnabled());
}
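Together with Example #10, this test shows that enableUnalignedCheckpoints is ignored unless the barriers are exactly-once. A minimal sketch of a configuration where unaligned checkpoints actually take effect:

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// unaligned checkpoints require exactly-once mode
	env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
	env.getCheckpointConfig().enableUnalignedCheckpoints(true);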
 
Example #15
Source File: RedisSetUvExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // deserialize JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // build the Redis key as date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example #16
Source File: StreamingJobGraphGenerator.java    From flink with Apache License 2.0
private CheckpointingMode getCheckpointingMode(CheckpointConfig checkpointConfig) {
	CheckpointingMode checkpointingMode = checkpointConfig.getCheckpointingMode();

	checkArgument(checkpointingMode == CheckpointingMode.EXACTLY_ONCE ||
		checkpointingMode == CheckpointingMode.AT_LEAST_ONCE, "Unexpected checkpointing mode.");

	if (checkpointConfig.isCheckpointingEnabled()) {
		return checkpointingMode;
	} else {
		// the "at-least-once" input handler is slightly cheaper (in the absence of checkpoints),
		// so we use that one if checkpointing is not enabled
		return CheckpointingMode.AT_LEAST_ONCE;
	}
}
 
Example #17
Source File: InputProcessorUtil.java    From flink with Apache License 2.0
/**
 * @return a pair of {@link CheckpointedInputGate} created for two corresponding
 * {@link InputGate}s supplied as parameters.
 */
public static CheckpointedInputGate[] createCheckpointedInputGatePair(
		AbstractInvokable toNotifyOnCheckpoint,
		CheckpointingMode checkpointMode,
		IOManager ioManager,
		InputGate inputGate1,
		InputGate inputGate2,
		Configuration taskManagerConfig,
		String taskName) throws IOException {

	int pageSize = ConfigurationParserUtils.getPageSize(taskManagerConfig);

	BufferStorage mainBufferStorage1 = createBufferStorage(
		checkpointMode, ioManager, pageSize, taskManagerConfig, taskName);
	BufferStorage mainBufferStorage2 = createBufferStorage(
		checkpointMode, ioManager, pageSize, taskManagerConfig, taskName);
	checkState(mainBufferStorage1.getMaxBufferedBytes() == mainBufferStorage2.getMaxBufferedBytes());

	BufferStorage linkedBufferStorage1 = new LinkedBufferStorage(
		mainBufferStorage1,
		mainBufferStorage2,
		mainBufferStorage1.getMaxBufferedBytes());
	BufferStorage linkedBufferStorage2 = new LinkedBufferStorage(
		mainBufferStorage2,
		mainBufferStorage1,
		mainBufferStorage1.getMaxBufferedBytes());

	CheckpointBarrierHandler barrierHandler = createCheckpointBarrierHandler(
		checkpointMode,
		inputGate1.getNumberOfInputChannels() + inputGate2.getNumberOfInputChannels(),
		taskName,
		toNotifyOnCheckpoint);
	return new CheckpointedInputGate[] {
		new CheckpointedInputGate(inputGate1, linkedBufferStorage1, barrierHandler),
		new CheckpointedInputGate(inputGate2, linkedBufferStorage2, barrierHandler, inputGate1.getNumberOfInputChannels())
	};
}
 
Example #18
Source File: StreamConfig.java    From flink with Apache License 2.0
public CheckpointingMode getCheckpointMode() {
	int ordinal = config.getInteger(CHECKPOINT_MODE, -1);
	if (ordinal >= 0) {
		return CheckpointingMode.values()[ordinal];
	} else {
		return DEFAULT_CHECKPOINTING_MODE;
	}
}
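The getter decodes the mode from an enum ordinal stored in the task configuration, falling back to a default when the key is absent. Presumably the matching setter, called in Examples #7 and #12 above, writes that ordinal; a sketch under that assumption (not copied from the listing):

	public void setCheckpointMode(CheckpointingMode checkpointMode) {
		config.setInteger(CHECKPOINT_MODE, checkpointMode.ordinal());
	}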
 
Example #19
Source File: BravoTestPipeline.java    From bravo with Apache License 2.0
private StreamExecutionEnvironment createJobGraph(int parallelism,
		Function<DataStream<String>, DataStream<String>> pipelinerBuilder) throws Exception {
	final Path checkpointDir = getCheckpointDir();
	final Path savepointRootDir = getSavepointDir();

	checkpointDir.getFileSystem().mkdirs(checkpointDir);
	savepointRootDir.getFileSystem().mkdirs(savepointRootDir);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();
	env.getCheckpointConfig().enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setBufferTimeout(0);
	env.setParallelism(parallelism);
	env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);

	env.setStateBackend((StateBackend) new RocksDBStateBackend(checkpointDir.toString(), true));

	DataStream<String> sourceData = env
			.addSource(new TestPipelineSource())
			.uid("TestSource")
			.name("TestSource")
			.setParallelism(1);

	pipelinerBuilder.apply(sourceData)
			.addSink(new CollectingSink()).name("Output").uid("Output")
			.setParallelism(1);

	return env;
}
 
Example #20
Source File: HyperLogLogUvExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // deserialize JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // build the Redis key as date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisPfaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example #21
Source File: MapStateUvExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize JSON
            .keyBy("date","pageId") // key by date and pageId
            .map(new RichMapFunction<UserVisitWebEvent, Tuple2<String, Long>>() {
                // stores the set of userIds seen for the current key
                private MapState<String,Boolean> userIdState;
                // stores the UV value for the current key
                private ValueState<Long> uvState;

                @Override
                public Tuple2<String, Long> map(UserVisitWebEvent userVisitWebEvent) throws Exception {
                    // initialize uvState
                    if(null == uvState.value()){
                        uvState.update(0L);
                    }
                    // if userIdState does not contain the current userId, the user has not
                    // visited this page today: put the userId into userIdState and add 1 to the UV
                    if(!userIdState.contains(userVisitWebEvent.getUserId())){
                        userIdState.put(userVisitWebEvent.getUserId(),null);
                        uvState.update(uvState.value() + 1);
                    }
                    // build the Redis key as date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    System.out.println(redisKey + "   :::   " + uvState.value());
                    return Tuple2.of(redisKey, uvState.value());
                }

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    // restore userIdState from Flink state
                    userIdState = getRuntimeContext().getMapState(
                            new MapStateDescriptor<>("userIdState",
                                    TypeInformation.of(new TypeHint<String>() {}),
                                    TypeInformation.of(new TypeHint<Boolean>() {})));
                    // restore uvState from Flink state
                    uvState = getRuntimeContext().getState(
                            new ValueStateDescriptor<>("uvState",
                                    TypeInformation.of(new TypeHint<Long>() {})));
                }
            })
            .addSink(new RedisSink<>(conf, new RedisSetSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example #22
Source File: TuningKeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize JSON
            // hash the record's primary id with murmur3_128 and use the resulting long as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }
 
Example #23
Source File: KeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // use RocksDBStateBackend as the state backend and enable incremental checkpoints
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // spinning-disk + high-memory profile; an SSD is strongly recommended for RocksDB
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // checkpoint every 10 minutes
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // configure checkpointing
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer configuration
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // deserialize JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }
 
Example #24
Source File: FlinkPipelineOptionsTest.java    From beam with Apache License 2.0
/** These defaults should only be changed with a very good reason. */
@Test
public void testDefaults() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  assertThat(options.getParallelism(), is(-1));
  assertThat(options.getMaxParallelism(), is(-1));
  assertThat(options.getFlinkMaster(), is("[auto]"));
  assertThat(options.getFilesToStage(), is(nullValue()));
  assertThat(options.getLatencyTrackingInterval(), is(0L));
  assertThat(options.getShutdownSourcesAfterIdleMs(), is(-1L));
  assertThat(options.getObjectReuse(), is(false));
  assertThat(options.getCheckpointingMode(), is(CheckpointingMode.EXACTLY_ONCE.name()));
  assertThat(options.getMinPauseBetweenCheckpoints(), is(-1L));
  assertThat(options.getCheckpointingInterval(), is(-1L));
  assertThat(options.getCheckpointTimeoutMillis(), is(-1L));
  assertThat(options.getNumConcurrentCheckpoints(), is(1));
  assertThat(options.getFailOnCheckpointingErrors(), is(true));
  assertThat(options.getFinishBundleBeforeCheckpointing(), is(false));
  assertThat(options.getNumberOfExecutionRetries(), is(-1));
  assertThat(options.getExecutionRetryDelay(), is(-1L));
  assertThat(options.getRetainExternalizedCheckpointsOnCancellation(), is(false));
  assertThat(options.getStateBackendFactory(), is(nullValue()));
  assertThat(options.getMaxBundleSize(), is(1000L));
  assertThat(options.getMaxBundleTimeMills(), is(1000L));
  assertThat(options.getExecutionModeForBatch(), is(ExecutionMode.PIPELINED.name()));
  assertThat(options.getSavepointPath(), is(nullValue()));
  assertThat(options.getAllowNonRestoredState(), is(false));
  assertThat(options.getDisableMetrics(), is(false));
}
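A sketch of overriding the checkpointing-related defaults asserted above. The setter names are inferred from Beam's usual PipelineOptions getter/setter pairing rather than taken from this listing:

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setCheckpointingInterval(60_000L);                            // checkpoint every minute
  options.setCheckpointingMode(CheckpointingMode.AT_LEAST_ONCE.name()); // mode is stored as a String
  options.setNumConcurrentCheckpoints(1);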
 
Example #25
Source File: NotifyCheckpointAbortedITCase.java    From flink with Apache License 2.0
/**
 * Verify operators would be notified as checkpoint aborted.
 *
 * <p>The job runs with at least two checkpoints. The 1st checkpoint fails when being added to the
 * checkpoint store, and the 2nd checkpoint is declined in the async checkpoint phase of 'DeclineSink'.
 *
 * <p>The job graph looks like:
 * NormalSource --> keyBy --> NormalMap --> DeclineSink
 */
@Test(timeout = TEST_TIMEOUT)
public void testNotifyCheckpointAborted() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
	env.getCheckpointConfig().enableUnalignedCheckpoints(unalignedCheckpointEnabled);
	env.getCheckpointConfig().setTolerableCheckpointFailureNumber(1);
	env.disableOperatorChaining();
	env.setParallelism(1);

	final StateBackend failingStateBackend = new DeclineSinkFailingStateBackend(checkpointPath);
	env.setStateBackend(failingStateBackend);

	env.addSource(new NormalSource()).name("NormalSource")
		.keyBy((KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0)
		.transform("NormalMap", TypeInformation.of(Integer.class), new NormalMap())
		.transform(DECLINE_SINK_NAME, TypeInformation.of(Object.class), new DeclineSink());

	final ClusterClient<?> clusterClient = cluster.getClusterClient();
	JobGraph jobGraph = env.getStreamGraph().getJobGraph();
	JobID jobID = jobGraph.getJobID();

	ClientUtils.submitJob(clusterClient, jobGraph);

	TestingCompletedCheckpointStore.addCheckpointLatch.await();
	TestingCompletedCheckpointStore.abortCheckpointLatch.trigger();

	verifyAllOperatorsNotifyAborted();
	resetAllOperatorsNotifyAbortedLatches();
	verifyAllOperatorsNotifyAbortedTimes(1);

	DeclineSink.waitLatch.trigger();
	verifyAllOperatorsNotifyAborted();
	verifyAllOperatorsNotifyAbortedTimes(2);

	clusterClient.cancel(jobID).get();
}
 
Example #26
Source File: RegionFailoverITCase.java    From flink with Apache License 2.0
private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();

		// Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions, so this stream graph consists of 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// another stream graph, completely disconnected from the one above.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS)).
			name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}
 