org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase Java Examples

The following examples show how to use org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase. Each example is drawn from an open-source project; the originating source file and license are noted above the code.
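
Before looking at the project snippets, here is a minimal, self-contained sketch of the pattern they all share: build a version-specific consumer (the universal FlinkKafkaConsumer is assumed to be on the classpath here, and the topic, broker address and group id are placeholders), configure the start position and offset-commit behaviour through the FlinkKafkaConsumerBase API, and register it as a source.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase;

public class FlinkKafkaConsumerBaseSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(60_000); // checkpoint once per minute

        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker address
        props.setProperty("group.id", "example-group");           // placeholder consumer group

        // FlinkKafkaConsumerBase is the shared parent of the version-specific consumers,
        // so the configuration calls below are available on all of them.
        FlinkKafkaConsumerBase<String> consumer =
                new FlinkKafkaConsumer<>("example-topic", new SimpleStringSchema(), props)
                        .setStartFromGroupOffsets();      // resume from committed group offsets
        consumer.setCommitOffsetsOnCheckpoints(true);     // commit offsets back to Kafka on each checkpoint

        env.addSource(consumer).print();
        env.execute("FlinkKafkaConsumerBase sketch");
    }
}

The same start-position and commit settings appear repeatedly in the examples below, regardless of which concrete consumer (0.8, 0.9, 0.10, 0.11 or the universal one) is instantiated.
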
Example #1
Source File: KafkaDynamicTableFactoryTestBase.java    From flink with Apache License 2.0
@Test
public void testTableSourceCommitOnCheckpointsDisabled() {
	//Construct table source using options and table source factory
	ObjectIdentifier objectIdentifier = ObjectIdentifier.of(
		"default",
		"default",
		"scanTable");
	Map<String, String> tableOptions = getFullSourceOptions();
	tableOptions.remove("properties.group.id");
	CatalogTable catalogTable = createKafkaSourceCatalogTable(tableOptions);
	final DynamicTableSource tableSource = FactoryUtil.createTableSource(null,
		objectIdentifier,
		catalogTable,
		new Configuration(),
		Thread.currentThread().getContextClassLoader());

	// The commitOnCheckpoints flag should be false when no consumer group is set.
	assertThat(tableSource, instanceOf(KafkaDynamicSourceBase.class));
	ScanTableSource.ScanRuntimeProvider providerWithoutGroupId = ((KafkaDynamicSourceBase) tableSource)
		.getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
	assertThat(providerWithoutGroupId, instanceOf(SourceFunctionProvider.class));
	final SourceFunctionProvider functionProviderWithoutGroupId = (SourceFunctionProvider) providerWithoutGroupId;
	final SourceFunction<RowData> function = functionProviderWithoutGroupId.createSourceFunction();
	assertFalse(((FlinkKafkaConsumerBase) function).getEnableCommitOnCheckpoints());
}
 
Example #2
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
/**
 * Initialization (executed in the driver phase).
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode();

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //latest   earliest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}
 
Example #3
Source File: KafkaDynamicSourceBase.java    From flink with Apache License 2.0
/**
 * Returns a version-specific Kafka consumer with the start position configured.
 *
 * @param topic                 Kafka topic to consume.
 * @param properties            Properties for the Kafka consumer.
 * @param deserializationSchema Deserialization schema to use for Kafka records.
 * @return The version-specific Kafka consumer
 */
protected FlinkKafkaConsumerBase<RowData> getKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<RowData> deserializationSchema) {
	FlinkKafkaConsumerBase<RowData> kafkaConsumer =
			createKafkaConsumer(topic, properties, deserializationSchema);
	switch (startupMode) {
		case EARLIEST:
			kafkaConsumer.setStartFromEarliest();
			break;
		case LATEST:
			kafkaConsumer.setStartFromLatest();
			break;
		case GROUP_OFFSETS:
			kafkaConsumer.setStartFromGroupOffsets();
			break;
		case SPECIFIC_OFFSETS:
			kafkaConsumer.setStartFromSpecificOffsets(specificStartupOffsets);
			break;
		case TIMESTAMP:
			kafkaConsumer.setStartFromTimestamp(startupTimestampMillis);
			break;
		}
	kafkaConsumer.setCommitOffsetsOnCheckpoints(properties.getProperty("group.id") != null);
	return kafkaConsumer;
}
 
Example #4
Source File: KafkaDynamicSourceBase.java    From flink with Apache License 2.0
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
	DeserializationSchema<RowData> deserializationSchema =
			this.decodingFormat.createRuntimeDecoder(runtimeProviderContext, this.outputDataType);
	// Version-specific Kafka consumer
	FlinkKafkaConsumerBase<RowData> kafkaConsumer =
			getKafkaConsumer(topic, properties, deserializationSchema);
	return SourceFunctionProvider.of(kafkaConsumer, false);
}
 
Example #5
Source File: Kafka010DynamicSource.java    From flink with Apache License 2.0
@Override
protected FlinkKafkaConsumerBase<RowData> createKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<RowData> deserializationSchema) {
	return new FlinkKafkaConsumer010<>(topic, deserializationSchema, properties);
}
 
Example #6
Source File: Kafka011DynamicSource.java    From flink with Apache License 2.0
@Override
protected FlinkKafkaConsumerBase<RowData> createKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<RowData> deserializationSchema) {
	return new FlinkKafkaConsumer011<>(topic, deserializationSchema, properties);
}
 
Example #7
Source File: KafkaDynamicSource.java    From flink with Apache License 2.0
@Override
protected FlinkKafkaConsumerBase<RowData> createKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<RowData> deserializationSchema) {
	return new FlinkKafkaConsumer<>(topic, deserializationSchema, properties);
}
 
Example #8
Source File: KafkaTableSource.java    From df_data_service with Apache License 2.0
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
    // Version-specific Kafka consumer
    FlinkKafkaConsumerBase<Row> kafkaConsumer = getKafkaConsumer(topic, properties, deserializationSchema);
    DataStream<Row> kafkaSource = env.addSource(kafkaConsumer);
    return kafkaSource;
}
 
Example #9
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
/**
 * Initialization (executed in the driver phase).
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode(); //latest earliest

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //latest   earliest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}
 
Example #10
Source File: KafkaSource09.java    From sylph with Apache License 2.0
@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets, KafkaDeserializationSchema<Row> deserializationSchema, Properties properties)
{
    // kafka 0.8 and 0.9 require the ZooKeeper connect address to be set
    properties.put("zookeeper.connect", config.getZookeeper());
    //"enable.auto.commit"-> true
    //"auto.commit.interval.ms" -> 90000
    return new FlinkKafkaConsumer09<>(topicSets, deserializationSchema, properties);
}
 
Example #11
Source File: KafkaSource.java    From sylph with Apache License 2.0
@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets, KafkaDeserializationSchema<Row> deserializationSchema, Properties properties)
{
    //"enable.auto.commit"-> true
    //"auto.commit.interval.ms" -> 90000
    return new FlinkKafkaConsumer010<>(
            topicSets,
            deserializationSchema,
            properties);
}
 
Example #12
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
/**
 * Initialization (executed in the driver phase).
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode(); //latest earliest

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //largest   smallest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}
 
Example #13
Source File: HyperLogLogUvExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // deserialize the JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // build the Redis key in the format date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisPfaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example #14
Source File: RedisSetUvExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // deserialize the JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // build the Redis key in the format date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example #15
Source File: TuningKeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize the JSON
            // hash the log's primary key id with murmur3_128 and use the resulting long value as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }
 
Example #16
Source File: KeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // use RocksDBStateBackend as the state backend and enable incremental checkpoints
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // spinning-disk + memory profile; an SSD is strongly recommended for RocksDB
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // checkpoint every 10 minutes
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // configure checkpointing
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer configuration
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // deserialize the JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }
 
Example #17
Source File: KafkaSource08.java    From sylph with Apache License 2.0
@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets, KafkaDeserializationSchema<Row> deserializationSchema, Properties properties)
{
    // kafka 0.8 and 0.9 require the ZooKeeper connect address to be set
    properties.put("zookeeper.connect", config.getZookeeper());
    //"auto.commit.enable"-> true
    //"auto.commit.interval.ms" -> 90000
    FlinkKafkaConsumer08<Row> kafkaConsumer08 = new FlinkKafkaConsumer08<>(topicSets, deserializationSchema, properties);
    //kafkaConsumer08.setCommitOffsetsOnCheckpoints(true);
    return kafkaConsumer08;
}
 
Example #18
Source File: MapStateUvExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize the JSON
            .keyBy("date","pageId") // keyBy date and pageId
            .map(new RichMapFunction<UserVisitWebEvent, Tuple2<String, Long>>() {
                // set of userIds already seen for the current key
                private MapState<String,Boolean> userIdState;
                // UV count for the current key
                private ValueState<Long> uvState;

                @Override
                public Tuple2<String, Long> map(UserVisitWebEvent userVisitWebEvent) throws Exception {
                    // initialize uvState
                    if(null == uvState.value()){
                        uvState.update(0L);
                    }
                    // if userIdState does not contain the current userId, the user has not visited this page today,
                    // so put the userId into userIdState and increment the UV count
                    if(!userIdState.contains(userVisitWebEvent.getUserId())){
                        userIdState.put(userVisitWebEvent.getUserId(),null);
                        uvState.update(uvState.value() + 1);
                    }
                    // build the Redis key in the format date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    System.out.println(redisKey + "   :::   " + uvState.value());
                    return Tuple2.of(redisKey, uvState.value());
                }

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    // restore userIdState from state
                    userIdState = getRuntimeContext().getMapState(
                            new MapStateDescriptor<>("userIdState",
                                    TypeInformation.of(new TypeHint<String>() {}),
                                    TypeInformation.of(new TypeHint<Boolean>() {})));
                    // restore uvState from state
                    uvState = getRuntimeContext().getState(
                            new ValueStateDescriptor<>("uvState",
                                    TypeInformation.of(new TypeHint<Long>() {})));
                }
            })
            .addSink(new RedisSink<>(conf, new RedisSetSinkMapper()));

        env.execute("Redis Set UV Stat");
    }
 
Example #19
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
public abstract FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets,
        KafkaDeserializationSchema<Row> deserializationSchema, Properties properties);
 
Example #20
Source File: Kafka010AvroTableSource.java    From df_data_service with Apache License 2.0
@Override
FlinkKafkaConsumerBase<Row> getKafkaConsumer(String topic, Properties properties, DeserializationSchema<Row> deserializationSchema) {
    return new FlinkKafkaConsumer010<>(topic, deserializationSchema, properties);
}
 
Example #21
Source File: Kafka09AvroTableSource.java    From df_data_service with Apache License 2.0
@Override
FlinkKafkaConsumerBase<Row> getKafkaConsumer(String topic, Properties properties, DeserializationSchema<Row> deserializationSchema) {
    return new FlinkKafkaConsumer09<>(topic, deserializationSchema, properties);
}
 
Example #22
Source File: PvStatLocalKeyByExactlyOnce.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // checkpoint once per minute
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // EXACTLY_ONCE checkpoint semantics
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, PvStatExactlyOnceKafkaUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumerBase<String> appKafkaConsumer = new FlinkKafkaConsumer011<>(
                // kafka topic, String deserialization
                PvStatExactlyOnceKafkaUtil.topic, new SimpleStringSchema(), props).setStartFromLatest();


        env.addSource(appKafkaConsumer)
                .flatMap(new LocalKeyByFlatMap(10))
                // keyBy appId
                .keyBy((KeySelector<Tuple2<String, Long>, String>) appIdPv -> appIdPv.f0)
                .map(new RichMapFunction<Tuple2<String, Long>, Tuple2<String, Long>>() {
                    private ValueState<Long> pvState;
                    private long pv = 0;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        // initialize the state
                        pvState = getRuntimeContext().getState(
                                new ValueStateDescriptor<>("pvStat",
                                        TypeInformation.of(new TypeHint<Long>() {
                                        })));
                    }

                    @Override
                    public Tuple2<String, Long> map(Tuple2<String, Long> tuple2) throws Exception {
                        // read this app's PV from state, add the newly received PV, and update the state
                        if (null == pvState.value()) {
                            log.info("{} is new, PV is {}", tuple2.f0, tuple2.f1);
                            pv = tuple2.f1;
                        } else {
                            pv = pvState.value();
                            pv += tuple2.f1;
                            log.info("{} is old, PV is {}", tuple2.f0, pv);
                        }
                        pvState.update(pv);
                        tuple2.setField(pv, 1);
                        return tuple2;
                    }
                })
                .print();

        env.execute("Flink pv stat LocalKeyBy");
    }
 
Example #23
Source File: KafkaDynamicTableFactoryTestBase.java    From flink with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testTableSource() {
	// prepare parameters for Kafka table source
	final DataType producedDataType = SOURCE_SCHEMA.toPhysicalRowDataType();

	final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
	specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_0), OFFSET_0);
	specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_1), OFFSET_1);

	DecodingFormat<DeserializationSchema<RowData>> decodingFormat =
			new TestFormatFactory.DecodingFormatMock(",", true);

	// Construct table source using options and table source factory
	ObjectIdentifier objectIdentifier = ObjectIdentifier.of(
			"default",
			"default",
			"scanTable");
	CatalogTable catalogTable = createKafkaSourceCatalogTable();
	final DynamicTableSource actualSource = FactoryUtil.createTableSource(null,
			objectIdentifier,
			catalogTable,
			new Configuration(),
			Thread.currentThread().getContextClassLoader());

	// Test scan source equals
	final KafkaDynamicSourceBase expectedKafkaSource = getExpectedScanSource(
			producedDataType,
			TOPIC,
			KAFKA_PROPERTIES,
			decodingFormat,
			StartupMode.SPECIFIC_OFFSETS,
			specificOffsets,
			0);
	final KafkaDynamicSourceBase actualKafkaSource = (KafkaDynamicSourceBase) actualSource;
	assertEquals(actualKafkaSource, expectedKafkaSource);

	// Test Kafka consumer
	ScanTableSource.ScanRuntimeProvider provider =
			actualKafkaSource.getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
	assertThat(provider, instanceOf(SourceFunctionProvider.class));
	final SourceFunctionProvider sourceFunctionProvider = (SourceFunctionProvider) provider;
	final SourceFunction<RowData> sourceFunction = sourceFunctionProvider.createSourceFunction();
	assertThat(sourceFunction, instanceOf(getExpectedConsumerClass()));
	// The commitOnCheckpoints flag should be true when a consumer group is set
	assertTrue(((FlinkKafkaConsumerBase) sourceFunction).getEnableCommitOnCheckpoints());
}