org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011 Java Examples

The following examples show how to use org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011, drawn from several open-source projects. The source file, project, and license are noted above each example.
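Before the project-specific examples, here is a minimal, self-contained sketch of the typical usage pattern. It only illustrates the constructor and the addSource() call; the broker address, group id, topic name, and class name below are placeholders rather than values taken from any example on this page.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;

public class MinimalKafka011Read {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Minimal Kafka consumer configuration (placeholder broker address and group id).
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "example-group");

        // FlinkKafkaConsumer011 takes a topic, a DeserializationSchema and the Kafka properties.
        FlinkKafkaConsumer011<String> consumer =
                new FlinkKafkaConsumer011<>("example-topic", new SimpleStringSchema(), props);
        consumer.setStartFromGroupOffsets(); // the default start position, shown here for clarity

        DataStream<String> stream = env.addSource(consumer);
        stream.print();

        env.execute("Minimal FlinkKafkaConsumer011 example");
    }
}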
Example #1
Source File: Kafka011Example.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
			.addSource(
				new FlinkKafkaConsumer011<>(
					parameterTool.getRequired("input-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
			.keyBy("word")
			.map(new RollingAdditionMapper());

	input.addSink(
			new FlinkKafkaProducer011<>(
					parameterTool.getRequired("output-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties()));

	env.execute("Kafka 0.11 Example");
}
 
Example #2
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<MetricEvent> metricData = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, MetricEvent.class));

    metricData.print();

    CheckPointUtil.setCheckpointConfig(env, parameterTool)
            .execute("zhisheng --- checkpoint config example");
}
 
Example #3
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");
    props.put("zookeeper.connect", "localhost:2181");
    props.put("group.id", "metric-group");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");  //key 反序列化
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "latest"); //value 反序列化

    DataStreamSource<String> dataStreamSource = env.addSource(new FlinkKafkaConsumer011<>(
            "metric",  //kafka topic
            new SimpleStringSchema(),  // String 序列化
            props)).setParallelism(1);

    dataStreamSource.print(); // print the records read from Kafka to the console

    env.execute("Flink add data source");
}
 
Example #4
Source File: Kafka011Example.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
			.addSource(
				new FlinkKafkaConsumer011<>(
					parameterTool.getRequired("input-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
			.keyBy("word")
			.map(new RollingAdditionMapper());

	input.addSink(
			new FlinkKafkaProducer011<>(
					parameterTool.getRequired("output-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties()));

	env.execute("Kafka 0.11 Example");
}
 
Example #5
Source File: KafkaDeserializationSchemaTest.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
        StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
        Properties props = buildKafkaProps(parameterTool);

        FlinkKafkaConsumer011<ObjectNode> kafkaConsumer = new FlinkKafkaConsumer011<>("zhisheng",
                new KafkaMetricSchema(true),
                props);

        env.addSource(kafkaConsumer)
                .flatMap(new FlatMapFunction<ObjectNode, MetricEvent>() {
                    @Override
                    public void flatMap(ObjectNode jsonNodes, Collector<MetricEvent> collector) throws Exception {
                        try {
//                            System.out.println(jsonNodes);
                            MetricEvent metricEvent = GsonUtil.fromJson(jsonNodes.get("value").asText(), MetricEvent.class);
                            collector.collect(metricEvent);
                        } catch (Exception e) {
                            log.error("jsonNodes = {} convert to MetricEvent has an error", jsonNodes, e);
                        }
                    }
                })
                .print();
        env.execute();
    }
 
Example #6
Source File: FlinkKafkaConsumerTest1.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
        StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
        Properties props = buildKafkaProps(parameterTool);
        //kafka topic list
        List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"), parameterTool.get("logs.topic"));
        FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics, new MetricSchema(), props);
        //kafka topic Pattern
        //FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(java.util.regex.Pattern.compile("test-topic-[0-9]"), new MetricSchema(), props);


//        consumer.setStartFromLatest();
//        consumer.setStartFromEarliest();
        DataStreamSource<MetricEvent> data = env.addSource(consumer);

        data.print();

        env.execute("flink kafka connector test");
    }
 
Example #7
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used by the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
 
Example #8
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used by the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(parameterTool.getInt(STREAM_PARALLELISM, 1))
            .map(string -> GsonUtil.fromJson(string, Student.class)).setParallelism(4); // parse the JSON string into a Student object

    //timeWindowAll can only run with parallelism 1
    student.timeWindowAll(Time.minutes(1)).apply(new AllWindowFunction<Student, List<Student>, TimeWindow>() {
        @Override
        public void apply(TimeWindow window, Iterable<Student> values, Collector<List<Student>> out) throws Exception {
            ArrayList<Student> students = Lists.newArrayList(values);
            if (students.size() > 0) {
                log.info("1 分钟内收集到 student 的数据条数是:" + students.size());
                out.collect(students);
            }
        }
    }).addSink(new SinkToMySQL()).setParallelism(parameterTool.getInt(STREAM_SINK_PARALLELISM, 1));

    env.execute("flink learning connectors mysql");
}
 
Example #9
Source File: FlinkKafkaSchemaTest1.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);
    //kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics, new KafkaDeserializationSchemaWrapper<>(new MetricSchema()), props);

    DataStreamSource<MetricEvent> data = env.addSource(consumer);

    data.print();

    env.execute("flink kafka connector test");
}
 
Example #10
Source File: KafkaSourceMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties properties = KafkaConfigUtil.buildKafkaProps(parameterTool);
    DataStream<String> dataStream = blinkStreamEnv.addSource(new FlinkKafkaConsumer011<>(parameterTool.get("kafka.topic"), new SimpleStringSchema(), properties));
    Table table = blinkStreamTableEnv.fromDataStream(dataStream, "word");
    blinkStreamTableEnv.registerTable("kafkaDataStream", table);

    RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(new String[]{"_count", "word"}, new DataType[]{DataTypes.BIGINT(), DataTypes.STRING()});
    blinkStreamTableEnv.registerTableSink("sinkTable", retractStreamTableSink);

    Table wordCount = blinkStreamTableEnv.sqlQuery("SELECT count(word) AS _count,word FROM kafkaDataStream GROUP BY word");

    wordCount.insertInto("sinkTable");

    blinkStreamTableEnv.execute("Blink Kafka Table Source");
}
 
Example #11
Source File: FlinkDataPipeline.java    From tutorials with MIT License
public static void capitalize() throws Exception {
    String inputTopic = "flink_input";
    String outputTopic = "flink_output";
    String consumerGroup = "baeldung";
    String address = "localhost:9092";

    StreamExecutionEnvironment environment =
      StreamExecutionEnvironment.getExecutionEnvironment();

    FlinkKafkaConsumer011<String> flinkKafkaConsumer =
      createStringConsumerForTopic(inputTopic, address, consumerGroup);
    flinkKafkaConsumer.setStartFromEarliest();

    DataStream<String> stringInputStream =
      environment.addSource(flinkKafkaConsumer);

    FlinkKafkaProducer011<String> flinkKafkaProducer =
      createStringProducer(outputTopic, address);

    stringInputStream
      .map(new WordsCapitalizer())
      .addSink(flinkKafkaProducer);

    environment.execute();
}
 
Example #12
Source File: KafkaConfigUtil.java    From flink-learning with Apache License 2.0
/**
 * Builds a Kafka source of MetricEvent records.
 *
 * @param env   the stream execution environment
 * @param topic the Kafka topic to subscribe to
 * @param time  the timestamp to start consuming from; 0 means the default start position
 * @return the Kafka data stream source
 * @throws IllegalAccessException
 */
public static DataStreamSource<MetricEvent> buildSource(StreamExecutionEnvironment env, String topic, Long time) throws IllegalAccessException {
    ParameterTool parameterTool = (ParameterTool) env.getConfig().getGlobalJobParameters();
    Properties props = buildKafkaProps(parameterTool);
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(
            topic,
            new MetricSchema(),
            props);
    // reset the offsets to the given timestamp
    if (time != 0L) {
        Map<KafkaTopicPartition, Long> partitionOffset = buildOffsetByTime(props, parameterTool, time);
        consumer.setStartFromSpecificOffsets(partitionOffset);
    }
    return env.addSource(consumer);
}
 
Example #13
Source File: JSONKeyValueDeserializationSchemaTest.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<ObjectNode> kafkaConsumer = new FlinkKafkaConsumer011<>("zhisheng",
            new JSONKeyValueDeserializationSchema(true), // the boolean controls whether metadata fields are included
            props);

    env.addSource(kafkaConsumer)
            .print();
    // the record payload is in the "value" field and the corresponding metadata in the "metadata" field (see the short sketch after this example)
    env.execute();
}
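As a small follow-up sketch of the layout described in the comment above: with the includeMetadata flag set to true, JSONKeyValueDeserializationSchema emits an ObjectNode whose "value" field holds the record payload and whose "metadata" field holds the topic, partition, and offset. The map step below is illustrative only and would replace the plain print() in the example above.

    // Illustrative only: pull the topic/offset out of the "metadata" field and the payload out of "value".
    env.addSource(kafkaConsumer)
            .map(node -> node.get("metadata").get("topic").asText()
                    + "@" + node.get("metadata").get("offset").asLong()
                    + " -> " + node.get("value").toString())
            .print();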
 
Example #14
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
        Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

        SingleOutputStreamOperator<Tuple2<String, String>> product = env.addSource(new FlinkKafkaConsumer011<>(
                parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used by the utility class above
                new SimpleStringSchema(),
                props))
                .map(string -> GsonUtil.fromJson(string, ProductEvent.class)) // deserialize JSON
                .flatMap(new FlatMapFunction<ProductEvent, Tuple2<String, String>>() {
                    @Override
                    public void flatMap(ProductEvent value, Collector<Tuple2<String, String>> out) throws Exception {
                        // collect the product id and price
                        out.collect(new Tuple2<>(value.getId().toString(), value.getPrice().toString()));
                    }
                });
//        product.print();

        // single Redis instance
        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost(parameterTool.get("redis.host")).build();
        product.addSink(new RedisSink<Tuple2<String, String>>(conf, new RedisSinkMapper()));

        // the Redis host info usually comes from a configuration file
        // Redis cluster
/*        FlinkJedisClusterConfig clusterConfig = new FlinkJedisClusterConfig.Builder()
                .setNodes(new HashSet<InetSocketAddress>(
                        Arrays.asList(new InetSocketAddress("redis1", 6379)))).build();*/

        //Redis Sentinels
/*        FlinkJedisSentinelConfig sentinelConfig = new FlinkJedisSentinelConfig.Builder()
                .setMasterName("master")
                .setSentinels(new HashSet<>(Arrays.asList("sentinel1", "sentinel2")))
                .setPassword("")
                .setDatabase(1).build();*/

        env.execute("flink redis connector");
    }
 
Example #15
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used by the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, Student.class)); // the blog post uses fastjson; here Gson parses the string into a Student object

    student.addSink(new SinkToMySQL()); // sink the data to MySQL

    env.execute("Flink data sink");
}
 
Example #16
Source File: AsyncIOAlert.java    From flink-learning with Apache License 2.0
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);

    Properties properties = KafkaConfigUtil.buildKafkaProps(parameterTool);
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(
            parameterTool.get("metrics.topic"),
            new MetricSchema(),
            properties);
    SingleOutputStreamOperator<MetricEvent> machineData = env.addSource(consumer)
            .assignTimestampsAndWatermarks(new MetricWatermark());

    AsyncDataStream.unorderedWait(machineData, new AlertRuleAsyncIOFunction(), 10000, TimeUnit.MICROSECONDS, 100)
            .map(metricEvent -> {
                List<String> ma = (List<String>) metricEvent.getFields().get("xx");
                AlertEvent alertEvent = new AlertEvent();
                alertEvent.setType(metricEvent.getName());
                alertEvent.setTrigerTime(metricEvent.getTimestamp());
                alertEvent.setMetricEvent(metricEvent);
                if (metricEvent.getTags().get("recover") != null && Boolean.valueOf(metricEvent.getTags().get("recover"))) {
                    alertEvent.setRecover(true);
                    alertEvent.setRecoverTime(metricEvent.getTimestamp());
                } else {
                    alertEvent.setRecover(false);
                }
                return alertEvent;
            })
            .print();

    env.execute("Async IO get MySQL data");
}
 
Example #17
Source File: FlinkKafkaConsumerTest2.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    env.setParallelism(1);
    Properties props = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<>("user_behavior_sink", new SimpleStringSchema(), props);

    env.addSource(consumer).print();

    env.execute("flink kafka connector test");
}
 
Example #18
Source File: TuningKeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize JSON
            // hash the log's primary key id with murmur3_128 and use the resulting long as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }
 
Example #19
Source File: KeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // use RocksDBStateBackend as the state backend and enable incremental checkpoints
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // spinning-disk + memory predefined options; an SSD is strongly recommended for RocksDB
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // checkpoint every 10 minutes
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // configure checkpointing
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer configuration
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // deserialize JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }
 
Example #20
Source File: UnionListStateExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // checkpoint every 15 seconds
        env.enableCheckpointing(TimeUnit.SECONDS.toMillis(15));
        env.setParallelism(3);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // exactly-once checkpoint semantics
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UnionListStateUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>(
                // Kafka topic and String deserialization schema
                UnionListStateUtil.topic, new SimpleStringSchema(), props);

        env.addSource(kafkaConsumer011)
                .uid(UnionListStateUtil.topic)
                .addSink(new MySink())
                .uid("MySink")
                .name("MySink");

        env.execute("Flink unionListState");
    }
 
Example #21
Source File: Kafka011SourceBuilder.java    From Alink with Apache License 2.0
@Override
public RichParallelSourceFunction<Row> build() {
    FlinkKafkaConsumer011<Row> consumer;
    if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
        Pattern pattern = Pattern.compile(topicPattern);
        consumer = new FlinkKafkaConsumer011<Row>(pattern, new MessageDeserialization(), properties);
    } else {
        consumer = new FlinkKafkaConsumer011<Row>(topic, new MessageDeserialization(), properties);
    }
    switch (super.startupMode) {
        case LATEST: {
            consumer.setStartFromLatest();
            break;
        }
        case EARLIEST: {
            consumer.setStartFromEarliest();
            break;
        }
        case GROUP_OFFSETS: {
            consumer.setStartFromGroupOffsets();
            break;
        }
        case TIMESTAMP: {
            consumer.setStartFromTimestamp(startTimeMs);
            break;
        }
        default: {
            throw new IllegalArgumentException("invalid startupMode.");
        }
    }

    return consumer;
}