org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema Java Examples

The following examples show how to use org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema. They are taken from open-source projects; each example lists the source file, the project it comes from, and its license.
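Before working through the examples, it may help to see the shape of the interface itself. The sketch below is a minimal custom implementation, not taken from any of the projects listed here; the class name RecordInfoDeserializationSchema is made up for illustration, and it assumes the legacy flink-connector-kafka dependency that provides the FlinkKafkaConsumer family used throughout these examples.

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;
import org.apache.kafka.clients.consumer.ConsumerRecord;

import java.nio.charset.StandardCharsets;

// Illustrative sketch (not from the projects below): turns each Kafka record into a
// "topic,partition,offset,value" string. Unlike a plain DeserializationSchema, a
// KafkaDeserializationSchema receives the whole ConsumerRecord, so key, value and
// record metadata are all available during deserialization.
public class RecordInfoDeserializationSchema implements KafkaDeserializationSchema<String> {

    @Override
    public boolean isEndOfStream(String nextElement) {
        return false; // unbounded stream: never terminate based on an element
    }

    @Override
    public String deserialize(ConsumerRecord<byte[], byte[]> record) {
        String value = record.value() == null
                ? ""
                : new String(record.value(), StandardCharsets.UTF_8);
        return record.topic() + "," + record.partition() + "," + record.offset() + "," + value;
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return Types.STRING;
    }
}

Such a schema is handed directly to a consumer, for example new FlinkKafkaConsumer<>("my-topic", new RecordInfoDeserializationSchema(), kafkaProperties), mirroring how the examples below pass their schemas to the various FlinkKafkaConsumer versions.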
Example #1
Source File: Kafka08Fetcher.java    From Flink-CEPplus with Apache License 2.0
private SimpleConsumerThread<T> createAndStartSimpleConsumerThread(
		List<KafkaTopicPartitionState<TopicAndPartition>> seedPartitions,
		Node leader,
		ExceptionProxy errorHandler) throws IOException, ClassNotFoundException {
	// each thread needs its own copy of the deserializer, because the deserializer is
	// not necessarily thread safe
	final KafkaDeserializationSchema<T> clonedDeserializer =
			InstantiationUtil.clone(deserializer, runtimeContext.getUserCodeClassLoader());

	// seed thread with list of fetch partitions (otherwise it would shut down immediately again)
	SimpleConsumerThread<T> brokerThread = new SimpleConsumerThread<>(
			this, errorHandler, kafkaConfig, leader, seedPartitions, unassignedPartitionsQueue,
			clonedDeserializer, invalidOffsetBehavior);

	brokerThread.setName(String.format("SimpleConsumer - %s - broker-%s (%s:%d)",
			runtimeContext.getTaskName(), leader.id(), leader.host(), leader.port()));
	brokerThread.setDaemon(true);
	brokerThread.start();

	LOG.info("Starting thread {}", brokerThread.getName());
	return brokerThread;
}
 
Example #2
Source File: Kafka08Fetcher.java    From flink with Apache License 2.0
private SimpleConsumerThread<T> createAndStartSimpleConsumerThread(
		List<KafkaTopicPartitionState<TopicAndPartition>> seedPartitions,
		Node leader,
		ExceptionProxy errorHandler) throws IOException, ClassNotFoundException {
	// each thread needs its own copy of the deserializer, because the deserializer is
	// not necessarily thread safe
	final KafkaDeserializationSchema<T> clonedDeserializer =
			InstantiationUtil.clone(deserializer, runtimeContext.getUserCodeClassLoader());

	// seed thread with list of fetch partitions (otherwise it would shut down immediately again)
	SimpleConsumerThread<T> brokerThread = new SimpleConsumerThread<>(
			this, errorHandler, kafkaConfig, leader, seedPartitions, unassignedPartitionsQueue,
			clonedDeserializer, invalidOffsetBehavior);

	brokerThread.setName(String.format("SimpleConsumer - %s - broker-%s (%s:%d)",
			runtimeContext.getTaskName(), leader.id(), leader.host(), leader.port()));
	brokerThread.setDaemon(true);
	brokerThread.start();

	LOG.info("Starting thread {}", brokerThread.getName());
	return brokerThread;
}
 
Example #3
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
/**
 * Initialization (executed in the driver phase)
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode();

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //latest   earliest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}
 
Example #4
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
/**
 * Initialization (executed in the driver phase)
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode(); //latest earliest

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //largest   smallest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}
 
Example #5
Source File: KafkaSource.java    From sylph with Apache License 2.0
@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets, KafkaDeserializationSchema<Row> deserializationSchema, Properties properties)
{
    //"enable.auto.commit"-> true
    //"auto.commit.interval.ms" -> 90000
    return new FlinkKafkaConsumer010<>(
            topicSets,
            deserializationSchema,
            properties);
}
 
Example #6
Source File: KafkaSource08.java    From sylph with Apache License 2.0
@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets, KafkaDeserializationSchema<Row> deserializationSchema, Properties properties)
{
    // Kafka 0.8 and 0.9 require the ZooKeeper connect string to be set
    properties.put("zookeeper.connect", config.getZookeeper());
    //"auto.commit.enable"-> true
    //"auto.commit.interval.ms" -> 90000
    FlinkKafkaConsumer08<Row> kafkaConsumer08 = new FlinkKafkaConsumer08<>(topicSets, deserializationSchema, properties);
    //kafkaConsumer08.setCommitOffsetsOnCheckpoints(true);
    return kafkaConsumer08;
}
 
Example #7
Source File: KafkaSource09.java    From sylph with Apache License 2.0
@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets, KafkaDeserializationSchema<Row> deserializationSchema, Properties properties)
{
    // Kafka 0.8 and 0.9 require the ZooKeeper connect string to be set
    properties.put("zookeeper.connect", config.getZookeeper());
    //"enable.auto.commit"-> true
    //"auto.commit.interval.ms" -> 90000
    return new FlinkKafkaConsumer09<>(topicSets, deserializationSchema, properties);
}
 
Example #8
Source File: KafkaToHDFSAvroJob.java    From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {

        ParameterTool params = Utils.parseArgs(args);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        KafkaDeserializationSchema<Message> schema = ClouderaRegistryKafkaDeserializationSchema
                .builder(Message.class)
                .setConfig(Utils.readSchemaRegistryProperties(params))
                .build();

        FlinkKafkaConsumer<Message> consumer = new FlinkKafkaConsumer<Message>(params.getRequired(K_KAFKA_TOPIC), schema, Utils.readKafkaProperties(params));

        DataStream<String> source = env.addSource(consumer)
                .name("Kafka Source")
                .uid("Kafka Source")
                .map(record -> record.getId() + "," + record.getName() + "," + record.getDescription())
                .name("ToOutputString");

        StreamingFileSink<String> sink = StreamingFileSink
                .forRowFormat(new Path(params.getRequired(K_HDFS_OUTPUT)), new SimpleStringEncoder<String>("UTF-8"))
                .build();

        source.addSink(sink)
                .name("FS Sink")
                .uid("FS Sink");

        source.print();

        env.execute("Flink Streaming Secured Job Sample");
    }
 
Example #9
Source File: ProtobufKafkaSourceProvider.java    From stateful-functions with Apache License 2.0
private <T> KafkaDeserializationSchema<T> deserializationSchema(JsonNode json) {
  String descriptorSetPath = Selectors.textAt(json, DESCRIPTOR_SET_POINTER);
  String messageType = Selectors.textAt(json, MESSAGE_TYPE_POINTER);
  // this cast is safe since we validate that the produced message type (T) is assignable to a
  // Message.
  // see asJsonIngressSpec()
  @SuppressWarnings("unchecked")
  KafkaIngressDeserializer<T> deserializer =
      (KafkaIngressDeserializer<T>)
          new ProtobufKafkaIngressDeserializer(descriptorSetPath, messageType);
  return new KafkaDeserializationSchemaDelegate<>(deserializer);
}
 
Example #10
Source File: ProtobufKafkaSourceProvider.java    From stateful-functions with Apache License 2.0
@Override
public <T> SourceFunction<T> forSpec(IngressSpec<T> spec) {
  JsonNode json = asJsonIngressSpec(spec);
  Properties properties = kafkaClientProperties(json);
  List<String> topics = Selectors.textListAt(json, TOPICS_POINTER);
  KafkaDeserializationSchema<T> deserializationSchema = deserializationSchema(json);
  return new FlinkKafkaConsumer<>(topics, deserializationSchema, properties);
}
 
Example #11
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
/**
 * Initialization (executed in the driver phase)
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode(); //latest earliest

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //latest   earliest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}
 
Example #12
Source File: Kafka010Fetcher.java    From flink with Apache License 2.0
public Kafka010Fetcher(
		SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
		SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
		SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
		ProcessingTimeService processingTimeProvider,
		long autoWatermarkInterval,
		ClassLoader userCodeClassLoader,
		String taskNameWithSubtasks,
		KafkaDeserializationSchema<T> deserializer,
		Properties kafkaProperties,
		long pollTimeout,
		MetricGroup subtaskMetricGroup,
		MetricGroup consumerMetricGroup,
		boolean useMetrics,
		FlinkConnectorRateLimiter rateLimiter) throws Exception {
	super(
			sourceContext,
			assignedPartitionsWithInitialOffsets,
			watermarksPeriodic,
			watermarksPunctuated,
			processingTimeProvider,
			autoWatermarkInterval,
			userCodeClassLoader,
			taskNameWithSubtasks,
			deserializer,
			kafkaProperties,
			pollTimeout,
			subtaskMetricGroup,
			consumerMetricGroup,
			useMetrics, rateLimiter);
}
 
Example #13
Source File: KafkaFetcher.java    From flink with Apache License 2.0
public KafkaFetcher(
	SourceFunction.SourceContext<T> sourceContext,
	Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
	SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
	SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
	ProcessingTimeService processingTimeProvider,
	long autoWatermarkInterval,
	ClassLoader userCodeClassLoader,
	String taskNameWithSubtasks,
	KafkaDeserializationSchema<T> deserializer,
	Properties kafkaProperties,
	long pollTimeout,
	MetricGroup subtaskMetricGroup,
	MetricGroup consumerMetricGroup,
	boolean useMetrics) throws Exception {
	super(
		sourceContext,
		assignedPartitionsWithInitialOffsets,
		watermarksPeriodic,
		watermarksPunctuated,
		processingTimeProvider,
		autoWatermarkInterval,
		userCodeClassLoader,
		consumerMetricGroup,
		useMetrics);

	this.deserializer = deserializer;
	this.handover = new Handover();

	this.consumerThread = new KafkaConsumerThread(
		LOG,
		handover,
		kafkaProperties,
		unassignedPartitionsQueue,
		getFetcherName() + " for " + taskNameWithSubtasks,
		pollTimeout,
		useMetrics,
		consumerMetricGroup,
		subtaskMetricGroup);
}
 
Example #14
Source File: Kafka08Fetcher.java    From flink with Apache License 2.0
public Kafka08Fetcher(
		SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> seedPartitionsWithInitialOffsets,
		SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
		SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
		StreamingRuntimeContext runtimeContext,
		KafkaDeserializationSchema<T> deserializer,
		Properties kafkaProperties,
		long autoCommitInterval,
		MetricGroup consumerMetricGroup,
		boolean useMetrics) throws Exception {
	super(
			sourceContext,
			seedPartitionsWithInitialOffsets,
			watermarksPeriodic,
			watermarksPunctuated,
			runtimeContext.getProcessingTimeService(),
			runtimeContext.getExecutionConfig().getAutoWatermarkInterval(),
			runtimeContext.getUserCodeClassLoader(),
			consumerMetricGroup,
			useMetrics);

	this.deserializer = checkNotNull(deserializer);
	this.kafkaConfig = checkNotNull(kafkaProperties);
	this.runtimeContext = runtimeContext;
	this.invalidOffsetBehavior = getInvalidOffsetBehavior(kafkaProperties);
	this.autoCommitInterval = autoCommitInterval;
}
 
Example #15
Source File: SimpleConsumerThread.java    From flink with Apache License 2.0
public SimpleConsumerThread(
		Kafka08Fetcher<T> owner,
		ExceptionProxy errorHandler,
		Properties config,
		Node broker,
		List<KafkaTopicPartitionState<TopicAndPartition>> seedPartitions,
		ClosableBlockingQueue<KafkaTopicPartitionState<TopicAndPartition>> unassignedPartitions,
		KafkaDeserializationSchema<T> deserializer,
		long invalidOffsetBehavior) {
	this.owner = owner;
	this.errorHandler = errorHandler;
	this.broker = broker;
	// all partitions should have been assigned a starting offset by the fetcher
	checkAllPartitionsHaveDefinedStartingOffsets(seedPartitions);
	this.partitions = seedPartitions;
	this.deserializer = requireNonNull(deserializer);
	this.unassignedPartitions = requireNonNull(unassignedPartitions);
	this.newPartitionsQueue = new ClosableBlockingQueue<>();
	this.invalidOffsetBehavior = invalidOffsetBehavior;

	// these are the actual configuration values of Kafka + their original default values.
	this.soTimeout = getInt(config, "socket.timeout.ms", 30000);
	this.minBytes = getInt(config, "fetch.min.bytes", 1);
	this.maxWait = getInt(config, "fetch.wait.max.ms", 100);
	this.fetchSize = getInt(config, "fetch.message.max.bytes", 1048576);
	this.bufferSize = getInt(config, "socket.receive.buffer.bytes", 65536);
	this.reconnectLimit = getInt(config, "flink.simple-consumer-reconnectLimit", 3);
	String groupId = config.getProperty("group.id", "flink-kafka-consumer-legacy-" + broker.id());
	this.clientId = config.getProperty("client.id", groupId);
}
 
Example #16
Source File: KafkaShuffleFetcher.java    From flink with Apache License 2.0
public KafkaShuffleFetcher(
		SourceFunction.SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
		SerializedValue<WatermarkStrategy<T>> watermarkStrategy,
		ProcessingTimeService processingTimeProvider,
		long autoWatermarkInterval,
		ClassLoader userCodeClassLoader,
		String taskNameWithSubtasks,
		KafkaDeserializationSchema<T> deserializer,
		Properties kafkaProperties,
		long pollTimeout,
		MetricGroup subtaskMetricGroup,
		MetricGroup consumerMetricGroup,
		boolean useMetrics,
		TypeSerializer<T> typeSerializer,
		int producerParallelism) throws Exception {
	super(
		sourceContext,
		assignedPartitionsWithInitialOffsets,
		watermarkStrategy,
		processingTimeProvider,
		autoWatermarkInterval,
		userCodeClassLoader,
		taskNameWithSubtasks,
		deserializer,
		kafkaProperties,
		pollTimeout,
		subtaskMetricGroup,
		consumerMetricGroup,
		useMetrics);

	this.kafkaShuffleDeserializer = new KafkaShuffleElementDeserializer<>(typeSerializer);
	this.watermarkHandler = new WatermarkHandler(producerParallelism);
}
 
Example #17
Source File: KafkaFetcher.java    From flink with Apache License 2.0
public KafkaFetcher(
	SourceFunction.SourceContext<T> sourceContext,
	Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
	SerializedValue<WatermarkStrategy<T>> watermarkStrategy,
	ProcessingTimeService processingTimeProvider,
	long autoWatermarkInterval,
	ClassLoader userCodeClassLoader,
	String taskNameWithSubtasks,
	KafkaDeserializationSchema<T> deserializer,
	Properties kafkaProperties,
	long pollTimeout,
	MetricGroup subtaskMetricGroup,
	MetricGroup consumerMetricGroup,
	boolean useMetrics) throws Exception {
	super(
		sourceContext,
		assignedPartitionsWithInitialOffsets,
		watermarkStrategy,
		processingTimeProvider,
		autoWatermarkInterval,
		userCodeClassLoader,
		consumerMetricGroup,
		useMetrics);

	this.deserializer = deserializer;
	this.handover = new Handover();

	this.consumerThread = new KafkaConsumerThread(
		LOG,
		handover,
		kafkaProperties,
		unassignedPartitionsQueue,
		getFetcherName() + " for " + taskNameWithSubtasks,
		pollTimeout,
		useMetrics,
		consumerMetricGroup,
		subtaskMetricGroup);
	this.kafkaCollector = new KafkaCollector();
}
 
Example #18
Source File: Kafka010Fetcher.java    From flink with Apache License 2.0
public Kafka010Fetcher(
		SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
		SerializedValue<WatermarkStrategy<T>> watermarkStrategy,
		ProcessingTimeService processingTimeProvider,
		long autoWatermarkInterval,
		ClassLoader userCodeClassLoader,
		String taskNameWithSubtasks,
		KafkaDeserializationSchema<T> deserializer,
		Properties kafkaProperties,
		long pollTimeout,
		MetricGroup subtaskMetricGroup,
		MetricGroup consumerMetricGroup,
		boolean useMetrics,
		FlinkConnectorRateLimiter rateLimiter) throws Exception {
	super(
			sourceContext,
			assignedPartitionsWithInitialOffsets,
			watermarkStrategy,
			processingTimeProvider,
			autoWatermarkInterval,
			userCodeClassLoader,
			consumerMetricGroup,
			useMetrics);

	this.deserializer = deserializer;
	this.handover = new Handover();

	this.consumerThread = new KafkaConsumerThread(
			LOG,
			handover,
			kafkaProperties,
			unassignedPartitionsQueue,
			"Kafka 0.10 Fetcher for " + taskNameWithSubtasks,
			pollTimeout,
			useMetrics,
			consumerMetricGroup,
			subtaskMetricGroup,
			rateLimiter);
}
 
Example #19
Source File: Kafka010Fetcher.java    From Flink-CEPplus with Apache License 2.0
public Kafka010Fetcher(
		SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
		SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
		SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
		ProcessingTimeService processingTimeProvider,
		long autoWatermarkInterval,
		ClassLoader userCodeClassLoader,
		String taskNameWithSubtasks,
		KafkaDeserializationSchema<T> deserializer,
		Properties kafkaProperties,
		long pollTimeout,
		MetricGroup subtaskMetricGroup,
		MetricGroup consumerMetricGroup,
		boolean useMetrics,
		FlinkConnectorRateLimiter rateLimiter) throws Exception {
	super(
			sourceContext,
			assignedPartitionsWithInitialOffsets,
			watermarksPeriodic,
			watermarksPunctuated,
			processingTimeProvider,
			autoWatermarkInterval,
			userCodeClassLoader,
			taskNameWithSubtasks,
			deserializer,
			kafkaProperties,
			pollTimeout,
			subtaskMetricGroup,
			consumerMetricGroup,
			useMetrics, rateLimiter);
}
 
Example #20
Source File: KafkaFetcher.java    From Flink-CEPplus with Apache License 2.0
public KafkaFetcher(
	SourceFunction.SourceContext<T> sourceContext,
	Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
	SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
	SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
	ProcessingTimeService processingTimeProvider,
	long autoWatermarkInterval,
	ClassLoader userCodeClassLoader,
	String taskNameWithSubtasks,
	KafkaDeserializationSchema<T> deserializer,
	Properties kafkaProperties,
	long pollTimeout,
	MetricGroup subtaskMetricGroup,
	MetricGroup consumerMetricGroup,
	boolean useMetrics) throws Exception {
	super(
		sourceContext,
		assignedPartitionsWithInitialOffsets,
		watermarksPeriodic,
		watermarksPunctuated,
		processingTimeProvider,
		autoWatermarkInterval,
		userCodeClassLoader,
		consumerMetricGroup,
		useMetrics);

	this.deserializer = deserializer;
	this.handover = new Handover();

	this.consumerThread = new KafkaConsumerThread(
		LOG,
		handover,
		kafkaProperties,
		unassignedPartitionsQueue,
		getFetcherName() + " for " + taskNameWithSubtasks,
		pollTimeout,
		useMetrics,
		consumerMetricGroup,
		subtaskMetricGroup);
}
 
Example #21
Source File: Kafka08Fetcher.java    From Flink-CEPplus with Apache License 2.0
public Kafka08Fetcher(
		SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> seedPartitionsWithInitialOffsets,
		SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
		SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
		StreamingRuntimeContext runtimeContext,
		KafkaDeserializationSchema<T> deserializer,
		Properties kafkaProperties,
		long autoCommitInterval,
		MetricGroup consumerMetricGroup,
		boolean useMetrics) throws Exception {
	super(
			sourceContext,
			seedPartitionsWithInitialOffsets,
			watermarksPeriodic,
			watermarksPunctuated,
			runtimeContext.getProcessingTimeService(),
			runtimeContext.getExecutionConfig().getAutoWatermarkInterval(),
			runtimeContext.getUserCodeClassLoader(),
			consumerMetricGroup,
			useMetrics);

	this.deserializer = checkNotNull(deserializer);
	this.kafkaConfig = checkNotNull(kafkaProperties);
	this.runtimeContext = runtimeContext;
	this.invalidOffsetBehavior = getInvalidOffsetBehavior(kafkaProperties);
	this.autoCommitInterval = autoCommitInterval;
}
 
Example #22
Source File: SimpleConsumerThread.java    From Flink-CEPplus with Apache License 2.0
public SimpleConsumerThread(
		Kafka08Fetcher<T> owner,
		ExceptionProxy errorHandler,
		Properties config,
		Node broker,
		List<KafkaTopicPartitionState<TopicAndPartition>> seedPartitions,
		ClosableBlockingQueue<KafkaTopicPartitionState<TopicAndPartition>> unassignedPartitions,
		KafkaDeserializationSchema<T> deserializer,
		long invalidOffsetBehavior) {
	this.owner = owner;
	this.errorHandler = errorHandler;
	this.broker = broker;
	// all partitions should have been assigned a starting offset by the fetcher
	checkAllPartitionsHaveDefinedStartingOffsets(seedPartitions);
	this.partitions = seedPartitions;
	this.deserializer = requireNonNull(deserializer);
	this.unassignedPartitions = requireNonNull(unassignedPartitions);
	this.newPartitionsQueue = new ClosableBlockingQueue<>();
	this.invalidOffsetBehavior = invalidOffsetBehavior;

	// these are the actual configuration values of Kafka + their original default values.
	this.soTimeout = getInt(config, "socket.timeout.ms", 30000);
	this.minBytes = getInt(config, "fetch.min.bytes", 1);
	this.maxWait = getInt(config, "fetch.wait.max.ms", 100);
	this.fetchSize = getInt(config, "fetch.message.max.bytes", 1048576);
	this.bufferSize = getInt(config, "socket.receive.buffer.bytes", 65536);
	this.reconnectLimit = getInt(config, "flink.simple-consumer-reconnectLimit", 3);
	String groupId = config.getProperty("group.id", "flink-kafka-consumer-legacy-" + broker.id());
	this.clientId = config.getProperty("client.id", groupId);
}
 
Example #23
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
public abstract FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets,
KafkaDeserializationSchema<Row> deserializationSchema, Properties properties);
 
Example #24
Source File: Kafka09Fetcher.java    From Flink-CEPplus with Apache License 2.0
public Kafka09Fetcher(
		SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
		SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
		SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
		ProcessingTimeService processingTimeProvider,
		long autoWatermarkInterval,
		ClassLoader userCodeClassLoader,
		String taskNameWithSubtasks,
		KafkaDeserializationSchema<T> deserializer,
		Properties kafkaProperties,
		long pollTimeout,
		MetricGroup subtaskMetricGroup,
		MetricGroup consumerMetricGroup,
		boolean useMetrics,
		FlinkConnectorRateLimiter rateLimiter) throws Exception {
	super(
			sourceContext,
			assignedPartitionsWithInitialOffsets,
			watermarksPeriodic,
			watermarksPunctuated,
			processingTimeProvider,
			autoWatermarkInterval,
			userCodeClassLoader,
			consumerMetricGroup,
			useMetrics);

	this.deserializer = deserializer;
	this.handover = new Handover();

	this.consumerThread = new KafkaConsumerThread(
			LOG,
			handover,
			kafkaProperties,
			unassignedPartitionsQueue,
			createCallBridge(),
			getFetcherName() + " for " + taskNameWithSubtasks,
			pollTimeout,
			useMetrics,
			consumerMetricGroup,
			subtaskMetricGroup,
			rateLimiter);
}
 
Example #25
Source File: Kafka010FetcherTest.java    From flink with Apache License 2.0
@Test
public void testCancellationWhenEmitBlocks() throws Exception {

	// ----- some test data -----

	final String topic = "test-topic";
	final int partition = 3;
	final byte[] payload = new byte[] {1, 2, 3, 4};

	final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(
			new ConsumerRecord<>(topic, partition, 15, payload, payload),
			new ConsumerRecord<>(topic, partition, 16, payload, payload),
			new ConsumerRecord<>(topic, partition, 17, payload, payload));

	final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> data = new HashMap<>();
	data.put(new TopicPartition(topic, partition), records);

	final ConsumerRecords<byte[], byte[]> consumerRecords = new ConsumerRecords<>(data);

	// ----- the test consumer -----

	final KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
	when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {
		@Override
		public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
			return consumerRecords;
		}
	});

	whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);

	// ----- build a fetcher -----

	BlockingSourceContext<String> sourceContext = new BlockingSourceContext<>();
	Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets =
		Collections.singletonMap(new KafkaTopicPartition(topic, partition), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
	KafkaDeserializationSchema<String> schema = new KafkaDeserializationSchemaWrapper<>(new SimpleStringSchema());

	final Kafka010Fetcher<String> fetcher = new Kafka010Fetcher<>(
			sourceContext,
			partitionsWithInitialOffsets,
			null, /* watermark strategy */
			new TestProcessingTimeService(),
			10, /* watermark interval */
			this.getClass().getClassLoader(),
			"task_name",
			schema,
			new Properties(),
			0L,
			new UnregisteredMetricsGroup(),
			new UnregisteredMetricsGroup(),
			false, null);

	// ----- run the fetcher -----

	final AtomicReference<Throwable> error = new AtomicReference<>();
	final Thread fetcherRunner = new Thread("fetcher runner") {

		@Override
		public void run() {
			try {
				fetcher.runFetchLoop();
			} catch (Throwable t) {
				error.set(t);
			}
		}
	};
	fetcherRunner.start();

	// wait until the thread started to emit records to the source context
	sourceContext.waitTillHasBlocker();

	// now we try to cancel the fetcher, including the interruption usually done on the task thread
	// once it has finished, there must be no more thread blocked on the source context
	fetcher.cancel();
	fetcherRunner.interrupt();
	fetcherRunner.join();

	assertFalse("fetcher threads did not properly finish", sourceContext.isStillBlocking());
}
 
Example #26
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
public abstract FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets,
KafkaDeserializationSchema<Row> deserializationSchema, Properties properties);
 
Example #27
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0
public abstract FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets,
KafkaDeserializationSchema<Row> deserializationSchema, Properties properties);
 
Example #28
Source File: KafkaSourceProvider.java    From flink-statefun with Apache License 2.0
private <T> KafkaDeserializationSchema<T> deserializationSchemaFromSpec(
    KafkaIngressSpec<T> spec) {
  return new KafkaDeserializationSchemaDelegate<>(spec.deserializer());
}
 
Example #29
Source File: KafkaSourceProvider.java    From stateful-functions with Apache License 2.0
private <T> KafkaDeserializationSchema<T> deserializationSchemaFromSpec(
    KafkaIngressSpec<T> spec) {
  KafkaIngressDeserializer<T> ingressDeserializer =
      ReflectionUtil.instantiate(spec.deserializerClass());
  return new KafkaDeserializationSchemaDelegate<>(ingressDeserializer);
}
 
Example #30
Source File: Kafka010FetcherTest.java    From flink with Apache License 2.0
@Test
public void testCancellationWhenEmitBlocks() throws Exception {

	// ----- some test data -----

	final String topic = "test-topic";
	final int partition = 3;
	final byte[] payload = new byte[] {1, 2, 3, 4};

	final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(
			new ConsumerRecord<>(topic, partition, 15, payload, payload),
			new ConsumerRecord<>(topic, partition, 16, payload, payload),
			new ConsumerRecord<>(topic, partition, 17, payload, payload));

	final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> data = new HashMap<>();
	data.put(new TopicPartition(topic, partition), records);

	final ConsumerRecords<byte[], byte[]> consumerRecords = new ConsumerRecords<>(data);

	// ----- the test consumer -----

	final KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
	when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {
		@Override
		public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
			return consumerRecords;
		}
	});

	whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);

	// ----- build a fetcher -----

	BlockingSourceContext<String> sourceContext = new BlockingSourceContext<>();
	Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets =
		Collections.singletonMap(new KafkaTopicPartition(topic, partition), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
	KafkaDeserializationSchema<String> schema = new KafkaDeserializationSchemaWrapper<>(new SimpleStringSchema());

	final Kafka010Fetcher<String> fetcher = new Kafka010Fetcher<>(
			sourceContext,
			partitionsWithInitialOffsets,
			null, /* periodic watermark extractor */
			null, /* punctuated watermark extractor */
			new TestProcessingTimeService(),
			10, /* watermark interval */
			this.getClass().getClassLoader(),
			"task_name",
			schema,
			new Properties(),
			0L,
			new UnregisteredMetricsGroup(),
			new UnregisteredMetricsGroup(),
			false, null);

	// ----- run the fetcher -----

	final AtomicReference<Throwable> error = new AtomicReference<>();
	final Thread fetcherRunner = new Thread("fetcher runner") {

		@Override
		public void run() {
			try {
				fetcher.runFetchLoop();
			} catch (Throwable t) {
				error.set(t);
			}
		}
	};
	fetcherRunner.start();

	// wait until the thread started to emit records to the source context
	sourceContext.waitTillHasBlocker();

	// now we try to cancel the fetcher, including the interruption usually done on the task thread
	// once it has finished, there must be no more thread blocked on the source context
	fetcher.cancel();
	fetcherRunner.interrupt();
	fetcherRunner.join();

	assertFalse("fetcher threads did not properly finish", sourceContext.isStillBlocking());
}