org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper Java Examples

The following examples show how to use org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper. Each example is taken from an open-source project; the source file and originating project are noted above the code.
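Before the examples, here is a minimal sketch of the typical pattern (assuming a local broker at localhost:9092 and a placeholder topic name): a plain DeserializationSchema is wrapped in a KafkaDeserializationSchemaWrapper so it can be passed wherever a KafkaDeserializationSchema is expected, for instance to the universal FlinkKafkaConsumer.

public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Placeholder connection settings; adjust for your cluster.
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "localhost:9092");
    props.setProperty("group.id", "example-group");

    // Wrap a plain DeserializationSchema so it satisfies the KafkaDeserializationSchema interface.
    KafkaDeserializationSchema<String> schema =
            new KafkaDeserializationSchemaWrapper<>(new SimpleStringSchema());

    FlinkKafkaConsumer<String> consumer =
            new FlinkKafkaConsumer<>(Collections.singletonList("example-topic"), schema, props);

    env.addSource(consumer).print();
    env.execute("KafkaDeserializationSchemaWrapper sketch");
}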
Example #1
Source File: FlinkKafkaSchemaTest1.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);
    //kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics, new KafkaDeserializationSchemaWrapper<>(new MetricSchema()), props);

    DataStreamSource<MetricEvent> data = env.addSource(consumer);

    data.print();

    env.execute("flink kafka connector test");
}
 
Example #2
Source File: FlinkKafkaConsumerBaseTest.java    From flink with Apache License 2.0
@Test
public void testOpen() throws Exception {
	MockDeserializationSchema<Object> deserializationSchema = new MockDeserializationSchema<>();

	AbstractStreamOperatorTestHarness<Object> testHarness = createTestHarness(
		new DummyFlinkKafkaConsumer<>(new KafkaDeserializationSchemaWrapper<>(deserializationSchema)),
		1,
		0
	);

	testHarness.open();
	assertThat("Open method was not called", deserializationSchema.isOpenCalled(), is(true));
}
 
Example #3
Source File: FlinkKafkaSchemaTest1.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);
    //kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics, new KafkaDeserializationSchemaWrapper<>(new MetricSchema()), props);

    DataStreamSource<MetricEvent> data = env.addSource(consumer);

    data.print();

    env.execute("flink kafka connector test");
}
 
Example #4
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
protected void writeAppendSequence(
		String topicName,
		final int originalNumElements,
		final int numElementsToAppend,
		final int parallelism) throws Exception {

	LOG.info("\n===================================\n" +
		"== Appending sequence of " + numElementsToAppend + " into " + topicName +
		"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final SerializationSchema<Tuple2<Integer, Integer>> serSchema =
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
		new KafkaDeserializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	// -------- Write the append sequence --------

	StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
			int cnt = originalNumElements;
			int partition = getRuntimeContext().getIndexOfThisSubtask();

			while (running && cnt < numElementsToAppend + originalNumElements) {
				ctx.collect(new Tuple2<>(partition, cnt));
				cnt++;
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	}).setParallelism(parallelism);

	// the producer must not produce duplicates
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "0");
	producerProperties.putAll(secureProps);

	kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
		.setParallelism(parallelism);

	try {
		writeEnv.execute("Write sequence");
	}
	catch (Exception e) {
		throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
	}

	LOG.info("Finished writing append sequence");

	// we need to validate the sequence, because kafka's producers are not exactly once
	LOG.info("Validating sequence");
	while (!getRunningJobs(client).isEmpty()){
		Thread.sleep(50);
	}

	if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
		throw new Exception("Could not append a valid sequence to Kafka.");
	}
}
 
Example #5
Source File: Kafka010FetcherTest.java    From flink with Apache License 2.0
@Test
public void testCancellationWhenEmitBlocks() throws Exception {

	// ----- some test data -----

	final String topic = "test-topic";
	final int partition = 3;
	final byte[] payload = new byte[] {1, 2, 3, 4};

	final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(
			new ConsumerRecord<>(topic, partition, 15, payload, payload),
			new ConsumerRecord<>(topic, partition, 16, payload, payload),
			new ConsumerRecord<>(topic, partition, 17, payload, payload));

	final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> data = new HashMap<>();
	data.put(new TopicPartition(topic, partition), records);

	final ConsumerRecords<byte[], byte[]> consumerRecords = new ConsumerRecords<>(data);

	// ----- the test consumer -----

	final KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
	when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {
		@Override
		public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
			return consumerRecords;
		}
	});

	whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);

	// ----- build a fetcher -----

	BlockingSourceContext<String> sourceContext = new BlockingSourceContext<>();
	Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets =
		Collections.singletonMap(new KafkaTopicPartition(topic, partition), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
	KafkaDeserializationSchema<String> schema = new KafkaDeserializationSchemaWrapper<>(new SimpleStringSchema());

	final Kafka010Fetcher<String> fetcher = new Kafka010Fetcher<>(
			sourceContext,
			partitionsWithInitialOffsets,
			null, /* periodic watermark extractor */
			null, /* punctuated watermark extractor */
			new TestProcessingTimeService(),
			10, /* watermark interval */
			this.getClass().getClassLoader(),
			"task_name",
			schema,
			new Properties(),
			0L,
			new UnregisteredMetricsGroup(),
			new UnregisteredMetricsGroup(),
			false, null);

	// ----- run the fetcher -----

	final AtomicReference<Throwable> error = new AtomicReference<>();
	final Thread fetcherRunner = new Thread("fetcher runner") {

		@Override
		public void run() {
			try {
				fetcher.runFetchLoop();
			} catch (Throwable t) {
				error.set(t);
			}
		}
	};
	fetcherRunner.start();

	// wait until the thread started to emit records to the source context
	sourceContext.waitTillHasBlocker();

	// now we try to cancel the fetcher, including the interruption usually done on the task thread
	// once it has finished, there must be no more thread blocked on the source context
	fetcher.cancel();
	fetcherRunner.interrupt();
	fetcherRunner.join();

	assertFalse("fetcher threads did not properly finish", sourceContext.isStillBlocking());
}
 
Example #6
Source File: KafkaTestEnvironment.java    From flink with Apache License 2.0
public <T> FlinkKafkaConsumerBase<T> getConsumer(List<String> topics, DeserializationSchema<T> deserializationSchema, Properties props) {
	return getConsumer(topics, new KafkaDeserializationSchemaWrapper<T>(deserializationSchema), props);
}
 
Example #7
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
protected String writeSequence(
		String baseTopicName,
		final int numElements,
		final int parallelism,
		final int replicationFactor) throws Exception {
	LOG.info("\n===================================\n" +
			"== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
			"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
			new KeyedSerializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
			new KafkaDeserializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final int maxNumAttempts = 10;

	for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

		final String topicName = baseTopicName + '-' + attempt;

		LOG.info("Writing attempt #" + attempt);

		// -------- Write the Sequence --------

		createTestTopic(topicName, parallelism, replicationFactor);

		StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
		writeEnv.getConfig().disableSysoutLogging();

		DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

			private boolean running = true;

			@Override
			public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
				int cnt = 0;
				int partition = getRuntimeContext().getIndexOfThisSubtask();

				while (running && cnt < numElements) {
					ctx.collect(new Tuple2<>(partition, cnt));
					cnt++;
				}
			}

			@Override
			public void cancel() {
				running = false;
			}
		}).setParallelism(parallelism);

		// the producer must not produce duplicates
		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
		producerProperties.setProperty("retries", "0");
		producerProperties.putAll(secureProps);

		kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
				.setParallelism(parallelism);

		try {
			writeEnv.execute("Write sequence");
		}
		catch (Exception e) {
			LOG.error("Write attempt failed, trying again", e);
			deleteTestTopic(topicName);
			waitUntilNoJobIsRunning(client);
			continue;
		}

		LOG.info("Finished writing sequence");

		// -------- Validate the Sequence --------

		// we need to validate the sequence, because kafka's producers are not exactly once
		LOG.info("Validating sequence");

		waitUntilNoJobIsRunning(client);

		if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
			// everything is good!
			return topicName;
		}
		else {
			deleteTestTopic(topicName);
			// fall through the loop
		}
	}

	throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
 
Example #8
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
protected void writeAppendSequence(
		String topicName,
		final int originalNumElements,
		final int numElementsToAppend,
		final int parallelism) throws Exception {

	LOG.info("\n===================================\n" +
		"== Appending sequence of " + numElementsToAppend + " into " + topicName +
		"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
		new KeyedSerializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
		new KafkaDeserializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	// -------- Write the append sequence --------

	StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	writeEnv.getConfig().disableSysoutLogging();

	DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
			int cnt = originalNumElements;
			int partition = getRuntimeContext().getIndexOfThisSubtask();

			while (running && cnt < numElementsToAppend + originalNumElements) {
				ctx.collect(new Tuple2<>(partition, cnt));
				cnt++;
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	}).setParallelism(parallelism);

	// the producer must not produce duplicates
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "0");
	producerProperties.putAll(secureProps);

	kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
		.setParallelism(parallelism);

	try {
		writeEnv.execute("Write sequence");
	}
	catch (Exception e) {
		throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
	}

	LOG.info("Finished writing append sequence");

	// we need to validate the sequence, because kafka's producers are not exactly once
	LOG.info("Validating sequence");
	while (!getRunningJobs(client).isEmpty()){
		Thread.sleep(50);
	}

	if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
		throw new Exception("Could not append a valid sequence to Kafka.");
	}
}
 
Example #9
Source File: Kafka09FetcherTest.java    From flink with Apache License 2.0
@Test
public void testCancellationWhenEmitBlocks() throws Exception {

	// ----- some test data -----

	final String topic = "test-topic";
	final int partition = 3;
	final byte[] payload = new byte[] {1, 2, 3, 4};

	final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(
			new ConsumerRecord<>(topic, partition, 15, payload, payload),
			new ConsumerRecord<>(topic, partition, 16, payload, payload),
			new ConsumerRecord<>(topic, partition, 17, payload, payload));

	final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> data = new HashMap<>();
	data.put(new TopicPartition(topic, partition), records);

	final ConsumerRecords<byte[], byte[]> consumerRecords = new ConsumerRecords<>(data);

	// ----- the test consumer -----

	final KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
	when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {
		@Override
		public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
			return consumerRecords;
		}
	});

	whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);

	// ----- build a fetcher -----

	BlockingSourceContext<String> sourceContext = new BlockingSourceContext<>();
	Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets =
		Collections.singletonMap(new KafkaTopicPartition(topic, partition), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
	KafkaDeserializationSchema<String> schema = new KafkaDeserializationSchemaWrapper<>(new SimpleStringSchema());

	final Kafka09Fetcher<String> fetcher = new Kafka09Fetcher<>(
			sourceContext,
			partitionsWithInitialOffsets,
			null, /* periodic watermark extractor */
			null, /* punctuated watermark extractor */
			new TestProcessingTimeService(),
			10, /* watermark interval */
			this.getClass().getClassLoader(),
			"task_name",
			schema,
			new Properties(),
			0L,
			new UnregisteredMetricsGroup(),
			new UnregisteredMetricsGroup(),
			false, null);

	// ----- run the fetcher -----

	final AtomicReference<Throwable> error = new AtomicReference<>();
	final Thread fetcherRunner = new Thread("fetcher runner") {

		@Override
		public void run() {
			try {
				fetcher.runFetchLoop();
			} catch (Throwable t) {
				error.set(t);
			}
		}
	};
	fetcherRunner.start();

	// wait until the thread started to emit records to the source context
	sourceContext.waitTillHasBlocker();

	// now we try to cancel the fetcher, including the interruption usually done on the task thread
	// once it has finished, there must be no more thread blocked on the source context
	fetcher.cancel();
	fetcherRunner.interrupt();
	fetcherRunner.join();

	assertFalse("fetcher threads did not properly finish", sourceContext.isStillBlocking());
}
 
Example #10
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0
protected void writeAppendSequence(
		String topicName,
		final int originalNumElements,
		final int numElementsToAppend,
		final int parallelism) throws Exception {

	LOG.info("\n===================================\n" +
		"== Appending sequence of " + numElementsToAppend + " into " + topicName +
		"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
		new KeyedSerializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
		new KafkaDeserializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	// -------- Write the append sequence --------

	StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	writeEnv.getConfig().disableSysoutLogging();

	DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
			int cnt = originalNumElements;
			int partition = getRuntimeContext().getIndexOfThisSubtask();

			while (running && cnt < numElementsToAppend + originalNumElements) {
				ctx.collect(new Tuple2<>(partition, cnt));
				cnt++;
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	}).setParallelism(parallelism);

	// the producer must not produce duplicates
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "0");
	producerProperties.putAll(secureProps);

	kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
		.setParallelism(parallelism);

	try {
		writeEnv.execute("Write sequence");
	}
	catch (Exception e) {
		throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
	}

	LOG.info("Finished writing append sequence");

	// we need to validate the sequence, because kafka's producers are not exactly once
	LOG.info("Validating sequence");
	while (!getRunningJobs(client).isEmpty()){
		Thread.sleep(50);
	}

	if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
		throw new Exception("Could not append a valid sequence to Kafka.");
	}
}
 
Example #11
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0
protected String writeSequence(
		String baseTopicName,
		final int numElements,
		final int parallelism,
		final int replicationFactor) throws Exception {
	LOG.info("\n===================================\n" +
			"== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
			"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
			new KeyedSerializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
			new KafkaDeserializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final int maxNumAttempts = 10;

	for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

		final String topicName = baseTopicName + '-' + attempt;

		LOG.info("Writing attempt #" + attempt);

		// -------- Write the Sequence --------

		createTestTopic(topicName, parallelism, replicationFactor);

		StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
		writeEnv.getConfig().disableSysoutLogging();

		DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

			private boolean running = true;

			@Override
			public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
				int cnt = 0;
				int partition = getRuntimeContext().getIndexOfThisSubtask();

				while (running && cnt < numElements) {
					ctx.collect(new Tuple2<>(partition, cnt));
					cnt++;
				}
			}

			@Override
			public void cancel() {
				running = false;
			}
		}).setParallelism(parallelism);

		// the producer must not produce duplicates
		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
		producerProperties.setProperty("retries", "0");
		producerProperties.putAll(secureProps);

		kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
				.setParallelism(parallelism);

		try {
			writeEnv.execute("Write sequence");
		}
		catch (Exception e) {
			LOG.error("Write attempt failed, trying again", e);
			deleteTestTopic(topicName);
			waitUntilNoJobIsRunning(client);
			continue;
		}

		LOG.info("Finished writing sequence");

		// -------- Validate the Sequence --------

		// we need to validate the sequence, because kafka's producers are not exactly once
		LOG.info("Validating sequence");

		waitUntilNoJobIsRunning(client);

		if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
			// everything is good!
			return topicName;
		}
		else {
			deleteTestTopic(topicName);
			// fall through the loop
		}
	}

	throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
 
Example #12
Source File: KafkaTestEnvironment.java    From Flink-CEPplus with Apache License 2.0
public <T> FlinkKafkaConsumerBase<T> getConsumer(List<String> topics, DeserializationSchema<T> deserializationSchema, Properties props) {
	return getConsumer(topics, new KafkaDeserializationSchemaWrapper<T>(deserializationSchema), props);
}
 
Example #13
Source File: Kafka010FetcherTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCancellationWhenEmitBlocks() throws Exception {

	// ----- some test data -----

	final String topic = "test-topic";
	final int partition = 3;
	final byte[] payload = new byte[] {1, 2, 3, 4};

	final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(
			new ConsumerRecord<>(topic, partition, 15, payload, payload),
			new ConsumerRecord<>(topic, partition, 16, payload, payload),
			new ConsumerRecord<>(topic, partition, 17, payload, payload));

	final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> data = new HashMap<>();
	data.put(new TopicPartition(topic, partition), records);

	final ConsumerRecords<byte[], byte[]> consumerRecords = new ConsumerRecords<>(data);

	// ----- the test consumer -----

	final KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
	when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {
		@Override
		public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
			return consumerRecords;
		}
	});

	whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);

	// ----- build a fetcher -----

	BlockingSourceContext<String> sourceContext = new BlockingSourceContext<>();
	Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets =
		Collections.singletonMap(new KafkaTopicPartition(topic, partition), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
	KafkaDeserializationSchema<String> schema = new KafkaDeserializationSchemaWrapper<>(new SimpleStringSchema());

	final Kafka010Fetcher<String> fetcher = new Kafka010Fetcher<>(
			sourceContext,
			partitionsWithInitialOffsets,
			null, /* periodic watermark extractor */
			null, /* punctuated watermark extractor */
			new TestProcessingTimeService(),
			10, /* watermark interval */
			this.getClass().getClassLoader(),
			"task_name",
			schema,
			new Properties(),
			0L,
			new UnregisteredMetricsGroup(),
			new UnregisteredMetricsGroup(),
			false, null);

	// ----- run the fetcher -----

	final AtomicReference<Throwable> error = new AtomicReference<>();
	final Thread fetcherRunner = new Thread("fetcher runner") {

		@Override
		public void run() {
			try {
				fetcher.runFetchLoop();
			} catch (Throwable t) {
				error.set(t);
			}
		}
	};
	fetcherRunner.start();

	// wait until the thread started to emit records to the source context
	sourceContext.waitTillHasBlocker();

	// now we try to cancel the fetcher, including the interruption usually done on the task thread
	// once it has finished, there must be no more thread blocked on the source context
	fetcher.cancel();
	fetcherRunner.interrupt();
	fetcherRunner.join();

	assertFalse("fetcher threads did not properly finish", sourceContext.isStillBlocking());
}
 
Example #14
Source File: Kafka010FetcherTest.java    From flink with Apache License 2.0
@Test
public void testCancellationWhenEmitBlocks() throws Exception {

	// ----- some test data -----

	final String topic = "test-topic";
	final int partition = 3;
	final byte[] payload = new byte[] {1, 2, 3, 4};

	final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(
			new ConsumerRecord<>(topic, partition, 15, payload, payload),
			new ConsumerRecord<>(topic, partition, 16, payload, payload),
			new ConsumerRecord<>(topic, partition, 17, payload, payload));

	final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> data = new HashMap<>();
	data.put(new TopicPartition(topic, partition), records);

	final ConsumerRecords<byte[], byte[]> consumerRecords = new ConsumerRecords<>(data);

	// ----- the test consumer -----

	final KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
	when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {
		@Override
		public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
			return consumerRecords;
		}
	});

	whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);

	// ----- build a fetcher -----

	BlockingSourceContext<String> sourceContext = new BlockingSourceContext<>();
	Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets =
		Collections.singletonMap(new KafkaTopicPartition(topic, partition), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
	KafkaDeserializationSchema<String> schema = new KafkaDeserializationSchemaWrapper<>(new SimpleStringSchema());

	final Kafka010Fetcher<String> fetcher = new Kafka010Fetcher<>(
			sourceContext,
			partitionsWithInitialOffsets,
			null, /* watermark strategy */
			new TestProcessingTimeService(),
			10, /* watermark interval */
			this.getClass().getClassLoader(),
			"task_name",
			schema,
			new Properties(),
			0L,
			new UnregisteredMetricsGroup(),
			new UnregisteredMetricsGroup(),
			false, null);

	// ----- run the fetcher -----

	final AtomicReference<Throwable> error = new AtomicReference<>();
	final Thread fetcherRunner = new Thread("fetcher runner") {

		@Override
		public void run() {
			try {
				fetcher.runFetchLoop();
			} catch (Throwable t) {
				error.set(t);
			}
		}
	};
	fetcherRunner.start();

	// wait until the thread started to emit records to the source context
	sourceContext.waitTillHasBlocker();

	// now we try to cancel the fetcher, including the interruption usually done on the task thread
	// once it has finished, there must be no more thread blocked on the source context
	fetcher.cancel();
	fetcherRunner.interrupt();
	fetcherRunner.join();

	assertFalse("fetcher threads did not properly finish", sourceContext.isStillBlocking());
}
 
Example #15
Source File: KafkaTestEnvironment.java    From flink with Apache License 2.0
public <T> FlinkKafkaConsumerBase<T> getConsumer(List<String> topics, DeserializationSchema<T> deserializationSchema, Properties props) {
	return getConsumer(topics, new KafkaDeserializationSchemaWrapper<T>(deserializationSchema), props);
}
 
Example #16
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
protected String writeSequence(
		String baseTopicName,
		final int numElements,
		final int parallelism,
		final int replicationFactor) throws Exception {
	LOG.info("\n===================================\n" +
			"== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
			"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final SerializationSchema<Tuple2<Integer, Integer>> serSchema =
				new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
			new KafkaDeserializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final int maxNumAttempts = 10;

	for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

		final String topicName = baseTopicName + '-' + attempt;

		LOG.info("Writing attempt #" + attempt);

		// -------- Write the Sequence --------

		createTestTopic(topicName, parallelism, replicationFactor);

		StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
		DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

			private boolean running = true;

			@Override
			public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
				int cnt = 0;
				int partition = getRuntimeContext().getIndexOfThisSubtask();

				while (running && cnt < numElements) {
					ctx.collect(new Tuple2<>(partition, cnt));
					cnt++;
				}
			}

			@Override
			public void cancel() {
				running = false;
			}
		}).setParallelism(parallelism);

		// the producer must not produce duplicates
		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
		producerProperties.setProperty("retries", "0");
		producerProperties.putAll(secureProps);

		kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
				.setParallelism(parallelism);

		try {
			writeEnv.execute("Write sequence");
		}
		catch (Exception e) {
			LOG.error("Write attempt failed, trying again", e);
			deleteTestTopic(topicName);
			waitUntilNoJobIsRunning(client);
			continue;
		}

		LOG.info("Finished writing sequence");

		// -------- Validate the Sequence --------

		// we need to validate the sequence, because kafka's producers are not exactly once
		LOG.info("Validating sequence");

		waitUntilNoJobIsRunning(client);

		if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
			// everything is good!
			return topicName;
		}
		else {
			deleteTestTopic(topicName);
			// fall through the loop
		}
	}

	throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
 
Example #17
Source File: Kafka09FetcherTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCancellationWhenEmitBlocks() throws Exception {

	// ----- some test data -----

	final String topic = "test-topic";
	final int partition = 3;
	final byte[] payload = new byte[] {1, 2, 3, 4};

	final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(
			new ConsumerRecord<>(topic, partition, 15, payload, payload),
			new ConsumerRecord<>(topic, partition, 16, payload, payload),
			new ConsumerRecord<>(topic, partition, 17, payload, payload));

	final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> data = new HashMap<>();
	data.put(new TopicPartition(topic, partition), records);

	final ConsumerRecords<byte[], byte[]> consumerRecords = new ConsumerRecords<>(data);

	// ----- the test consumer -----

	final KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
	when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {
		@Override
		public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
			return consumerRecords;
		}
	});

	whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);

	// ----- build a fetcher -----

	BlockingSourceContext<String> sourceContext = new BlockingSourceContext<>();
	Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets =
		Collections.singletonMap(new KafkaTopicPartition(topic, partition), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
	KafkaDeserializationSchema<String> schema = new KafkaDeserializationSchemaWrapper<>(new SimpleStringSchema());

	final Kafka09Fetcher<String> fetcher = new Kafka09Fetcher<>(
			sourceContext,
			partitionsWithInitialOffsets,
			null, /* periodic watermark extractor */
			null, /* punctuated watermark extractor */
			new TestProcessingTimeService(),
			10, /* watermark interval */
			this.getClass().getClassLoader(),
			"task_name",
			schema,
			new Properties(),
			0L,
			new UnregisteredMetricsGroup(),
			new UnregisteredMetricsGroup(),
			false, null);

	// ----- run the fetcher -----

	final AtomicReference<Throwable> error = new AtomicReference<>();
	final Thread fetcherRunner = new Thread("fetcher runner") {

		@Override
		public void run() {
			try {
				fetcher.runFetchLoop();
			} catch (Throwable t) {
				error.set(t);
			}
		}
	};
	fetcherRunner.start();

	// wait until the thread started to emit records to the source context
	sourceContext.waitTillHasBlocker();

	// now we try to cancel the fetcher, including the interruption usually done on the task thread
	// once it has finished, there must be no more thread blocked on the source context
	fetcher.cancel();
	fetcherRunner.interrupt();
	fetcherRunner.join();

	assertFalse("fetcher threads did not properly finish", sourceContext.isStillBlocking());
}
 
Example #18
Source File: FlinkKafkaConsumer011.java    From flink with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer for Kafka 0.11.x
 *
 * <p>This constructor allows passing multiple topics to the consumer.
 *
 * @param topics
 *           The Kafka topics to read from.
 * @param deserializer
 *           The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props
 *           The properties that are used to configure both the fetcher and the offset handler.
 */
public FlinkKafkaConsumer011(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
	this(topics, new KafkaDeserializationSchemaWrapper<>(deserializer), props);
}
 
Example #19
Source File: FlinkKafkaConsumer011.java    From flink with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer for Kafka 0.11.x. Use this constructor to
 * subscribe to multiple topics based on a regular expression pattern.
 *
 * <p>If partition discovery is enabled (by setting a non-negative value for
 * {@link FlinkKafkaConsumer011#KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS} in the properties), topics
 * with names matching the pattern will also be subscribed to as they are created on the fly.
 *
 * @param subscriptionPattern
 *           The regular expression for a pattern of topic names to subscribe to.
 * @param valueDeserializer
 *           The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props
 *           The properties used to configure the Kafka consumer client, and the ZooKeeper client.
 */
@PublicEvolving
public FlinkKafkaConsumer011(Pattern subscriptionPattern, DeserializationSchema<T> valueDeserializer, Properties props) {
	this(subscriptionPattern, new KafkaDeserializationSchemaWrapper<>(valueDeserializer), props);
}
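A hedged usage sketch of the constructor above (broker address, group id, topic pattern, and discovery interval are placeholder values), enabling partition discovery so that newly created topics matching the pattern are picked up:

Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");
props.setProperty("group.id", "example-group");
// A non-negative interval (here 30 seconds) enables topic/partition discovery.
props.setProperty(FlinkKafkaConsumer011.KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS, "30000");

// Subscribes to every topic whose name matches the pattern, including topics created later.
FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<>(
		Pattern.compile("metrics-.*"), new SimpleStringSchema(), props);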
 
Example #20
Source File: FlinkKafkaConsumer010.java    From flink with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer for Kafka 0.10.x
 *
 * <p>This constructor allows passing multiple topics to the consumer.
 *
 * @param topics
 *           The Kafka topics to read from.
 * @param deserializer
 *           The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props
 *           The properties that are used to configure both the fetcher and the offset handler.
 */
public FlinkKafkaConsumer010(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
	this(topics, new KafkaDeserializationSchemaWrapper<>(deserializer), props);
}
 
Example #21
Source File: FlinkKafkaConsumer.java    From flink with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer. Use this constructor to
 * subscribe to multiple topics based on a regular expression pattern.
 *
 * <p>If partition discovery is enabled (by setting a non-negative value for
 * {@link FlinkKafkaConsumer#KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS} in the properties), topics
 * with names matching the pattern will also be subscribed to as they are created on the fly.
 *
 * @param subscriptionPattern The regular expression for a pattern of topic names to subscribe to.
 * @param valueDeserializer   The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props               The properties used to configure the Kafka consumer client.
 */
public FlinkKafkaConsumer(Pattern subscriptionPattern, DeserializationSchema<T> valueDeserializer, Properties props) {
	this(null, subscriptionPattern, new KafkaDeserializationSchemaWrapper<>(valueDeserializer), props);
}
 
Example #22
Source File: FlinkKafkaConsumer.java    From flink with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer.
 *
 * <p>This constructor allows passing multiple topics to the consumer.
 *
 * @param topics       The Kafka topics to read from.
 * @param deserializer The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props        The properties used to configure the Kafka consumer client.
 */
public FlinkKafkaConsumer(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
	this(topics, new KafkaDeserializationSchemaWrapper<>(deserializer), props);
}
 
Example #23
Source File: FlinkKafkaConsumer010.java    From flink with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer for Kafka 0.10.x. Use this constructor to
 * subscribe to multiple topics based on a regular expression pattern.
 *
 * <p>If partition discovery is enabled (by setting a non-negative value for
 * {@link FlinkKafkaConsumer010#KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS} in the properties), topics
 * with names matching the pattern will also be subscribed to as they are created on the fly.
 *
 * @param subscriptionPattern
 *           The regular expression for a pattern of topic names to subscribe to.
 * @param valueDeserializer
 *           The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props
 *           The properties used to configure the Kafka consumer client, and the ZooKeeper client.
 */
@PublicEvolving
public FlinkKafkaConsumer010(Pattern subscriptionPattern, DeserializationSchema<T> valueDeserializer, Properties props) {
	this(subscriptionPattern, new KafkaDeserializationSchemaWrapper<>(valueDeserializer), props);
}
 
Example #24
Source File: FlinkKafkaConsumer.java    From flink with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer.
 *
 * <p>This constructor allows passing multiple topics to the consumer.
 *
 * @param topics       The Kafka topics to read from.
 * @param deserializer The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props        The properties used to configure the Kafka consumer client.
 */
public FlinkKafkaConsumer(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
	this(topics, new KafkaDeserializationSchemaWrapper<>(deserializer), props);
}
 
Example #25
Source File: FlinkKafkaConsumer010.java    From flink with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer for Kafka 0.10.x. Use this constructor to
 * subscribe to multiple topics based on a regular expression pattern.
 *
 * <p>If partition discovery is enabled (by setting a non-negative value for
 * {@link FlinkKafkaConsumer010#KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS} in the properties), topics
 * with names matching the pattern will also be subscribed to as they are created on the fly.
 *
 * @param subscriptionPattern
 *           The regular expression for a pattern of topic names to subscribe to.
 * @param valueDeserializer
 *           The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props
 *           The properties used to configure the Kafka consumer client, and the ZooKeeper client.
 */
@PublicEvolving
public FlinkKafkaConsumer010(Pattern subscriptionPattern, DeserializationSchema<T> valueDeserializer, Properties props) {
	this(subscriptionPattern, new KafkaDeserializationSchemaWrapper<>(valueDeserializer), props);
}
 
Example #26
Source File: FlinkKafkaConsumer010.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer for Kafka 0.10.x. Use this constructor to
 * subscribe to multiple topics based on a regular expression pattern.
 *
 * <p>If partition discovery is enabled (by setting a non-negative value for
 * {@link FlinkKafkaConsumer010#KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS} in the properties), topics
 * with names matching the pattern will also be subscribed to as they are created on the fly.
 *
 * @param subscriptionPattern
 *           The regular expression for a pattern of topic names to subscribe to.
 * @param valueDeserializer
 *           The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props
 *           The properties used to configure the Kafka consumer client, and the ZooKeeper client.
 */
@PublicEvolving
public FlinkKafkaConsumer010(Pattern subscriptionPattern, DeserializationSchema<T> valueDeserializer, Properties props) {
	this(subscriptionPattern, new KafkaDeserializationSchemaWrapper<>(valueDeserializer), props);
}
 
Example #27
Source File: FlinkKafkaConsumer09.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer for Kafka 0.9.x. Use this constructor to
 * subscribe to multiple topics based on a regular expression pattern.
 *
 * <p>If partition discovery is enabled (by setting a non-negative value for
 * {@link FlinkKafkaConsumer09#KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS} in the properties), topics
 * with names matching the pattern will also be subscribed to as they are created on the fly.
 *
 * @param subscriptionPattern
 *           The regular expression for a pattern of topic names to subscribe to.
 * @param valueDeserializer
 *           The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props
 *           The properties used to configure the Kafka consumer client, and the ZooKeeper client.
 */
@PublicEvolving
public FlinkKafkaConsumer09(Pattern subscriptionPattern, DeserializationSchema<T> valueDeserializer, Properties props) {
	this(subscriptionPattern, new KafkaDeserializationSchemaWrapper<>(valueDeserializer), props);
}
 
Example #28
Source File: FlinkKafkaConsumer08.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer for Kafka 0.8.x
 *
 * <p>This constructor allows passing multiple topics to the consumer.
 *
 * @param topics
 *           The Kafka topics to read from.
 * @param deserializer
 *           The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props
 *           The properties that are used to configure both the fetcher and the offset handler.
 */
public FlinkKafkaConsumer08(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
	this(topics, new KafkaDeserializationSchemaWrapper<>(deserializer), props);
}
 
Example #29
Source File: FlinkKafkaConsumer08.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer for Kafka 0.8.x. Use this constructor to
 * subscribe to multiple topics based on a regular expression pattern.
 *
 * <p>If partition discovery is enabled (by setting a non-negative value for
 * {@link FlinkKafkaConsumer08#KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS} in the properties), topics
 * with names matching the pattern will also be subscribed to as they are created on the fly.
 *
 * @param subscriptionPattern
 *           The regular expression for a pattern of topic names to subscribe to.
 * @param valueDeserializer
 *           The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props
 *           The properties used to configure the Kafka consumer client, and the ZooKeeper client.
 */
@PublicEvolving
public FlinkKafkaConsumer08(Pattern subscriptionPattern, DeserializationSchema<T> valueDeserializer, Properties props) {
	this(subscriptionPattern, new KafkaDeserializationSchemaWrapper<>(valueDeserializer), props);
}
 
Example #30
Source File: FlinkKafkaConsumer.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new Kafka streaming source consumer.
 *
 * <p>This constructor allows passing multiple topics to the consumer.
 *
 * @param topics       The Kafka topics to read from.
 * @param deserializer The de-/serializer used to convert between Kafka's byte messages and Flink's objects.
 * @param props        The properties used to configure the Kafka consumer client.
 */
public FlinkKafkaConsumer(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
	this(topics, new KafkaDeserializationSchemaWrapper<>(deserializer), props);
}