org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper Java Examples

The following examples show how to use org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper. They are drawn from open source projects; the source file and project are noted above each example.
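
Before the project examples, here is a minimal, self-contained sketch of the common pattern: a plain SerializationSchema (SimpleStringSchema here) is wrapped in a KeyedSerializationSchemaWrapper so it can be passed to a FlinkKafkaProducer, which expects a KeyedSerializationSchema. The topic name and broker address below are placeholders, not values taken from the examples.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper;

public class KeyedSerializationSchemaWrapperSketch {

	public static void main(String[] args) throws Exception {
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStream<String> stream = env.fromElements("a", "b", "c");

		Properties props = new Properties();
		props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker address

		// The wrapper serializes only the value; the record key and target topic it reports are null,
		// so the producer falls back to the topic and partitioning configured on the sink.
		stream.addSink(new FlinkKafkaProducer<>(
			"my-topic",                                                      // placeholder topic
			new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
			props,
			FlinkKafkaProducer.Semantic.AT_LEAST_ONCE));

		env.execute("KeyedSerializationSchemaWrapper sketch");
	}
}
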
Example #1
Source File: KafkaExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
		.addSource(
			new FlinkKafkaConsumer<>(
				parameterTool.getRequired("input-topic"),
				new KafkaEventSchema(),
				parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(
		new FlinkKafkaProducer<>(
			parameterTool.getRequired("output-topic"),
			new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
			parameterTool.getProperties(),
			FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

	env.execute("Modern Kafka Example");
}
 
Example #2
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * This test is meant to assure that testAtLeastOnceProducer is valid by testing that, if flushing is disabled,
 * the snapshot method does indeed finish without waiting for pending records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(false);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg"));

	// the record has been sent, but its callback has not been completed yet
	verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class));

	// should return even if there are pending records
	testHarness.snapshot(123L, 123L);

	testHarness.close();
}
 
Example #3
Source File: FlinkKafkaProducerBaseTest.java    From flink with Apache License 2.0
/**
 * This test is meant to assure that testAtLeastOnceProducer is valid by testing that, if flushing is disabled,
 * the snapshot method does indeed finish without waiting for pending records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(false);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg"));

	// the record has been sent, but its callback has not been completed yet
	verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class));

	// should return even if there are pending records
	testHarness.snapshot(123L, 123L);

	testHarness.close();
}
 
Example #4
Source File: KafkaExample.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
		.addSource(
			new FlinkKafkaConsumer<>(
				parameterTool.getRequired("input-topic"),
				new KafkaEventSchema(),
				parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(
		new FlinkKafkaProducer<>(
			parameterTool.getRequired("output-topic"),
			new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
			parameterTool.getProperties(),
			FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

	env.execute("Modern Kafka Example");
}
 
Example #5
Source File: FlinkKafkaProducerBaseTest.java    From flink with Apache License 2.0
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example #6
Source File: FlinkKafkaProducerBaseTest.java    From flink with Apache License 2.0
/**
 * Tests that the constructor eagerly checks bootstrap servers are set in config.
 */
@Test(expected = IllegalArgumentException.class)
public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception {
	// no bootstrap servers set in props
	Properties props = new Properties();
	// should throw IllegalArgumentException
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
}
 
Example #7
Source File: KafkaTableSink.java    From flink with Apache License 2.0
@Override
protected SinkFunction<Row> createKafkaProducer(
	String topic,
	Properties properties,
	SerializationSchema<Row> serializationSchema,
	Optional<FlinkKafkaPartitioner<Row>> partitioner) {
	return new FlinkKafkaProducer<>(
		topic,
		new KeyedSerializationSchemaWrapper<>(serializationSchema),
		properties,
		partitioner);
}
 
Example #8
Source File: FlinkKafkaProducerBaseTest.java    From flink with Apache License 2.0
/**
 * Tests that the constructor defaults the key and value serializers in the config to the byte array serializer if they are not set.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
	Properties props = new Properties();
	props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
	// should set the missing key and value serializers
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
	assertTrue(props.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
}
 
Example #9
Source File: Kafka011TableSink.java    From flink with Apache License 2.0
@Override
protected SinkFunction<Row> createKafkaProducer(
		String topic,
		Properties properties,
		SerializationSchema<Row> serializationSchema,
		Optional<FlinkKafkaPartitioner<Row>> partitioner) {
	return new FlinkKafkaProducer011<>(
		topic,
		new KeyedSerializationSchemaWrapper<>(serializationSchema),
		properties,
		partitioner);
}
 
Example #10
Source File: FlinkKafkaProducerBaseTest.java    From flink with Apache License 2.0
/**
 * Tests that the partition list is deterministic and correctly provided to the custom partitioner.
 */
@SuppressWarnings("unchecked")
@Test
public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception {
	FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class);

	RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
	when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0);
	when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1);

	// out-of-order list of 4 partitions
	List<PartitionInfo> mockPartitionsList = new ArrayList<>(4);
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null));

	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner);
	producer.setRuntimeContext(mockRuntimeContext);

	final KafkaProducer mockProducer = producer.getMockKafkaProducer();
	when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList);
	when(mockProducer.metrics()).thenReturn(null);

	producer.open(new Configuration());
	verify(mockPartitioner, times(1)).open(0, 1);

	producer.invoke("foobar", SinkContextUtil.forTimestamp(0));
	verify(mockPartitioner, times(1)).partition(
		"foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3});
}
 
Example #11
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests that the partition list is deterministic and correctly provided to the custom partitioner.
 */
@SuppressWarnings("unchecked")
@Test
public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception {
	FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class);

	RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
	when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0);
	when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1);

	// out-of-order list of 4 partitions
	List<PartitionInfo> mockPartitionsList = new ArrayList<>(4);
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null));

	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner);
	producer.setRuntimeContext(mockRuntimeContext);

	final KafkaProducer mockProducer = producer.getMockKafkaProducer();
	when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList);
	when(mockProducer.metrics()).thenReturn(null);

	producer.open(new Configuration());
	verify(mockPartitioner, times(1)).open(0, 1);

	producer.invoke("foobar", SinkContextUtil.forTimestamp(0));
	verify(mockPartitioner, times(1)).partition(
		"foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3});
}
 
Example #12
Source File: KafkaTableSink.java    From Flink-CEPplus with Apache License 2.0
@Override
protected SinkFunction<Row> createKafkaProducer(
	String topic,
	Properties properties,
	SerializationSchema<Row> serializationSchema,
	Optional<FlinkKafkaPartitioner<Row>> partitioner) {
	return new FlinkKafkaProducer<>(
		topic,
		new KeyedSerializationSchemaWrapper<>(serializationSchema),
		properties,
		partitioner);
}
 
Example #13
Source File: FlinkKafkaProducerBaseTest.java    From flink with Apache License 2.0
/**
 * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnCheckpoint() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.snapshot(123L, 123L);
	} catch (Exception e) {
		// the snapshot call should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example #14
Source File: Kafka011TableSink.java    From Flink-CEPplus with Apache License 2.0
@Override
protected SinkFunction<Row> createKafkaProducer(
		String topic,
		Properties properties,
		SerializationSchema<Row> serializationSchema,
		Optional<FlinkKafkaPartitioner<Row>> partitioner) {
	return new FlinkKafkaProducer011<>(
		topic,
		new KeyedSerializationSchemaWrapper<>(serializationSchema),
		properties,
		partitioner);
}
 
Example #15
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnCheckpoint() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.snapshot(123L, 123L);
	} catch (Exception e) {
		// the snapshot call should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example #16
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example #17
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests that the constructor eagerly checks bootstrap servers are set in config.
 */
@Test(expected = IllegalArgumentException.class)
public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception {
	// no bootstrap servers set in props
	Properties props = new Properties();
	// should throw IllegalArgumentException
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
}
 
Example #18
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests that the constructor defaults the key and value serializers in the config to the byte array serializer if they are not set.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
	Properties props = new Properties();
	props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
	// should set the missing key and value serializers
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
	assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
}
 
Example #19
Source File: KafkaShortRetentionTestBase.java    From Flink-CEPplus with Apache License 2.0
public void runAutoOffsetResetTest() throws Exception {
	final String topic = "auto-offset-reset-test";

	final int parallelism = 1;
	final int elementsPerPartition = 50000;

	Properties tprops = new Properties();
	tprops.setProperty("retention.ms", "250");
	kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
	env.getConfig().disableSysoutLogging();

	// ----------- add producer dataflow ----------

	DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<String> ctx) throws InterruptedException {
			int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
			int limit = cnt + elementsPerPartition;

			while (running && !stopProducer && cnt < limit) {
				ctx.collect("element-" + cnt);
				cnt++;
				Thread.sleep(10);
			}
			LOG.info("Stopping producer");
		}

		@Override
		public void cancel() {
			running = false;
		}
	});
	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null);

	// ----------- add consumer dataflow ----------

	NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	tryExecute(env, "run auto offset reset test");

	kafkaServer.deleteTestTopic(topic);
}
 
Example #20
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0
protected String writeSequence(
		String baseTopicName,
		final int numElements,
		final int parallelism,
		final int replicationFactor) throws Exception {
	LOG.info("\n===================================\n" +
			"== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
			"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
			new KeyedSerializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
			new KafkaDeserializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final int maxNumAttempts = 10;

	for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

		final String topicName = baseTopicName + '-' + attempt;

		LOG.info("Writing attempt #" + attempt);

		// -------- Write the Sequence --------

		createTestTopic(topicName, parallelism, replicationFactor);

		StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
		writeEnv.getConfig().disableSysoutLogging();

		DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

			private boolean running = true;

			@Override
			public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
				int cnt = 0;
				int partition = getRuntimeContext().getIndexOfThisSubtask();

				while (running && cnt < numElements) {
					ctx.collect(new Tuple2<>(partition, cnt));
					cnt++;
				}
			}

			@Override
			public void cancel() {
				running = false;
			}
		}).setParallelism(parallelism);

		// the producer must not produce duplicates
		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
		producerProperties.setProperty("retries", "0");
		producerProperties.putAll(secureProps);

		kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
				.setParallelism(parallelism);

		try {
			writeEnv.execute("Write sequence");
		}
		catch (Exception e) {
			LOG.error("Write attempt failed, trying again", e);
			deleteTestTopic(topicName);
			waitUntilNoJobIsRunning(client);
			continue;
		}

		LOG.info("Finished writing sequence");

		// -------- Validate the Sequence --------

		// we need to validate the sequence, because kafka's producers are not exactly once
		LOG.info("Validating sequence");

		waitUntilNoJobIsRunning(client);

		if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
			// everything is good!
			return topicName;
		}
		else {
			deleteTestTopic(topicName);
			// fall through the loop
		}
	}

	throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
 
Example #21
Source File: KafkaProducerTestBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * This test configures the KafkaProducer to automatically flush the data and
 * fails the broker to check that records flushed since the last checkpoint are not duplicated.
 */
protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception {
	final String topic = (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") + sinksCount;
	final int partition = 0;
	final int numElements = 1000;
	final int failAfterElements = 333;

	for (int i = 0; i < sinksCount; i++) {
		createTestTopic(topic + i, 1, 1);
	}

	TypeInformationSerializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
	KeyedSerializationSchema<Integer> keyedSerializationSchema = new KeyedSerializationSchemaWrapper<>(schema);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();

	Properties properties = new Properties();
	properties.putAll(standardProps);
	properties.putAll(secureProps);

	// process exactly failAfterElements elements, then shut down the Kafka broker and fail the application
	List<Integer> expectedElements = getIntegersSequence(numElements);

	DataStream<Integer> inputStream = env
		.addSource(new IntegerSource(numElements))
		.map(new FailingIdentityMapper<Integer>(failAfterElements));

	for (int i = 0; i < sinksCount; i++) {
		FlinkKafkaPartitioner<Integer> partitioner = new FlinkKafkaPartitioner<Integer>() {
			@Override
			public int partition(Integer record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
				return partition;
			}
		};

		if (regularSink) {
			StreamSink<Integer> kafkaSink = kafkaServer.getProducerSink(topic + i, keyedSerializationSchema, properties, partitioner);
			inputStream.addSink(kafkaSink.getUserFunction());
		} else {
			kafkaServer.produceIntoKafka(inputStream, topic + i, keyedSerializationSchema, properties, partitioner);
		}
	}

	FailingIdentityMapper.failedBefore = false;
	TestUtils.tryExecute(env, "Exactly once test");

	for (int i = 0; i < sinksCount; i++) {
		// assert that before failure we successfully snapshot/flushed all expected elements
		assertExactlyOnceForTopic(
			properties,
			topic + i,
			partition,
			expectedElements,
			KAFKA_READ_TIMEOUT);
		deleteTestTopic(topic + i);
	}
}
 
Example #22
Source File: FlinkKafkaProducer.java    From flink with Apache License 2.0
/**
 * @deprecated Use {@link FlinkKafkaProducer08#FlinkKafkaProducer08(String, SerializationSchema, Properties, KafkaPartitioner)}
 */
@Deprecated
public FlinkKafkaProducer(String topicId, SerializationSchema<IN> serializationSchema, Properties producerConfig, KafkaPartitioner customPartitioner) {
	super(topicId, new KeyedSerializationSchemaWrapper<>(serializationSchema), producerConfig, customPartitioner);

}
 
Example #23
Source File: FlinkKafkaProducer.java    From flink with Apache License 2.0
/**
 * @deprecated Use {@link FlinkKafkaProducer08#FlinkKafkaProducer08(String, SerializationSchema, Properties)}
 */
@Deprecated
public FlinkKafkaProducer(String topicId, SerializationSchema<IN> serializationSchema, Properties producerConfig) {
	super(topicId, new KeyedSerializationSchemaWrapper<>(serializationSchema), producerConfig, (FlinkKafkaPartitioner<IN>) null);
}
 
Example #24
Source File: FlinkKafkaProducer.java    From flink with Apache License 2.0
/**
 * @deprecated Use {@link FlinkKafkaProducer08#FlinkKafkaProducer08(String, String, SerializationSchema)}
 */
@Deprecated
public FlinkKafkaProducer(String brokerList, String topicId, SerializationSchema<IN> serializationSchema) {
	super(topicId, new KeyedSerializationSchemaWrapper<>(serializationSchema), getPropertiesFromBrokerList(brokerList), (FlinkKafkaPartitioner<IN>) null);
}
 
Example #25
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0
protected void writeAppendSequence(
		String topicName,
		final int originalNumElements,
		final int numElementsToAppend,
		final int parallelism) throws Exception {

	LOG.info("\n===================================\n" +
		"== Appending sequence of " + numElementsToAppend + " into " + topicName +
		"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
		new KeyedSerializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
		new KafkaDeserializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	// -------- Write the append sequence --------

	StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	writeEnv.getConfig().disableSysoutLogging();

	DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
			int cnt = originalNumElements;
			int partition = getRuntimeContext().getIndexOfThisSubtask();

			while (running && cnt < numElementsToAppend + originalNumElements) {
				ctx.collect(new Tuple2<>(partition, cnt));
				cnt++;
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	}).setParallelism(parallelism);

	// the producer must not produce duplicates
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "0");
	producerProperties.putAll(secureProps);

	kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
		.setParallelism(parallelism);

	try {
		writeEnv.execute("Write sequence");
	}
	catch (Exception e) {
		throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
	}

	LOG.info("Finished writing append sequence");

	// we need to validate the sequence, because kafka's producers are not exactly once
	LOG.info("Validating sequence");
	while (!getRunningJobs(client).isEmpty()){
		Thread.sleep(50);
	}

	if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
		throw new Exception("Could not append a valid sequence to Kafka.");
	}
}
 
Example #26
Source File: Kafka010ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255105836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1000L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	});
	prod.setParallelism(3);
	prod.setWriteTimestampToKafka(true);
	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111073247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 10 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example #27
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Test ensuring that the producer is not dropping buffered records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 10000)
public void testAtLeastOnceProducer() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(true);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));
	testHarness.processElement(new StreamRecord<>("msg-2"));
	testHarness.processElement(new StreamRecord<>("msg-3"));

	verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class));
	Assert.assertEquals(3, producer.getPendingSize());

	// start a thread to perform checkpointing
	CheckedThread snapshotThread = new CheckedThread() {
		@Override
		public void go() throws Exception {
			// this should block until all records are flushed;
			// if the snapshot implementation returns before the pending records are flushed, the assertions below will fail
			testHarness.snapshot(123L, 123L);
		}
	};
	snapshotThread.start();

	// before proceeding, make sure that flushing has started and that the snapshot is still blocked;
	// this would block forever if the snapshot didn't perform a flush
	producer.waitUntilFlushStarted();
	Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());

	// now, complete the callbacks
	producer.getPendingCallbacks().get(0).onCompletion(null, null);
	Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
	Assert.assertEquals(2, producer.getPendingSize());

	producer.getPendingCallbacks().get(1).onCompletion(null, null);
	Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
	Assert.assertEquals(1, producer.getPendingSize());

	producer.getPendingCallbacks().get(2).onCompletion(null, null);
	Assert.assertEquals(0, producer.getPendingSize());

	// this would fail with an exception if flushing wasn't completed before the snapshot method returned
	snapshotThread.sync();

	testHarness.close();
}
 
Example #28
Source File: KafkaITCase.java    From flink with Apache License 2.0
/**
 * Kafka 2.0 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer<Long> kafkaSource = new FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example #29
Source File: Kafka011ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.11 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer011<Long> prod = new FlinkKafkaProducer011<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer011<Long> kafkaSource = new FlinkKafkaConsumer011<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example #30
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Test ensuring that if an async exception is caught for one of the flushed requests on checkpoint,
 * it should be rethrown; we set a timeout because the test will not finish if the logic is broken.
 *
 * <p>Note that this test does not verify that the snapshot method blocks correctly when there are pending records.
 * The test for that is covered in testAtLeastOnceProducer.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testAsyncErrorRethrownOnCheckpointAfterFlush() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(true);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));
	testHarness.processElement(new StreamRecord<>("msg-2"));
	testHarness.processElement(new StreamRecord<>("msg-3"));

	verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class));

	// only let the first callback succeed for now
	producer.getPendingCallbacks().get(0).onCompletion(null, null);

	CheckedThread snapshotThread = new CheckedThread() {
		@Override
		public void go() throws Exception {
			// this should block at first, since there are still two pending records that need to be flushed
			testHarness.snapshot(123L, 123L);
		}
	};
	snapshotThread.start();

	// let the 2nd message fail with an async exception
	producer.getPendingCallbacks().get(1).onCompletion(null, new Exception("artificial async failure for 2nd message"));
	producer.getPendingCallbacks().get(2).onCompletion(null, null);

	try {
		snapshotThread.sync();
	} catch (Exception e) {
		// the snapshot should have failed with the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async failure for 2nd message"));

		// test succeeded
		return;
	}

	Assert.fail();
}