org.apache.flink.api.common.serialization.TypeInformationSerializationSchema Java Examples

The following examples show how to use org.apache.flink.api.common.serialization.TypeInformationSerializationSchema. Each example is taken from an open-source project; the source file and license are noted above each snippet.
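Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what the class does on its own: it implements both SerializationSchema and DeserializationSchema, so a single instance can turn elements into bytes and back using the Flink serializer derived from the given TypeInformation and ExecutionConfig.

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;

public class RoundTripSketch {
	public static void main(String[] args) throws Exception {
		// The schema is built from the element's TypeInformation plus an ExecutionConfig,
		// which together select the Flink TypeSerializer used under the hood.
		TypeInformationSerializationSchema<String> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.STRING_TYPE_INFO, new ExecutionConfig());

		byte[] bytes = schema.serialize("hello");   // SerializationSchema side
		String back = schema.deserialize(bytes);    // DeserializationSchema side
		System.out.println(back);                   // prints "hello"
	}
}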
Example #1
Source File: FlinkKafkaShuffleConsumer.java    From flink with Apache License 2.0
FlinkKafkaShuffleConsumer(
		String topic,
		TypeInformationSerializationSchema<T> schema,
		TypeSerializer<T> typeSerializer,
		Properties props) {
	// The schema is needed to call the right FlinkKafkaConsumer constructor.
	// It is never used, can be `null`, but `null` confuses the compiler.
	super(topic, schema, props);
	this.typeSerializer = typeSerializer;

	Preconditions.checkArgument(
		props.getProperty(PRODUCER_PARALLELISM) != null,
		"Missing producer parallelism for Kafka Shuffle");
	producerParallelism = PropertiesUtil.getInt(props, PRODUCER_PARALLELISM, Integer.MAX_VALUE);
}
 
Example #2
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Tests the proper consumption when having more Flink sources than Kafka partitions, which means
 * that some Flink sources will read no partitions.
 */
public void runMultipleSourcesOnePartitionExactlyOnceTest() throws Exception {
	final String topic = "manyToOneTopic";
	final int numPartitions = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = numPartitions * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	final int parallelism = 8;

	createTestTopic(topic, numPartitions, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			numPartitions,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	// set the number of restarts to one. The failing mapper will fail once; after that, only success exceptions are expected.
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();
	env.setBufferTimeout(0);

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
		.addSource(kafkaSource)
		.map(new PartitionValidatingMapper(numPartitions, 1))
		.map(new FailingIdentityMapper<Integer>(failAfterElements))
		.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "multi-source-one-partitions exactly once test");

	deleteTestTopic(topic);
}
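The test above obtains its source through the test harness (kafkaServer.getConsumer). As a hedged sketch of the same wiring in application code, assuming the universal Kafka connector (FlinkKafkaConsumer) is on the classpath and using placeholder topic and broker values, the schema can be passed directly as the consumer's DeserializationSchema:

import java.util.Properties;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class ConsumerSketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		Properties props = new Properties();
		props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker address
		props.setProperty("group.id", "example-group");           // placeholder group id

		// Same schema type the tests use, here handed straight to the public consumer.
		TypeInformationSerializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

		DataStream<Integer> stream =
			env.addSource(new FlinkKafkaConsumer<>("example-topic", schema, props));

		stream.print();
		env.execute("Consume integers");
	}
}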
 
Example #3
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
protected void writeAppendSequence(
		String topicName,
		final int originalNumElements,
		final int numElementsToAppend,
		final int parallelism) throws Exception {

	LOG.info("\n===================================\n" +
		"== Appending sequence of " + numElementsToAppend + " into " + topicName +
		"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final SerializationSchema<Tuple2<Integer, Integer>> serSchema =
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
		new KafkaDeserializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	// -------- Write the append sequence --------

	StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
			int cnt = originalNumElements;
			int partition = getRuntimeContext().getIndexOfThisSubtask();

			while (running && cnt < numElementsToAppend + originalNumElements) {
				ctx.collect(new Tuple2<>(partition, cnt));
				cnt++;
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	}).setParallelism(parallelism);

	// the producer must not produce duplicates
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "0");
	producerProperties.putAll(secureProps);

	kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
		.setParallelism(parallelism);

	try {
		writeEnv.execute("Write sequence");
	}
	catch (Exception e) {
		throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
	}

	LOG.info("Finished writing append sequence");

	// we need to validate the sequence, because kafka's producers are not exactly once
	LOG.info("Validating sequence");
	while (!getRunningJobs(client).isEmpty()){
		Thread.sleep(50);
	}

	if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
		throw new Exception("Could not append a valid sequence to Kafka.");
	}
}
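On the producing side, the example above hands the schema to the test harness's produceIntoKafka as a plain SerializationSchema. Below is a hedged sketch of an equivalent standalone job, assuming the universal connector's (topic, SerializationSchema, Properties) producer constructor and placeholder topic/broker values:

import java.util.Properties;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class ProducerSketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		TypeInformation<Tuple2<Integer, Integer>> type =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

		// Same schema construction as in the test, reused as the sink's SerializationSchema.
		TypeInformationSerializationSchema<Tuple2<Integer, Integer>> serSchema =
			new TypeInformationSerializationSchema<>(type, new ExecutionConfig());

		Properties props = new Properties();
		props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker address

		DataStream<Tuple2<Integer, Integer>> stream =
			env.fromElements(Tuple2.of(0, 1), Tuple2.of(0, 2), Tuple2.of(1, 3));

		stream.addSink(new FlinkKafkaProducer<>("example-topic", serSchema, props));
		env.execute("Write tuples");
	}
}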
 
Example #4
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
protected String writeSequence(
		String baseTopicName,
		final int numElements,
		final int parallelism,
		final int replicationFactor) throws Exception {
	LOG.info("\n===================================\n" +
			"== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
			"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final SerializationSchema<Tuple2<Integer, Integer>> serSchema =
				new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
			new KafkaDeserializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final int maxNumAttempts = 10;

	for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

		final String topicName = baseTopicName + '-' + attempt;

		LOG.info("Writing attempt #" + attempt);

		// -------- Write the Sequence --------

		createTestTopic(topicName, parallelism, replicationFactor);

		StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
		DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

			private boolean running = true;

			@Override
			public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
				int cnt = 0;
				int partition = getRuntimeContext().getIndexOfThisSubtask();

				while (running && cnt < numElements) {
					ctx.collect(new Tuple2<>(partition, cnt));
					cnt++;
				}
			}

			@Override
			public void cancel() {
				running = false;
			}
		}).setParallelism(parallelism);

		// the producer must not produce duplicates
		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
		producerProperties.setProperty("retries", "0");
		producerProperties.putAll(secureProps);

		kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
				.setParallelism(parallelism);

		try {
			writeEnv.execute("Write sequence");
		}
		catch (Exception e) {
			LOG.error("Write attempt failed, trying again", e);
			deleteTestTopic(topicName);
			waitUntilNoJobIsRunning(client);
			continue;
		}

		LOG.info("Finished writing sequence");

		// -------- Validate the Sequence --------

		// we need to validate the sequence, because kafka's producers are not exactly once
		LOG.info("Validating sequence");

		waitUntilNoJobIsRunning(client);

		if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
			// everything is good!
			return topicName;
		}
		else {
			deleteTestTopic(topicName);
			// fall through the loop
		}
	}

	throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
 
Example #5
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
public void runBrokerFailureTest() throws Exception {
	final String topic = "brokerFailureTestTopic";

	final int parallelism = 2;
	final int numElementsPerPartition = 1000;
	final int totalElements = parallelism * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	createTestTopic(topic, parallelism, 2);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic, parallelism, numElementsPerPartition, true);

	// find leader to shut down
	int leaderId = kafkaServer.getLeaderToShutDown(topic);

	LOG.info("Leader to shutdown {}", leaderId);

	// run the topology (the consumers must handle the failures)

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.enableCheckpointing(500);
	env.setRestartStrategy(RestartStrategies.noRestart());

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(parallelism, 1))
			.map(new BrokerKillingMapper<Integer>(leaderId, failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	BrokerKillingMapper.killedLeaderBefore = false;
	tryExecute(env, "Broker failure once test");

	// start a new broker:
	kafkaServer.restartBroker(leaderId);
}
 
Example #6
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Tests the proper consumption when having more Flink sources than Kafka partitions, which means
 * that some Flink sources will read no partitions.
 */
public void runMultipleSourcesOnePartitionExactlyOnceTest() throws Exception {
	final String topic = "manyToOneTopic";
	final int numPartitions = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = numPartitions * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	final int parallelism = 8;

	createTestTopic(topic, numPartitions, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			numPartitions,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	// set the number of restarts to one. The failing mapper will fail once; after that, only success exceptions are expected.
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.setBufferTimeout(0);

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
		.addSource(kafkaSource)
		.map(new PartitionValidatingMapper(numPartitions, 1))
		.map(new FailingIdentityMapper<Integer>(failAfterElements))
		.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "multi-source-one-partitions exactly once test");

	deleteTestTopic(topic);
}
 
Example #7
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Tests the proper consumption when having fewer Flink sources than Kafka partitions, so
 * one Flink source will read multiple Kafka partitions.
 */
public void runOneSourceMultiplePartitionsExactlyOnceTest() throws Exception {
	final String topic = "oneToManyTopic";
	final int numPartitions = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = numPartitions * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	final int parallelism = 2;

	createTestTopic(topic, numPartitions, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			numPartitions,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(numPartitions, 3))
			.map(new FailingIdentityMapper<Integer>(failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "One-source-multi-partitions exactly once test");

	deleteTestTopic(topic);
}
 
Example #8
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Tests the proper consumption when having a 1:1 correspondence between kafka partitions and
 * Flink sources.
 */
public void runOneToOneExactlyOnceTest() throws Exception {

	final String topic = "oneToOneTopic";
	final int parallelism = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = parallelism * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	createTestTopic(topic, parallelism, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			parallelism,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);

	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(parallelism, 1))
			.map(new FailingIdentityMapper<Integer>(failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "One-to-one exactly once test");

	deleteTestTopic(topic);
}
 
Example #9
Source File: KafkaProducerTestBase.java    From flink with Apache License 2.0
/**
 * This test sets KafkaProducer so that it will automatically flush the data
 * and fails the broker to check that records flushed since the last checkpoint are not duplicated.
 */
protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception {
	final String topic = (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") + sinksCount;
	final int partition = 0;
	final int numElements = 1000;
	final int failAfterElements = 333;

	for (int i = 0; i < sinksCount; i++) {
		createTestTopic(topic + i, 1, 1);
	}

	TypeInformationSerializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));

	Properties properties = new Properties();
	properties.putAll(standardProps);
	properties.putAll(secureProps);

	// process exactly failAfterElements number of elements and then shutdown Kafka broker and fail application
	List<Integer> expectedElements = getIntegersSequence(numElements);

	DataStream<Integer> inputStream = env
		.addSource(new IntegerSource(numElements))
		.map(new FailingIdentityMapper<Integer>(failAfterElements));

	for (int i = 0; i < sinksCount; i++) {
		FlinkKafkaPartitioner<Integer> partitioner = new FlinkKafkaPartitioner<Integer>() {
			@Override
			public int partition(Integer record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
				return partition;
			}
		};

		if (regularSink) {
			StreamSink<Integer> kafkaSink = kafkaServer.getProducerSink(topic + i, schema, properties, partitioner);
			inputStream.addSink(kafkaSink.getUserFunction());
		} else {
			kafkaServer.produceIntoKafka(inputStream, topic + i, schema, properties, partitioner);
		}
	}

	FailingIdentityMapper.failedBefore = false;
	TestUtils.tryExecute(env, "Exactly once test");

	for (int i = 0; i < sinksCount; i++) {
		// assert that before failure we successfully snapshot/flushed all expected elements
		assertExactlyOnceForTopic(
			properties,
			topic + i,
			partition,
			expectedElements,
			KAFKA_READ_TIMEOUT);
		deleteTestTopic(topic + i);
	}
}
 
Example #10
Source File: Kafka010ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255105836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1000L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	});
	prod.setParallelism(3);
	prod.setWriteTimestampToKafka(true);
	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111073247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 10 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example #11
Source File: Kafka011ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.11 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer011<Long> prod = new FlinkKafkaProducer011<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer011<Long> kafkaSource = new FlinkKafkaConsumer011<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example #12
Source File: KafkaITCase.java    From flink with Apache License 2.0
/**
 * Kafka 2.0 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer<Long> kafkaSource = new FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example #13
Source File: FlinkKafkaShuffle.java    From flink with Apache License 2.0
/**
 * The read side of {@link FlinkKafkaShuffle#persistentKeyBy}.
 *
 * <p>Each consumer task should read kafka partitions equal to the key group indices it is assigned.
 * The number of kafka partitions is the maximum parallelism of the consumer.
 * This version only supports numberOfPartitions = consumerParallelism.
 * In the case of using {@link TimeCharacteristic#EventTime}, a consumer task is responsible to emit
 * watermarks. Watermarks are read from the corresponding Kafka partitions. Notice that a consumer task only starts
 * to emit a watermark after receiving at least one watermark from each producer task to make sure watermarks
 * are monotonically increasing. Hence a consumer task needs to know `producerParallelism` as well.
 *
 * <p>Attention: make sure kafkaProperties include
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly.
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer.
 * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions.
 * They are not necessarily the same and allowed to be set independently.
 *
 * @see FlinkKafkaShuffle#persistentKeyBy
 * @see FlinkKafkaShuffle#writeKeyBy
 *
 * @param topic 			The topic of Kafka where data is persisted
 * @param env 				Execution environment. readKeyBy's environment can be different from writeKeyBy's
 * @param typeInformation 	Type information of the data persisted in Kafka
 * @param kafkaProperties 	kafka properties for Kafka Consumer
 * @param keySelector 		key selector to retrieve key
 * @param <T> 				Schema type
 * @param <K> 				Key type
 * @return Keyed data stream
 */
public static <T, K> KeyedStream<T, K> readKeyBy(
		String topic,
		StreamExecutionEnvironment env,
		TypeInformation<T> typeInformation,
		Properties kafkaProperties,
		KeySelector<T, K> keySelector) {

	TypeSerializer<T> typeSerializer = typeInformation.createSerializer(env.getConfig());
	TypeInformationSerializationSchema<T> schema =
		new TypeInformationSerializationSchema<>(typeInformation, typeSerializer);

	SourceFunction<T> kafkaConsumer  =
		new FlinkKafkaShuffleConsumer<>(topic, schema, typeSerializer, kafkaProperties);

	// TODO: consider situations where numberOfPartitions != consumerParallelism
	Preconditions.checkArgument(
		kafkaProperties.getProperty(PARTITION_NUMBER) != null,
		"Missing partition number for Kafka Shuffle");
	int numberOfPartitions = PropertiesUtil.getInt(kafkaProperties, PARTITION_NUMBER, Integer.MIN_VALUE);
	DataStream<T> outputDataStream = env.addSource(kafkaConsumer).setParallelism(numberOfPartitions);

	return DataStreamUtils.reinterpretAsKeyedStream(outputDataStream, keySelector);
}
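The Javadoc above says callers must supply PRODUCER_PARALLELISM and PARTITION_NUMBER in the consumer properties. The sketch below is only an illustration of how a caller might invoke readKeyBy, assuming a matching writeKeyBy job has already populated the topic; the literal property keys, topic name, and broker address are placeholders rather than confirmed values, and real code should use the FlinkKafkaShuffle constants instead.

import java.util.Properties;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle;

public class ReadKeyBySketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		TypeInformation<Tuple2<Integer, Integer>> type =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

		Properties kafkaProperties = new Properties();
		kafkaProperties.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker address
		// Required by readKeyBy per the Javadoc above; the literal keys below are assumed
		// spellings of FlinkKafkaShuffle.PRODUCER_PARALLELISM / PARTITION_NUMBER.
		kafkaProperties.setProperty("producer parallelism", "4");
		kafkaProperties.setProperty("partition number", "8");

		// Key on the first tuple field; an anonymous class keeps the key type extractable.
		KeySelector<Tuple2<Integer, Integer>, Integer> keySelector =
			new KeySelector<Tuple2<Integer, Integer>, Integer>() {
				@Override
				public Integer getKey(Tuple2<Integer, Integer> value) {
					return value.f0;
				}
			};

		KeyedStream<Tuple2<Integer, Integer>, Integer> keyed = FlinkKafkaShuffle.readKeyBy(
			"shuffle-topic",   // placeholder topic written by a corresponding writeKeyBy job
			env,
			type,
			kafkaProperties,
			keySelector);

		keyed.print();
		env.execute("Read keyed shuffle data");
	}
}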
 
Example #14
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
protected void writeAppendSequence(
		String topicName,
		final int originalNumElements,
		final int numElementsToAppend,
		final int parallelism) throws Exception {

	LOG.info("\n===================================\n" +
		"== Appending sequence of " + numElementsToAppend + " into " + topicName +
		"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
		new KeyedSerializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
		new KafkaDeserializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	// -------- Write the append sequence --------

	StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	writeEnv.getConfig().disableSysoutLogging();

	DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
			int cnt = originalNumElements;
			int partition = getRuntimeContext().getIndexOfThisSubtask();

			while (running && cnt < numElementsToAppend + originalNumElements) {
				ctx.collect(new Tuple2<>(partition, cnt));
				cnt++;
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	}).setParallelism(parallelism);

	// the producer must not produce duplicates
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "0");
	producerProperties.putAll(secureProps);

	kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
		.setParallelism(parallelism);

	try {
		writeEnv.execute("Write sequence");
	}
	catch (Exception e) {
		throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
	}

	LOG.info("Finished writing append sequence");

	// we need to validate the sequence, because kafka's producers are not exactly once
	LOG.info("Validating sequence");
	while (!getRunningJobs(client).isEmpty()){
		Thread.sleep(50);
	}

	if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
		throw new Exception("Could not append a valid sequence to Kafka.");
	}
}
 
Example #15
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
protected String writeSequence(
		String baseTopicName,
		final int numElements,
		final int parallelism,
		final int replicationFactor) throws Exception {
	LOG.info("\n===================================\n" +
			"== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
			"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
			new KeyedSerializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
			new KafkaDeserializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final int maxNumAttempts = 10;

	for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

		final String topicName = baseTopicName + '-' + attempt;

		LOG.info("Writing attempt #" + attempt);

		// -------- Write the Sequence --------

		createTestTopic(topicName, parallelism, replicationFactor);

		StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
		writeEnv.getConfig().disableSysoutLogging();

		DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

			private boolean running = true;

			@Override
			public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
				int cnt = 0;
				int partition = getRuntimeContext().getIndexOfThisSubtask();

				while (running && cnt < numElements) {
					ctx.collect(new Tuple2<>(partition, cnt));
					cnt++;
				}
			}

			@Override
			public void cancel() {
				running = false;
			}
		}).setParallelism(parallelism);

		// the producer must not produce duplicates
		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
		producerProperties.setProperty("retries", "0");
		producerProperties.putAll(secureProps);

		kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
				.setParallelism(parallelism);

		try {
			writeEnv.execute("Write sequence");
		}
		catch (Exception e) {
			LOG.error("Write attempt failed, trying again", e);
			deleteTestTopic(topicName);
			waitUntilNoJobIsRunning(client);
			continue;
		}

		LOG.info("Finished writing sequence");

		// -------- Validate the Sequence --------

		// we need to validate the sequence, because kafka's producers are not exactly once
		LOG.info("Validating sequence");

		waitUntilNoJobIsRunning(client);

		if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
			// everything is good!
			return topicName;
		}
		else {
			deleteTestTopic(topicName);
			// fall through the loop
		}
	}

	throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
 
Example #16
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
public void runBrokerFailureTest() throws Exception {
	final String topic = "brokerFailureTestTopic";

	final int parallelism = 2;
	final int numElementsPerPartition = 1000;
	final int totalElements = parallelism * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	createTestTopic(topic, parallelism, 2);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic, parallelism, numElementsPerPartition, true);

	// find leader to shut down
	int leaderId = kafkaServer.getLeaderToShutDown(topic);

	LOG.info("Leader to shutdown {}", leaderId);

	// run the topology (the consumers must handle the failures)

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.enableCheckpointing(500);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(parallelism, 1))
			.map(new BrokerKillingMapper<Integer>(leaderId, failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	BrokerKillingMapper.killedLeaderBefore = false;
	tryExecute(env, "Broker failure once test");

	// start a new broker:
	kafkaServer.restartBroker(leaderId);
}
 
Example #17
Source File: KafkaITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Kafka 2.0 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer<Long> kafkaSource = new FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example #18
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Tests the proper consumption when having fewer Flink sources than Kafka partitions, so
 * one Flink source will read multiple Kafka partitions.
 */
public void runOneSourceMultiplePartitionsExactlyOnceTest() throws Exception {
	final String topic = "oneToManyTopic";
	final int numPartitions = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = numPartitions * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	final int parallelism = 2;

	createTestTopic(topic, numPartitions, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			numPartitions,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(numPartitions, 3))
			.map(new FailingIdentityMapper<Integer>(failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "One-source-multi-partitions exactly once test");

	deleteTestTopic(topic);
}
 
Example #19
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Tests the proper consumption when having a 1:1 correspondence between kafka partitions and
 * Flink sources.
 */
public void runOneToOneExactlyOnceTest() throws Exception {

	final String topic = "oneToOneTopic";
	final int parallelism = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = parallelism * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	createTestTopic(topic, parallelism, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			parallelism,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);

	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(parallelism, 1))
			.map(new FailingIdentityMapper<Integer>(failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "One-to-one exactly once test");

	deleteTestTopic(topic);
}
 
Example #20
Source File: KafkaProducerTestBase.java    From flink with Apache License 2.0
/**
 * This test sets KafkaProducer so that it will automatically flush the data
 * and fails the broker to check that records flushed since the last checkpoint are not duplicated.
 */
protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception {
	final String topic = (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") + sinksCount;
	final int partition = 0;
	final int numElements = 1000;
	final int failAfterElements = 333;

	for (int i = 0; i < sinksCount; i++) {
		createTestTopic(topic + i, 1, 1);
	}

	TypeInformationSerializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
	KeyedSerializationSchema<Integer> keyedSerializationSchema = new KeyedSerializationSchemaWrapper<>(schema);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();

	Properties properties = new Properties();
	properties.putAll(standardProps);
	properties.putAll(secureProps);

	// process exactly failAfterElements number of elements and then shutdown Kafka broker and fail application
	List<Integer> expectedElements = getIntegersSequence(numElements);

	DataStream<Integer> inputStream = env
		.addSource(new IntegerSource(numElements))
		.map(new FailingIdentityMapper<Integer>(failAfterElements));

	for (int i = 0; i < sinksCount; i++) {
		FlinkKafkaPartitioner<Integer> partitioner = new FlinkKafkaPartitioner<Integer>() {
			@Override
			public int partition(Integer record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
				return partition;
			}
		};

		if (regularSink) {
			StreamSink<Integer> kafkaSink = kafkaServer.getProducerSink(topic + i, keyedSerializationSchema, properties, partitioner);
			inputStream.addSink(kafkaSink.getUserFunction());
		} else {
			kafkaServer.produceIntoKafka(inputStream, topic + i, keyedSerializationSchema, properties, partitioner);
		}
	}

	FailingIdentityMapper.failedBefore = false;
	TestUtils.tryExecute(env, "Exactly once test");

	for (int i = 0; i < sinksCount; i++) {
		// assert that before failure we successfully snapshot/flushed all expected elements
		assertExactlyOnceForTopic(
			properties,
			topic + i,
			partition,
			expectedElements,
			KAFKA_READ_TIMEOUT);
		deleteTestTopic(topic + i);
	}
}
 
Example #21
Source File: Kafka010ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255105836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1000L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	});
	prod.setParallelism(3);
	prod.setWriteTimestampToKafka(true);
	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111073247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 10 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example #22
Source File: Kafka011ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.11 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer011<Long> prod = new FlinkKafkaProducer011<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer011<Long> kafkaSource = new FlinkKafkaConsumer011<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example #23
Source File: KafkaITCase.java    From flink with Apache License 2.0
/**
 * Kafka 2.0 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer<Long> kafkaSource = new FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
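A side note on the schema used above: a TypeInformationSerializationSchema is symmetric, acting both as a SerializationSchema on the producer side and as a DeserializationSchema on the consumer side, because both directions delegate to the same Flink TypeSerializer. A minimal round-trip sketch (the method name and values are made up for illustration, not part of the test):

// Minimal round-trip sketch; assumes java.io.IOException and org.apache.flink.api.common.typeinfo.Types.
static void roundTrip() throws IOException {
	TypeInformationSerializationSchema<Long> schema =
			new TypeInformationSerializationSchema<>(Types.LONG, new ExecutionConfig());

	byte[] bytes = schema.serialize(42L);    // producer-side encoding via Flink's LongSerializer
	Long back = schema.deserialize(bytes);   // consumer-side decoding with the same serializer

	assert back == 42L;
	assert !schema.isEndOfStream(back);      // this schema never signals end-of-stream
}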
 
Example #24
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
protected void writeAppendSequence(
		String topicName,
		final int originalNumElements,
		final int numElementsToAppend,
		final int parallelism) throws Exception {

	LOG.info("\n===================================\n" +
		"== Appending sequence of " + numElementsToAppend + " into " + topicName +
		"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
		new KeyedSerializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
		new KafkaDeserializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	// -------- Write the append sequence --------

	StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	writeEnv.getConfig().disableSysoutLogging();

	DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
			int cnt = originalNumElements;
			int partition = getRuntimeContext().getIndexOfThisSubtask();

			while (running && cnt < numElementsToAppend + originalNumElements) {
				ctx.collect(new Tuple2<>(partition, cnt));
				cnt++;
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	}).setParallelism(parallelism);

	// the producer must not produce duplicates
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "0");
	producerProperties.putAll(secureProps);

	kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
		.setParallelism(parallelism);

	try {
		writeEnv.execute("Write sequence");
	}
	catch (Exception e) {
		throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
	}

	LOG.info("Finished writing append sequence");

	// we need to validate the sequence, because kafka's producers are not exactly once
	LOG.info("Validating sequence");
	while (!getRunningJobs(client).isEmpty()){
		Thread.sleep(50);
	}

	if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
		throw new Exception("Could not append a valid sequence to Kafka.");
	}
}
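The Tuple2FlinkPartitioner used above is a test helper that is not shown in these examples. Below is only a rough sketch of the idea, under the assumption that it routes each record to the Kafka partition named in the tuple's first field; the class name and the modulo guard are invented for illustration.

// Rough, assumed sketch of the partitioning idea behind Tuple2FlinkPartitioner.
public class FirstFieldPartitioner extends FlinkKafkaPartitioner<Tuple2<Integer, Integer>> {

	private static final long serialVersionUID = 1L;

	@Override
	public int partition(Tuple2<Integer, Integer> record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
		// the first tuple field carries the writing subtask's index, which doubles as the target partition
		return record.f0 % partitions.length;
	}
}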
 
Example #25
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
protected String writeSequence(
		String baseTopicName,
		final int numElements,
		final int parallelism,
		final int replicationFactor) throws Exception {
	LOG.info("\n===================================\n" +
			"== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
			"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
			new KeyedSerializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
			new KafkaDeserializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final int maxNumAttempts = 10;

	for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

		final String topicName = baseTopicName + '-' + attempt;

		LOG.info("Writing attempt #" + attempt);

		// -------- Write the Sequence --------

		createTestTopic(topicName, parallelism, replicationFactor);

		StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
		writeEnv.getConfig().disableSysoutLogging();

		DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

			private boolean running = true;

			@Override
			public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
				int cnt = 0;
				int partition = getRuntimeContext().getIndexOfThisSubtask();

				while (running && cnt < numElements) {
					ctx.collect(new Tuple2<>(partition, cnt));
					cnt++;
				}
			}

			@Override
			public void cancel() {
				running = false;
			}
		}).setParallelism(parallelism);

		// the producer must not produce duplicates
		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
		producerProperties.setProperty("retries", "0");
		producerProperties.putAll(secureProps);

		kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
				.setParallelism(parallelism);

		try {
			writeEnv.execute("Write sequence");
		}
		catch (Exception e) {
			LOG.error("Write attempt failed, trying again", e);
			deleteTestTopic(topicName);
			waitUntilNoJobIsRunning(client);
			continue;
		}

		LOG.info("Finished writing sequence");

		// -------- Validate the Sequence --------

		// we need to validate the sequence, because kafka's producers are not exactly once
		LOG.info("Validating sequence");

		waitUntilNoJobIsRunning(client);

		if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
			// everything is good!
			return topicName;
		}
		else {
			deleteTestTopic(topicName);
			// fall through the loop
		}
	}

	throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
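A short background note on the anonymous TypeHint used above; this snippet is illustrative and not part of the test. Java erases generic parameters at runtime, so Tuple2.class alone could not tell Flink that both fields are Integers. The anonymous TypeHint subclass preserves the full generic signature, which lets TypeInformationSerializationSchema build an exact tuple serializer instead of falling back to generic serialization.

TypeInformation<Tuple2<Integer, Integer>> resultType =
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

TypeSerializer<Tuple2<Integer, Integer>> serializer =
		resultType.createSerializer(new ExecutionConfig());   // a TupleSerializer over two IntSerializers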
 
Example #26
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
public void runBrokerFailureTest() throws Exception {
	final String topic = "brokerFailureTestTopic";

	final int parallelism = 2;
	final int numElementsPerPartition = 1000;
	final int totalElements = parallelism * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	createTestTopic(topic, parallelism, 2);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic, parallelism, numElementsPerPartition, true);

	// find leader to shut down
	int leaderId = kafkaServer.getLeaderToShutDown(topic);

	LOG.info("Leader to shutdown {}", leaderId);

	// run the topology (the consumers must handle the failures)

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.enableCheckpointing(500);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(parallelism, 1))
			.map(new BrokerKillingMapper<Integer>(leaderId, failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	BrokerKillingMapper.killedLeaderBefore = false;
	tryExecute(env, "Broker failure once test");

	// start a new broker:
	kafkaServer.restartBroker(leaderId);
}
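Outside the test harness, wiring a checkpointed Kafka source with this schema looks roughly as follows. This is a hedged sketch only: the broker address, group id and job name are placeholders, and the universal FlinkKafkaConsumer is used for concreteness where the test goes through kafkaServer.getConsumer(...).

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.enableCheckpointing(500);   // Kafka offsets become part of each checkpoint, enabling replay after failures

Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");   // placeholder broker address
props.setProperty("group.id", "broker-failure-demo");       // placeholder group id

DeserializationSchema<Integer> schema =
		new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

env.addSource(new FlinkKafkaConsumer<>("brokerFailureTestTopic", schema, props))
		.print();

env.execute("consume with checkpointed offsets");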
 
Example #27
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests the proper consumption when having more Flink sources than Kafka partitions, which means
 * that some Flink sources will read no partitions.
 */
public void runMultipleSourcesOnePartitionExactlyOnceTest() throws Exception {
	final String topic = "manyToOneTopic";
	final int numPartitions = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = numPartitions * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	final int parallelism = 8;

	createTestTopic(topic, numPartitions, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			numPartitions,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	// set the number of restarts to one. The failing mapper will fail once, then it's only success exceptions.
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();
	env.setBufferTimeout(0);

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
		.addSource(kafkaSource)
		.map(new PartitionValidatingMapper(numPartitions, 1))
		.map(new FailingIdentityMapper<Integer>(failAfterElements))
		.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "multi-source-one-partitions exactly once test");

	deleteTestTopic(topic);
}
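FailingIdentityMapper is another test helper that is not shown here. The sketch below only illustrates the failure-injection idea (the real class is more elaborate, e.g. it coordinates with checkpoints); the class name, field names and exception message are assumptions.

// Assumed sketch: pass elements through unchanged, but throw once after a configured count
// so that the job restarts exactly one time.
public class FailOnceIdentityMapper<T> implements MapFunction<T, T> {

	private static final long serialVersionUID = 1L;

	// static flag mirrors the "failedBefore" reset seen in the tests above:
	// after the first artificial failure, subsequent (restarted) executions pass through cleanly
	public static volatile boolean failedBefore = false;

	private final int failAfter;
	private int seen = 0;

	public FailOnceIdentityMapper(int failAfter) {
		this.failAfter = failAfter;
	}

	@Override
	public T map(T value) throws Exception {
		if (!failedBefore && ++seen >= failAfter) {
			failedBefore = true;
			throw new Exception("Artificial failure to exercise checkpoint recovery");
		}
		return value;
	}
}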
 
Example #28
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests the proper consumption when having fewer Flink sources than Kafka partitions, so
 * one Flink source will read multiple Kafka partitions.
 */
public void runOneSourceMultiplePartitionsExactlyOnceTest() throws Exception {
	final String topic = "oneToManyTopic";
	final int numPartitions = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = numPartitions * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	final int parallelism = 2;

	createTestTopic(topic, numPartitions, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			numPartitions,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(numPartitions, 3))
			.map(new FailingIdentityMapper<Integer>(failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "One-source-multi-partitions exactly once test");

	deleteTestTopic(topic);
}
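PartitionValidatingMapper is likewise defined elsewhere in the test base. As a rough, assumed sketch of the kind of check it performs: each integer value encodes its Kafka partition as value % numPartitions, and every parallel mapper instance may only ever observe at most maxPartitions distinct partitions.

// Assumed sketch; class name and error message are invented for illustration.
public class PartitionCountCheckingMapper implements MapFunction<Integer, Integer> {

	private static final long serialVersionUID = 1L;

	private final int numPartitions;
	private final int maxPartitions;
	private final Set<Integer> seenPartitions = new HashSet<>();

	public PartitionCountCheckingMapper(int numPartitions, int maxPartitions) {
		this.numPartitions = numPartitions;
		this.maxPartitions = maxPartitions;
	}

	@Override
	public Integer map(Integer value) throws Exception {
		seenPartitions.add(value % numPartitions);
		if (seenPartitions.size() > maxPartitions) {
			throw new Exception("Subtask observed " + seenPartitions.size()
					+ " partitions but at most " + maxPartitions + " were expected");
		}
		return value;
	}
}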
 
Example #29
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests the proper consumption when having a 1:1 correspondence between Kafka partitions and
 * Flink sources.
 */
public void runOneToOneExactlyOnceTest() throws Exception {

	final String topic = "oneToOneTopic";
	final int parallelism = 5;
	final int numElementsPerPartition = 1000;
	final int totalElements = parallelism * numElementsPerPartition;
	final int failAfterElements = numElementsPerPartition / 3;

	createTestTopic(topic, parallelism, 1);

	DataGenerators.generateRandomizedIntegerSequence(
			StreamExecutionEnvironment.getExecutionEnvironment(),
			kafkaServer,
			topic,
			parallelism,
			numElementsPerPartition,
			true);

	// run the topology that fails and recovers

	DeserializationSchema<Integer> schema =
			new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);

	FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

	env
			.addSource(kafkaSource)
			.map(new PartitionValidatingMapper(parallelism, 1))
			.map(new FailingIdentityMapper<Integer>(failAfterElements))
			.addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

	FailingIdentityMapper.failedBefore = false;
	tryExecute(env, "One-to-one exactly once test");

	deleteTestTopic(topic);
}
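ValidatingExactlyOnceSink is also a test helper not shown in these examples. The following is a simplified, assumed sketch of what such a sink checks: every value in 0..totalElements-1 arrives exactly once, a duplicate fails the job, and completion is signalled through the SuccessException convention that tryExecute() understands. The real sink additionally snapshots its state in checkpoints, which this sketch omits.

// Simplified, assumed sketch; not the actual ValidatingExactlyOnceSink.
public class DuplicateCheckingSink implements SinkFunction<Integer> {

	private static final long serialVersionUID = 1L;

	private final BitSet seen = new BitSet();
	private final int totalElements;
	private int count = 0;

	public DuplicateCheckingSink(int totalElements) {
		this.totalElements = totalElements;
	}

	@Override
	public void invoke(Integer value) throws Exception {
		if (seen.get(value)) {
			throw new Exception("Received a duplicate: " + value);
		}
		seen.set(value);
		if (++count == totalElements) {
			// tryExecute() treats SuccessException as a successful run
			throw new SuccessException();
		}
	}
}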
 
Example #30
Source File: KafkaProducerTestBase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * This test configures the KafkaProducer to automatically flush the data, and
 * fails the broker to check that records flushed since the last checkpoint are not duplicated.
 */
protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception {
	final String topic = (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") + sinksCount;
	final int partition = 0;
	final int numElements = 1000;
	final int failAfterElements = 333;

	for (int i = 0; i < sinksCount; i++) {
		createTestTopic(topic + i, 1, 1);
	}

	TypeInformationSerializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
	KeyedSerializationSchema<Integer> keyedSerializationSchema = new KeyedSerializationSchemaWrapper<>(schema);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();

	Properties properties = new Properties();
	properties.putAll(standardProps);
	properties.putAll(secureProps);

	// process exactly failAfterElements number of elements and then shutdown Kafka broker and fail application
	List<Integer> expectedElements = getIntegersSequence(numElements);

	DataStream<Integer> inputStream = env
		.addSource(new IntegerSource(numElements))
		.map(new FailingIdentityMapper<Integer>(failAfterElements));

	for (int i = 0; i < sinksCount; i++) {
		FlinkKafkaPartitioner<Integer> partitioner = new FlinkKafkaPartitioner<Integer>() {
			@Override
			public int partition(Integer record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
				return partition;
			}
		};

		if (regularSink) {
			StreamSink<Integer> kafkaSink = kafkaServer.getProducerSink(topic + i, keyedSerializationSchema, properties, partitioner);
			inputStream.addSink(kafkaSink.getUserFunction());
		} else {
			kafkaServer.produceIntoKafka(inputStream, topic + i, keyedSerializationSchema, properties, partitioner);
		}
	}

	FailingIdentityMapper.failedBefore = false;
	TestUtils.tryExecute(env, "Exactly once test");

	for (int i = 0; i < sinksCount; i++) {
		// assert that before failure we successfully snapshot/flushed all expected elements
		assertExactlyOnceForTopic(
			properties,
			topic + i,
			partition,
			expectedElements,
			KAFKA_READ_TIMEOUT);
		deleteTestTopic(topic + i);
	}
}
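The sink wiring above goes through the kafkaServer test harness. With the universal connector, an equivalent stand-alone producer-side setup would roughly look like the sketch below; the broker address, topic name and transaction timeout are assumptions, and inputStream refers to the failing-mapper stream built in the test above.

Properties producerProps = new Properties();
producerProps.setProperty("bootstrap.servers", "localhost:9092");   // placeholder broker address
producerProps.setProperty("transaction.timeout.ms", "60000");       // must not exceed the broker's transaction.max.timeout.ms

TypeInformationSerializationSchema<Integer> intSchema =
		new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

FlinkKafkaProducer<Integer> producer = new FlinkKafkaProducer<>(
		"exactlyOnceTopicRegularSink0",                     // topic name assumed for illustration
		new KeyedSerializationSchemaWrapper<>(intSchema),
		producerProps,
		FlinkKafkaProducer.Semantic.EXACTLY_ONCE);          // transactional writes, committed when checkpoints complete

inputStream.addSink(producer);                              // inputStream: the failing-mapper stream from the test above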