org.apache.flink.util.PropertiesUtil Java Examples

The following examples show how to use org.apache.flink.util.PropertiesUtil. Each example notes the open source project and source file it was taken from.
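Before the examples, here is a minimal, self-contained sketch of the helpers that appear throughout them (getInt, getLong, getBoolean, and flatten); the keys and default values below are made up purely for illustration.

import java.util.Properties;

import org.apache.flink.util.PropertiesUtil;

public class PropertiesUtilSketch {
	public static void main(String[] args) {
		Properties defaults = new Properties();
		defaults.setProperty("fetch.interval.ms", "200");

		// Properties created on top of defaults keep those values in a nested Properties object.
		Properties props = new Properties(defaults);
		props.setProperty("max.records", "500");
		props.setProperty("metrics.enabled", "true");

		// Typed reads that fall back to the given default when the key is absent.
		int maxRecords = PropertiesUtil.getInt(props, "max.records", 100);                // 500
		long fetchInterval = PropertiesUtil.getLong(props, "fetch.interval.ms", 1000L);   // 200, found via defaults
		boolean metrics = PropertiesUtil.getBoolean(props, "metrics.enabled", false);     // true
		long commitInterval = PropertiesUtil.getLong(props, "commit.interval.ms", 5000L); // 5000, key not set

		// flatten() copies nested default properties into first-level entries, which matters for
		// code that treats Properties as a plain HashMap (see the Kafka shuffle examples below).
		Properties flat = PropertiesUtil.flatten(props);

		System.out.println(maxRecords + " " + fetchInterval + " " + metrics + " " + commitInterval);
		System.out.println(flat.getProperty("fetch.interval.ms")); // 200
	}
}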
Example #1
Source File: FlinkKafkaShuffleProducer.java    From flink with Apache License 2.0
FlinkKafkaShuffleProducer(
		String defaultTopicId,
		TypeSerializer<IN> typeSerializer,
		Properties props,
		KeySelector<IN, KEY> keySelector,
		Semantic semantic,
		int kafkaProducersPoolSize) {
	super(defaultTopicId, (element, timestamp) -> null, props, semantic, kafkaProducersPoolSize);

	this.kafkaSerializer = new KafkaSerializer<>(typeSerializer);
	this.keySelector = keySelector;

	Preconditions.checkArgument(
		props.getProperty(PARTITION_NUMBER) != null,
		"Missing partition number for Kafka Shuffle");
	// The precondition above guarantees the key is present, so the Integer.MIN_VALUE default never takes effect.
	numberOfPartitions = PropertiesUtil.getInt(props, PARTITION_NUMBER, Integer.MIN_VALUE);
}
 
Example #2
Source File: FlinkKafkaConsumer08.java    From Flink-CEPplus with Apache License 2.0
@Override
protected AbstractFetcher<T, ?> createFetcher(
		SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
		SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
		SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
		StreamingRuntimeContext runtimeContext,
		OffsetCommitMode offsetCommitMode,
		MetricGroup consumerMetricGroup,
		boolean useMetrics) throws Exception {

	long autoCommitInterval = (offsetCommitMode == OffsetCommitMode.KAFKA_PERIODIC)
			? PropertiesUtil.getLong(kafkaProperties, "auto.commit.interval.ms", 60000)
			: -1; // this disables the periodic offset committer thread in the fetcher

	return new Kafka08Fetcher<>(
			sourceContext,
			assignedPartitionsWithInitialOffsets,
			watermarksPeriodic,
			watermarksPunctuated,
			runtimeContext,
			deserializer,
			kafkaProperties,
			autoCommitInterval,
			consumerMetricGroup,
			useMetrics);
}
 
Example #3
Source File: KafkaShuffleITCase.java    From flink with Apache License 2.0
private Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> testKafkaShuffleProducer(
		String topic,
		StreamExecutionEnvironment env,
		int numberOfPartitions,
		int producerParallelism,
		int numElementsPerProducer,
		TimeCharacteristic timeCharacteristic) throws Exception {
	createTestTopic(topic, numberOfPartitions, 1);

	env.setParallelism(producerParallelism);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(timeCharacteristic);

	DataStream<Tuple3<Integer, Long, Integer>> source =
		env.addSource(new KafkaSourceFunction(numElementsPerProducer, false)).setParallelism(producerParallelism);
	DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime) ?
		source.assignTimestampsAndWatermarks(new PunctuatedExtractor()).setParallelism(producerParallelism) : source;

	Properties properties = kafkaServer.getStandardProperties();
	Properties kafkaProperties = PropertiesUtil.flatten(properties);

	kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism));
	kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions));
	kafkaProperties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
	kafkaProperties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
	FlinkKafkaShuffle.writeKeyBy(input, topic, kafkaProperties, 0);

	env.execute("Write to " + topic);
	ImmutableMap.Builder<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results = ImmutableMap.builder();

	for (int p = 0; p < numberOfPartitions; p++) {
		results.put(p, kafkaServer.getAllRecordsFromTopic(kafkaProperties, topic, p, 5000));
	}

	deleteTestTopic(topic);

	return results.build();
}
 
Example #4
Source File: FlinkKafkaConsumer08.java    From flink with Apache License 2.0
@Override
protected AbstractFetcher<T, ?> createFetcher(
		SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
		SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
		SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
		StreamingRuntimeContext runtimeContext,
		OffsetCommitMode offsetCommitMode,
		MetricGroup consumerMetricGroup,
		boolean useMetrics) throws Exception {

	long autoCommitInterval = (offsetCommitMode == OffsetCommitMode.KAFKA_PERIODIC)
			? PropertiesUtil.getLong(kafkaProperties, "auto.commit.interval.ms", 60000)
			: -1; // this disables the periodic offset committer thread in the fetcher

	return new Kafka08Fetcher<>(
			sourceContext,
			assignedPartitionsWithInitialOffsets,
			watermarksPeriodic,
			watermarksPunctuated,
			runtimeContext,
			deserializer,
			kafkaProperties,
			autoCommitInterval,
			consumerMetricGroup,
			useMetrics);
}
 
Example #5
Source File: FlinkKafkaShuffleConsumer.java    From flink with Apache License 2.0
FlinkKafkaShuffleConsumer(
		String topic,
		TypeInformationSerializationSchema<T> schema,
		TypeSerializer<T> typeSerializer,
		Properties props) {
	// The schema is needed to call the right FlinkKafkaConsumer constructor.
	// It is never used, can be `null`, but `null` confuses the compiler.
	super(topic, schema, props);
	this.typeSerializer = typeSerializer;

	Preconditions.checkArgument(
		props.getProperty(PRODUCER_PARALLELISM) != null,
		"Missing producer parallelism for Kafka Shuffle");
	producerParallelism = PropertiesUtil.getInt(props, PRODUCER_PARALLELISM, Integer.MAX_VALUE);
}
 
Example #6
Source File: FlinkKafkaConsumer010.java    From flink with Apache License 2.0
@Override
protected boolean getIsAutoCommitEnabled() {
	return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
		PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
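The same check recurs in several consumer versions below. As a rough standalone sketch (this is not the actual protected method, just an equivalent expression on a Properties object), it behaves like this for a few illustrative configurations:

import java.util.Properties;

import org.apache.flink.util.PropertiesUtil;
import org.apache.kafka.clients.consumer.ConsumerConfig;

public class AutoCommitCheckSketch {

	// Mirrors the expression used by getIsAutoCommitEnabled() above.
	static boolean isAutoCommitEnabled(Properties properties) {
		return PropertiesUtil.getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true)
			&& PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
	}

	public static void main(String[] args) {
		Properties p = new Properties();
		System.out.println(isAutoCommitEnabled(p)); // true: both defaults apply (enabled, 5000 ms)

		p.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
		System.out.println(isAutoCommitEnabled(p)); // false: auto commit explicitly disabled

		p.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
		p.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "0");
		System.out.println(isAutoCommitEnabled(p)); // false: a zero commit interval disables it
	}
}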
 
Example #7
Source File: FlinkKafkaConsumer09.java    From Flink-CEPplus with Apache License 2.0
@Override
protected boolean getIsAutoCommitEnabled() {
	return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
			PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
 
Example #8
Source File: FlinkKafkaConsumer.java    From flink with Apache License 2.0
@Override
protected boolean getIsAutoCommitEnabled() {
	return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
		PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
 
Example #9
Source File: FlinkKafkaShuffle.java    From flink with Apache License 2.0
/**
 * The read side of {@link FlinkKafkaShuffle#persistentKeyBy}.
 *
 * <p>Each consumer task should read the Kafka partitions that correspond to the key group indices it is assigned.
 * The number of Kafka partitions is the maximum parallelism of the consumer.
 * This version only supports numberOfPartitions = consumerParallelism.
 * In the case of using {@link TimeCharacteristic#EventTime}, a consumer task is responsible for emitting
 * watermarks. Watermarks are read from the corresponding Kafka partitions. Notice that a consumer task only starts
 * to emit a watermark after receiving at least one watermark from each producer task, to make sure watermarks
 * are monotonically increasing. Hence, a consumer task needs to know `producerParallelism` as well.
 *
 * <p>Attention: make sure kafkaProperties include
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly.
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer.
 * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions.
 * They are not necessarily the same and can be set independently.
 *
 * @see FlinkKafkaShuffle#persistentKeyBy
 * @see FlinkKafkaShuffle#writeKeyBy
 *
 * @param topic 			The topic of Kafka where data is persisted
 * @param env 				Execution environment. readKeyBy's environment can be different from writeKeyBy's
 * @param typeInformation 	Type information of the data persisted in Kafka
 * @param kafkaProperties 	kafka properties for Kafka Consumer
 * @param keySelector 		key selector to retrieve key
 * @param <T> 				Schema type
 * @param <K> 				Key type
 * @return Keyed data stream
 */
public static <T, K> KeyedStream<T, K> readKeyBy(
		String topic,
		StreamExecutionEnvironment env,
		TypeInformation<T> typeInformation,
		Properties kafkaProperties,
		KeySelector<T, K> keySelector) {

	TypeSerializer<T> typeSerializer = typeInformation.createSerializer(env.getConfig());
	TypeInformationSerializationSchema<T> schema =
		new TypeInformationSerializationSchema<>(typeInformation, typeSerializer);

	SourceFunction<T> kafkaConsumer  =
		new FlinkKafkaShuffleConsumer<>(topic, schema, typeSerializer, kafkaProperties);

	// TODO: consider situations where numberOfPartitions != consumerParallelism
	Preconditions.checkArgument(
		kafkaProperties.getProperty(PARTITION_NUMBER) != null,
		"Missing partition number for Kafka Shuffle");
	int numberOfPartitions = PropertiesUtil.getInt(kafkaProperties, PARTITION_NUMBER, Integer.MIN_VALUE);
	DataStream<T> outputDataStream = env.addSource(kafkaConsumer).setParallelism(numberOfPartitions);

	return DataStreamUtils.reinterpretAsKeyedStream(outputDataStream, keySelector);
}
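For orientation, here is a rough sketch of how a separate job might consume a shuffle topic written earlier by writeKeyBy (Example #22 below). The topic name, broker address, record type, and parallelism values are illustrative, and the sketch assumes the PRODUCER_PARALLELISM and PARTITION_NUMBER constants are accessible from your code; both values must match the writing job.

import java.util.Properties;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle;

public class ReadShuffleSketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		Properties kafkaProps = new Properties();
		kafkaProps.setProperty("bootstrap.servers", "localhost:9092");
		// Both values must match the job that wrote the topic (assuming the constants are visible here).
		kafkaProps.setProperty(FlinkKafkaShuffle.PRODUCER_PARALLELISM, "4");
		kafkaProps.setProperty(FlinkKafkaShuffle.PARTITION_NUMBER, "8");

		TypeInformation<Tuple2<Integer, String>> typeInfo =
			TypeInformation.of(new TypeHint<Tuple2<Integer, String>>() {});

		KeyedStream<Tuple2<Integer, String>, Integer> keyed = FlinkKafkaShuffle.readKeyBy(
			"shuffle-demo", env, typeInfo, kafkaProps,
			new KeySelector<Tuple2<Integer, String>, Integer>() {
				@Override
				public Integer getKey(Tuple2<Integer, String> value) {
					return value.f0;
				}
			});

		keyed.print();
		env.execute("Read Kafka shuffle sketch");
	}
}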
 
Example #10
Source File: FlinkKafkaShuffle.java    From flink with Apache License 2.0
/**
 * Uses Kafka as a message bus to persist keyBy shuffle.
 *
 * <p>Persisting keyBy shuffle is achieved by wrapping a {@link FlinkKafkaShuffleProducer} and
 * {@link FlinkKafkaShuffleConsumer} together.
 *
 * <p>On the producer side, {@link FlinkKafkaShuffleProducer}
 * is similar to {@link DataStream#keyBy(KeySelector)}. They use the same key group assignment function
 * {@link KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes to.
 * Hence, each producer task can potentially write to each Kafka partition based on where the key goes.
 * Here, `numberOfPartitions` equals the key group size.
 * In the case of using {@link TimeCharacteristic#EventTime}, each producer task broadcasts its watermark
 * to ALL of the Kafka partitions to make sure watermark information is propagated correctly.
 *
 * <p>On the consumer side, each consumer task should read partitions equal to the key group indices
 * it is assigned. `numberOfPartitions` is the maximum parallelism of the consumer. This version only
 * supports numberOfPartitions = consumerParallelism.
 * In the case of using {@link TimeCharacteristic#EventTime}, a consumer task is responsible for emitting
 * watermarks. Watermarks are read from the corresponding Kafka partitions. Notice that a consumer task only starts
 * to emit a watermark after reading at least one watermark from each producer task, to make sure watermarks
 * are monotonically increasing. Hence, a consumer task needs to know `producerParallelism` as well.
 *
 * @see FlinkKafkaShuffle#writeKeyBy
 * @see FlinkKafkaShuffle#readKeyBy
 *
 * @param dataStream 			Data stream to be shuffled
 * @param topic 				Kafka topic written to
 * @param producerParallelism 	Parallelism of producer
 * @param numberOfPartitions 	Number of partitions
 * @param properties 			Kafka properties
 * @param keySelector 			Key selector to retrieve key from `dataStream`
 * @param <T> 					Type of the input data stream
 * @param <K> 					Type of key
 */
public static <T, K> KeyedStream<T, K> persistentKeyBy(
		DataStream<T> dataStream,
		String topic,
		int producerParallelism,
		int numberOfPartitions,
		Properties properties,
		KeySelector<T, K> keySelector) {
	// KafkaProducer#propsToMap uses Properties purely as a HashMap without considering the default properties.
	// So we have to flatten the default properties to first-level elements.
	Properties kafkaProperties = PropertiesUtil.flatten(properties);
	kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism));
	kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions));

	StreamExecutionEnvironment env = dataStream.getExecutionEnvironment();

	writeKeyBy(dataStream, topic, kafkaProperties, keySelector);
	return readKeyBy(topic, env, dataStream.getType(), kafkaProperties, keySelector);
}
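As a usage sketch under the same caveats (hypothetical topic, broker address, and parallelism values), a job can shuffle a stream through Kafka and continue with a keyed stream like this:

import java.util.Properties;

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle;

public class PersistentKeyBySketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		// The wrapped producer uses exactly-once semantics, so checkpointing is typically required.
		env.enableCheckpointing(1000);

		DataStream<Tuple2<Integer, String>> source = env.fromElements(
			Tuple2.of(1, "a"), Tuple2.of(2, "b"), Tuple2.of(1, "c"));

		Properties kafkaProps = new Properties();
		kafkaProps.setProperty("bootstrap.servers", "localhost:9092");

		// Write through the topic "shuffle-demo" with 4 producer subtasks into 8 partitions, then read
		// it back as a keyed stream; PRODUCER_PARALLELISM and PARTITION_NUMBER are set internally.
		KeyedStream<Tuple2<Integer, String>, Integer> keyed = FlinkKafkaShuffle.persistentKeyBy(
			source, "shuffle-demo", 4, 8, kafkaProps,
			new KeySelector<Tuple2<Integer, String>, Integer>() {
				@Override
				public Integer getKey(Tuple2<Integer, String> value) {
					return value.f0;
				}
			});

		keyed.print();
		env.execute("Kafka shuffle sketch");
	}
}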
 
Example #11
Source File: LogUtil.java    From aliyun-log-flink-connector with Apache License 2.0
public static long getFetchIntervalMillis(Properties properties) {
    return PropertiesUtil.getLong(properties,
            ConfigConstants.LOG_FETCH_DATA_INTERVAL_MILLIS,
            Consts.DEFAULT_FETCH_INTERVAL_MILLIS);
}
 
Example #12
Source File: LogUtil.java    From aliyun-log-flink-connector with Apache License 2.0
public static int getNumberPerFetch(Properties properties) {
    return PropertiesUtil.getInt(properties,
            ConfigConstants.LOG_MAX_NUMBER_PER_FETCH,
            Consts.DEFAULT_NUMBER_PER_FETCH);
}
 
Example #13
Source File: LogUtil.java    From aliyun-log-flink-connector with Apache License 2.0
public static long getCommitIntervalMs(Properties props) {
    return PropertiesUtil.getLong(props,
            ConfigConstants.LOG_COMMIT_INTERVAL_MILLIS,
            Consts.DEFAULT_COMMIT_INTERVAL_MILLIS);
}
 
Example #14
Source File: LogUtil.java    From aliyun-log-flink-connector with Apache License 2.0
public static long getDiscoveryIntervalMs(Properties props) {
    return PropertiesUtil.getLong(props,
            ConfigConstants.LOG_SHARDS_DISCOVERY_INTERVAL_MILLIS,
            Consts.DEFAULT_SHARDS_DISCOVERY_INTERVAL_MILLIS);
}
 
Example #15
Source File: ConfigWrapper.java    From aliyun-log-flink-connector with Apache License 2.0
public long getLong(String key, long defaultValue) {
    return PropertiesUtil.getLong(props, key, defaultValue);
}
 
Example #16
Source File: ConfigWrapper.java    From aliyun-log-flink-connector with Apache License 2.0
public int getInt(String key, int defaultValue) {
    return PropertiesUtil.getInt(props, key, defaultValue);
}
 
Example #17
Source File: FlinkKafkaConsumer.java    From flink with Apache License 2.0
@Override
protected boolean getIsAutoCommitEnabled() {
	return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
		PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
 
Example #18
Source File: FlinkKafkaConsumer08.java    From flink with Apache License 2.0
@Override
protected boolean getIsAutoCommitEnabled() {
	return PropertiesUtil.getBoolean(kafkaProperties, "auto.commit.enable", true) &&
			PropertiesUtil.getLong(kafkaProperties, "auto.commit.interval.ms", 60000) > 0;
}
 
Example #19
Source File: FlinkKafkaConsumer09.java    From flink with Apache License 2.0
@Override
protected boolean getIsAutoCommitEnabled() {
	return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
			PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
 
Example #20
Source File: FlinkKafkaConsumer.java    From Flink-CEPplus with Apache License 2.0
@Override
protected boolean getIsAutoCommitEnabled() {
	return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
		PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
 
Example #21
Source File: FlinkKafkaConsumer08.java    From Flink-CEPplus with Apache License 2.0
@Override
protected boolean getIsAutoCommitEnabled() {
	return PropertiesUtil.getBoolean(kafkaProperties, "auto.commit.enable", true) &&
			PropertiesUtil.getLong(kafkaProperties, "auto.commit.interval.ms", 60000) > 0;
}
 
Example #22
Source File: FlinkKafkaShuffle.java    From flink with Apache License 2.0
/**
 * The write side of {@link FlinkKafkaShuffle#persistentKeyBy}.
 *
 * <p>This function contains a {@link FlinkKafkaShuffleProducer} to shuffle and persist data in Kafka.
 * {@link FlinkKafkaShuffleProducer} uses the same key group assignment function
 * {@link KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes to.
 * Hence, each producer task can potentially write to each Kafka partition based on the key.
 * Here, the number of partitions equals the key group size.
 * In the case of using {@link TimeCharacteristic#EventTime}, each producer task broadcasts each watermark
 * to all of the Kafka partitions to make sure watermark information is propagated properly.
 *
 * <p>Attention: make sure kafkaProperties include
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly.
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer.
 * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions.
 * They are not necessarily the same and can be set independently.
 *
 * @see FlinkKafkaShuffle#persistentKeyBy
 * @see FlinkKafkaShuffle#readKeyBy
 *
 * @param dataStream 			Data stream to be shuffled
 * @param topic 				Kafka topic written to
 * @param kafkaProperties 		Kafka properties for Kafka Producer
 * @param keySelector 			Key selector to retrieve key from `dataStream`
 * @param <T> 					Type of the input data stream
 * @param <K> 					Type of key
 */
public static <T, K> void writeKeyBy(
		DataStream<T> dataStream,
		String topic,
		Properties kafkaProperties,
		KeySelector<T, K> keySelector) {

	StreamExecutionEnvironment env = dataStream.getExecutionEnvironment();
	TypeSerializer<T> typeSerializer = dataStream.getType().createSerializer(env.getConfig());

	// write data to Kafka
	FlinkKafkaShuffleProducer<T, K> kafkaProducer = new FlinkKafkaShuffleProducer<>(
		topic,
		typeSerializer,
		kafkaProperties,
		env.clean(keySelector),
		FlinkKafkaProducer.Semantic.EXACTLY_ONCE,
		FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE);

	// make sure the sink parallelism is set to producerParallelism
	Preconditions.checkArgument(
		kafkaProperties.getProperty(PRODUCER_PARALLELISM) != null,
		"Missing producer parallelism for Kafka Shuffle");
	int producerParallelism = PropertiesUtil.getInt(kafkaProperties, PRODUCER_PARALLELISM, Integer.MIN_VALUE);

	addKafkaShuffle(dataStream, kafkaProducer, producerParallelism);
}
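To complete the picture from Example #9, here is a rough sketch of the write-side job. As before, the topic, broker address, and parallelism values are illustrative, and the PRODUCER_PARALLELISM and PARTITION_NUMBER constants are assumed to be accessible from your code.

import java.util.Properties;

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle;

public class WriteShuffleSketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		// The shuffle producer uses exactly-once semantics, so checkpointing is typically required.
		env.enableCheckpointing(1000);

		DataStream<Tuple2<Integer, String>> source = env.fromElements(
			Tuple2.of(1, "a"), Tuple2.of(2, "b"), Tuple2.of(1, "c"));

		Properties kafkaProps = new Properties();
		kafkaProps.setProperty("bootstrap.servers", "localhost:9092");
		// writeKeyBy requires both keys to be present (see the Javadoc above).
		kafkaProps.setProperty(FlinkKafkaShuffle.PRODUCER_PARALLELISM, "4");
		kafkaProps.setProperty(FlinkKafkaShuffle.PARTITION_NUMBER, "8");

		FlinkKafkaShuffle.writeKeyBy(source, "shuffle-demo", kafkaProps,
			new KeySelector<Tuple2<Integer, String>, Integer>() {
				@Override
				public Integer getKey(Tuple2<Integer, String> value) {
					return value.f0;
				}
			});

		env.execute("Write Kafka shuffle sketch");
	}
}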