Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#getConfig()

The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#getConfig(). Each example is taken from an open-source project; the source file and project are noted above it. getConfig() returns the job-wide ExecutionConfig, which the examples below use to tune serialization (Kryo/POJO registration), object reuse, latency tracking, and restart behavior before the topology is built.
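Before the project examples, here is a minimal, self-contained sketch of the common pattern: obtain the ExecutionConfig from the environment, tune it, then build and execute the job. This is an illustrative sketch only; the pipeline and the MyEvent class are hypothetical placeholders, not taken from any of the projects listed below.

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class GetConfigExample {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// getConfig() exposes the job-wide ExecutionConfig; changes made here
		// apply to every operator created afterwards.
		ExecutionConfig config = env.getConfig();
		config.enableObjectReuse();              // avoid defensive copies between chained operators
		config.registerKryoType(MyEvent.class);  // pre-register a type for Kryo serialization

		env.fromElements(1, 2, 3)
				.map(x -> x * 2)
				.print();

		env.execute("getConfig() example");
	}

	// Hypothetical event type, used only to illustrate Kryo registration.
	public static class MyEvent { }
}
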
Example 1
Source File: SerializationFrameworkMiniBenchmarks.java    From flink-benchmarks with Apache License 2.0
@Benchmark
@OperationsPerInvocation(value = SerializationFrameworkMiniBenchmarks.RECORDS_PER_INVOCATION)
public void serializerKryo(FlinkEnvironmentContext context) throws Exception {
	StreamExecutionEnvironment env = context.env;
	env.setParallelism(4);
	ExecutionConfig executionConfig = env.getConfig();
	executionConfig.enableForceKryo();
	executionConfig.registerKryoType(MyPojo.class);
	executionConfig.registerKryoType(MyOperation.class);

	env.addSource(new PojoSource(RECORDS_PER_INVOCATION, 10))
			.rebalance()
			.addSink(new DiscardingSink<>());

	env.execute();
}
 
Example 2
Source File: ExecutorUtils.java    From flink with Apache License 2.0
/**
 * Sets batch properties for {@link StreamExecutionEnvironment}.
 */
public static void setBatchProperties(StreamExecutionEnvironment execEnv, TableConfig tableConfig) {
	ExecutionConfig executionConfig = execEnv.getConfig();
	executionConfig.enableObjectReuse();
	executionConfig.setLatencyTrackingInterval(-1);
	execEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	execEnv.setBufferTimeout(-1);
	if (isShuffleModeAllBlocking(tableConfig)) {
		executionConfig.setDefaultInputDependencyConstraint(InputDependencyConstraint.ALL);
	}
}
 
Example 3
Source File: SerializationFrameworkMiniBenchmarks.java    From flink-benchmarks with Apache License 2.0
@Benchmark
@OperationsPerInvocation(value = SerializationFrameworkMiniBenchmarks.RECORDS_PER_INVOCATION)
public void serializerHeavyString(FlinkEnvironmentContext context) throws Exception {
	StreamExecutionEnvironment env = context.env;
	env.setParallelism(1);
	ExecutionConfig executionConfig = env.getConfig();
	executionConfig.registerPojoType(MyPojo.class);
	executionConfig.registerPojoType(MyOperation.class);

	env.addSource(new LongStringSource(RECORDS_PER_INVOCATION, 12))
			.rebalance()
			.addSink(new DiscardingSink<>());

	env.execute();
}
 
Example 4
Source File: SerializationFrameworkMiniBenchmarks.java    From flink-benchmarks with Apache License 2.0
@Benchmark
@OperationsPerInvocation(value = SerializationFrameworkMiniBenchmarks.RECORDS_PER_INVOCATION)
public void serializerPojo(FlinkEnvironmentContext context) throws Exception {
	StreamExecutionEnvironment env = context.env;
	env.setParallelism(4);
	ExecutionConfig executionConfig = env.getConfig();
	executionConfig.registerPojoType(MyPojo.class);
	executionConfig.registerPojoType(MyOperation.class);

	env.addSource(new PojoSource(RECORDS_PER_INVOCATION, 10))
			.rebalance()
			.addSink(new DiscardingSink<>());

	env.execute();
}
 
Example 5
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Tests an iteration job with slot sharing disabled, checking the slot sharing group and co-location group.
 */
@Test
public void testIterationWithSlotSharingDisabled() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
	IterativeStream<Integer> iteration = source.iterate(3000);
	iteration.name("iteration").setParallelism(2);
	DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
	DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
	iteration.closeWith(filter).print();

	List<Transformation<?>> transformations = new ArrayList<>();
	transformations.add(source.getTransformation());
	transformations.add(iteration.getTransformation());
	transformations.add(map.getTransformation());
	transformations.add(filter.getTransformation());

	StreamGraphGenerator generator = new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
	generator.setSlotSharingEnabled(false);
	StreamGraph streamGraph = generator.generate();

	for (Tuple2<StreamNode, StreamNode> iterationPair : streamGraph.getIterationSourceSinkPairs()) {
		assertNotNull(iterationPair.f0.getCoLocationGroup());
		assertEquals(iterationPair.f0.getCoLocationGroup(), iterationPair.f1.getCoLocationGroup());

		assertNotNull(iterationPair.f0.getSlotSharingGroup());
		assertEquals(iterationPair.f0.getSlotSharingGroup(), iterationPair.f1.getSlotSharingGroup());
	}
}
 
Example 6
Source File: BatchExecutor.java    From flink with Apache License 2.0
/**
 * Sets batch properties on the given {@link StreamExecutionEnvironment}.
 */
private void setBatchProperties(StreamExecutionEnvironment execEnv) {
	ExecutionConfig executionConfig = execEnv.getConfig();
	executionConfig.enableObjectReuse();
	executionConfig.setLatencyTrackingInterval(-1);
	execEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	execEnv.setBufferTimeout(-1);
	if (isShuffleModeAllBatch()) {
		executionConfig.setDefaultInputDependencyConstraint(InputDependencyConstraint.ALL);
	}
}
 
Example 7
Source File: StreamGraph.java    From Flink-CEPplus with Apache License 2.0
public StreamGraph(StreamExecutionEnvironment environment) {
	this.environment = environment;
	this.executionConfig = environment.getConfig();
	this.checkpointConfig = environment.getCheckpointConfig();

	// create an empty new stream graph.
	clear();
}
 
Example 8
Source File: KafkaITCase.java    From flink with Apache License 2.0
/**
 * Kafka 2.0 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer<Long> kafkaSource = new FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example 9
Source File: Kafka011ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.11 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer011<Long> prod = new FlinkKafkaProducer011<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer011<Long> kafkaSource = new FlinkKafkaConsumer011<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example 10
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Tests delete behavior and metrics for the producer.
 */
public void runAllDeletesTest() throws Exception {
	final String topic = "alldeletestest";
	createTestTopic(topic, 1, 1);
	final int elementCount = 300;

	// ----------- Write some data into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());

	DataStream<Tuple2<byte[], PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<byte[], PojoValue>>() {
		@Override
		public void run(SourceContext<Tuple2<byte[], PojoValue>> ctx) throws Exception {
			Random rnd = new Random(1337);
			for (long i = 0; i < elementCount; i++) {
				final byte[] key = new byte[200];
				rnd.nextBytes(key);
				ctx.collect(new Tuple2<>(key, (PojoValue) null));
			}
		}

		@Override
		public void cancel() {
		}
	});

	TypeInformationKeyValueSerializationSchema<byte[], PojoValue> schema = new TypeInformationKeyValueSerializationSchema<>(byte[].class, PojoValue.class, env.getConfig());

	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "3");
	producerProperties.putAll(secureProps);
	kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);

	env.execute("Write deletes to Kafka");

	// ----------- Read the data again -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	DataStream<Tuple2<byte[], PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, schema, props));

	fromKafka.flatMap(new RichFlatMapFunction<Tuple2<byte[], PojoValue>, Object>() {
		long counter = 0;
		@Override
		public void flatMap(Tuple2<byte[], PojoValue> value, Collector<Object> out) throws Exception {
			// ensure that deleted messages are passed as nulls
			assertNull(value.f1);
			counter++;
			if (counter == elementCount) {
				// we got the right number of elements
				throw new SuccessException();
			}
		}
	});

	tryExecute(env, "Read deletes from Kafka");

	deleteTestTopic(topic);
}
 
Example 11
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
public void runKeyValueTest() throws Exception {
	final String topic = "keyvaluetest";
	createTestTopic(topic, 1, 1);
	final int elementCount = 5000;

	// ----------- Write some data into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	DataStream<Tuple2<Long, PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<Long, PojoValue>>() {
		@Override
		public void run(SourceContext<Tuple2<Long, PojoValue>> ctx) throws Exception {
			Random rnd = new Random(1337);
			for (long i = 0; i < elementCount; i++) {
				PojoValue pojo = new PojoValue();
				pojo.when = new Date(rnd.nextLong());
				pojo.lon = rnd.nextLong();
				pojo.lat = i;
				// make every second key null to ensure proper "null" serialization
				Long key = (i % 2 == 0) ? null : i;
				ctx.collect(new Tuple2<>(key, pojo));
			}
		}

		@Override
		public void cancel() {
		}
	});

	KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "3");
	kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
	env.execute("Write KV to Kafka");

	// ----------- Read the data again -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	KafkaDeserializationSchema<Tuple2<Long, PojoValue>> readSchema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));
	fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
		long counter = 0;
		@Override
		public void flatMap(Tuple2<Long, PojoValue> value, Collector<Object> out) throws Exception {
			// the elements should be in order.
			Assert.assertTrue("Wrong value " + value.f1.lat, value.f1.lat == counter);
			if (value.f1.lat % 2 == 0) {
				assertNull("key was not null", value.f0);
			} else {
				Assert.assertTrue("Wrong value " + value.f0, value.f0 == counter);
			}
			counter++;
			if (counter == elementCount) {
				// we got the right number of elements
				throw new SuccessException();
			}
		}
	});

	tryExecute(env, "Read KV from Kafka");

	deleteTestTopic(topic);
}
 
Example 12
Source File: KafkaITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Kafka 2.0 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer<Long> kafkaSource = new FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example 13
Source File: Kafka011ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.11 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer011<Long> prod = new FlinkKafkaProducer011<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer011<Long> kafkaSource = new FlinkKafkaConsumer011<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example 14
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Tests that slot sharing groups and co-location groups are set correctly for iteration vertices when slot sharing is disabled.
 */
@Test
public void testDisableSlotSharingForIteration() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
	IterativeStream<Integer> iteration = source.iterate(3000);
	iteration.name("iteration").setParallelism(2);
	DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
	DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
	iteration.closeWith(filter).print();

	List<Transformation<?>> transformations = new ArrayList<>();
	transformations.add(source.getTransformation());
	transformations.add(iteration.getTransformation());
	transformations.add(map.getTransformation());
	transformations.add(filter.getTransformation());
	// When slot sharing is disabled, all job vertices except the iteration vertices
	// have no slot sharing group; iteration source/sink vertices are assigned one automatically.
	StreamGraphGenerator generator = new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
	generator.setSlotSharingEnabled(false);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(generator.generate());

	SlotSharingGroup iterationSourceSlotSharingGroup = null;
	SlotSharingGroup iterationSinkSlotSharingGroup = null;

	CoLocationGroup iterationSourceCoLocationGroup = null;
	CoLocationGroup iterationSinkCoLocationGroup = null;

	for (JobVertex jobVertex : jobGraph.getVertices()) {
		if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
			iterationSourceSlotSharingGroup = jobVertex.getSlotSharingGroup();
			iterationSourceCoLocationGroup = jobVertex.getCoLocationGroup();
		} else if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
			iterationSinkSlotSharingGroup = jobVertex.getSlotSharingGroup();
			iterationSinkCoLocationGroup = jobVertex.getCoLocationGroup();
		} else {
			assertNull(jobVertex.getSlotSharingGroup());
		}
	}

	assertNotNull(iterationSourceSlotSharingGroup);
	assertNotNull(iterationSinkSlotSharingGroup);
	assertEquals(iterationSourceSlotSharingGroup, iterationSinkSlotSharingGroup);

	assertNotNull(iterationSourceCoLocationGroup);
	assertNotNull(iterationSinkCoLocationGroup);
	assertEquals(iterationSourceCoLocationGroup, iterationSinkCoLocationGroup);
}
 
Example 15
Source File: KafkaITCase.java    From flink with Apache License 2.0
/**
 * Kafka 2.0 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer<Long> kafkaSource = new FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example 16
Source File: Kafka010ITCase.java    From flink with Apache License 2.0
/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255105836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1000L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	});
	prod.setParallelism(3);
	prod.setWriteTimestampToKafka(true);
	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111073247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 10 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example 17
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
public void runKeyValueTest() throws Exception {
	final String topic = "keyvaluetest";
	createTestTopic(topic, 1, 1);
	final int elementCount = 5000;

	// ----------- Write some data into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());

	DataStream<Tuple2<Long, PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<Long, PojoValue>>() {
		@Override
		public void run(SourceContext<Tuple2<Long, PojoValue>> ctx) throws Exception {
			Random rnd = new Random(1337);
			for (long i = 0; i < elementCount; i++) {
				PojoValue pojo = new PojoValue();
				pojo.when = new Date(rnd.nextLong());
				pojo.lon = rnd.nextLong();
				pojo.lat = i;
				// make every second key null to ensure proper "null" serialization
				Long key = (i % 2 == 0) ? null : i;
				ctx.collect(new Tuple2<>(key, pojo));
			}
		}

		@Override
		public void cancel() {
		}
	});

	KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "3");
	kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
	env.execute("Write KV to Kafka");

	// ----------- Read the data again -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());

	KafkaDeserializationSchema<Tuple2<Long, PojoValue>> readSchema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));
	fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
		long counter = 0;
		@Override
		public void flatMap(Tuple2<Long, PojoValue> value, Collector<Object> out) throws Exception {
			// the elements should be in order.
			Assert.assertTrue("Wrong value " + value.f1.lat, value.f1.lat == counter);
			if (value.f1.lat % 2 == 0) {
				assertNull("key was not null", value.f0);
			} else {
				Assert.assertTrue("Wrong value " + value.f0, value.f0 == counter);
			}
			counter++;
			if (counter == elementCount) {
				// we got the right number of elements
				throw new SuccessException();
			}
		}
	});

	tryExecute(env, "Read KV from Kafka");

	deleteTestTopic(topic);
}
 
Example 18
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0
public void runKeyValueTest() throws Exception {
	final String topic = "keyvaluetest";
	createTestTopic(topic, 1, 1);
	final int elementCount = 5000;

	// ----------- Write some data into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	DataStream<Tuple2<Long, PojoValue>> kvStream = env.addSource(new SourceFunction<Tuple2<Long, PojoValue>>() {
		@Override
		public void run(SourceContext<Tuple2<Long, PojoValue>> ctx) throws Exception {
			Random rnd = new Random(1337);
			for (long i = 0; i < elementCount; i++) {
				PojoValue pojo = new PojoValue();
				pojo.when = new Date(rnd.nextLong());
				pojo.lon = rnd.nextLong();
				pojo.lat = i;
				// make every second key null to ensure proper "null" serialization
				Long key = (i % 2 == 0) ? null : i;
				ctx.collect(new Tuple2<>(key, pojo));
			}
		}

		@Override
		public void cancel() {
		}
	});

	KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "3");
	kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null);
	env.execute("Write KV to Kafka");

	// ----------- Read the data again -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	KafkaDeserializationSchema<Tuple2<Long, PojoValue>> readSchema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));
	fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
		long counter = 0;
		@Override
		public void flatMap(Tuple2<Long, PojoValue> value, Collector<Object> out) throws Exception {
			// the elements should be in order.
			Assert.assertTrue("Wrong value " + value.f1.lat, value.f1.lat == counter);
			if (value.f1.lat % 2 == 0) {
				assertNull("key was not null", value.f0);
			} else {
				Assert.assertTrue("Wrong value " + value.f0, value.f0 == counter);
			}
			counter++;
			if (counter == elementCount) {
				// we got the right number of elements
				throw new SuccessException();
			}
		}
	});

	tryExecute(env, "Read KV from Kafka");

	deleteTestTopic(topic);
}
 
Example 19
Source File: Kafka010ITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255105836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1000L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	});
	prod.setParallelism(3);
	prod.setWriteTimestampToKafka(true);
	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111073247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 10 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}
 
Example 20
Source File: Kafka011ITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Kafka 0.11 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer011<Long> prod = new FlinkKafkaProducer011<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer011<Long> kafkaSource = new FlinkKafkaConsumer011<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}