Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#getProperties()

The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#getProperties(). They are drawn from open source projects; the originating project and source file are noted above each example.
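As a quick orientation before the project examples, here is a minimal, self-contained sketch (the argument names are hypothetical) of what getProperties() returns: ParameterTool gathers --key value pairs and getProperties() exposes them as a java.util.Properties copy that can be handed to connector constructors.

import java.util.Properties;

import org.apache.flink.api.java.utils.ParameterTool;

public class ParameterToolPropertiesDemo {

    public static void main(String[] args) {
        // e.g. run with: --bootstrap.servers localhost:9092 --group.id demo
        ParameterTool parameterTool = ParameterTool.fromArgs(args);

        // getProperties() copies every parameter into a java.util.Properties instance
        Properties props = parameterTool.getProperties();
        System.out.println("bootstrap.servers = " + props.getProperty("bootstrap.servers"));
    }
}
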
Example 1
Source File: KafkaConfigUtil.java    From flink-learning with Apache License 2.0
/**
 * Builds the Kafka configuration from the job parameters.
 *
 * @param parameterTool the parameter tool holding the job configuration
 * @return the Kafka consumer properties
 */
public static Properties buildKafkaProps(ParameterTool parameterTool) {
    Properties props = parameterTool.getProperties();
    props.put("bootstrap.servers", parameterTool.get(PropertiesConstants.KAFKA_BROKERS, DEFAULT_KAFKA_BROKERS));
    props.put("zookeeper.connect", parameterTool.get(PropertiesConstants.KAFKA_ZOOKEEPER_CONNECT, DEFAULT_KAFKA_ZOOKEEPER_CONNECT));
    props.put("group.id", parameterTool.get(PropertiesConstants.KAFKA_GROUP_ID, DEFAULT_KAFKA_GROUP_ID));
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "latest");
    return props;
}
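A brief usage sketch of the helper above, wiring the returned properties into a Kafka source; the topic key, its default value and the connector version (FlinkKafkaConsumer011) are assumptions for illustration, not taken from the project:

// Usage sketch (assumed names): feed buildKafkaProps into a Kafka consumer source.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
ParameterTool parameterTool = ParameterTool.fromArgs(args);

Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);
FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<>(
        parameterTool.get("kafka.topic", "metrics"),   // topic key and default are assumptions
        new SimpleStringSchema(),
        props);

env.addSource(consumer).print();
env.execute("buildKafkaProps usage sketch");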
 
Example 2
Source File: KinesisExampleTest.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	LOG.info("System properties: {}", System.getProperties());
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	PubsubClient pubsub = new KinesisPubsubClient(parameterTool.getProperties());
	pubsub.createTopic(inputStream, 2, parameterTool.getProperties());
	pubsub.createTopic(outputStream, 2, parameterTool.getProperties());

	// The example job needs to start after streams are created and run in parallel to the validation logic.
	// The thread that runs the job won't terminate, we don't have a job reference to cancel it.
	// Once results are validated, the driver main thread will exit; job/cluster will be terminated from script.
	final AtomicReference<Exception> executeException = new AtomicReference<>();
	Thread executeThread =
		new Thread(
			() -> {
				try {
					KinesisExample.main(args);
					// this message won't appear in the log,
					// job is terminated when shutting down cluster
					LOG.info("executed program");
				} catch (Exception e) {
					executeException.set(e);
				}
			});
	executeThread.start();

	// generate input
	String[] messages = {
		"elephant,5,45218",
		"squirrel,12,46213",
		"bee,3,51348",
		"squirrel,22,52444",
		"bee,10,53412",
		"elephant,9,54867"
	};
	for (String msg : messages) {
		pubsub.sendMessage(inputStream, msg);
	}
	LOG.info("generated records");

	Deadline deadline  = Deadline.fromNow(Duration.ofSeconds(60));
	List<String> results = pubsub.readAllMessages(outputStream);
	while (deadline.hasTimeLeft() && executeException.get() == null && results.size() < messages.length) {
		LOG.info("waiting for results..");
		Thread.sleep(1000);
		results = pubsub.readAllMessages(outputStream);
	}

	if (executeException.get() != null) {
		throw executeException.get();
	}

	LOG.info("results: {}", results);
	Assert.assertEquals("Results received from '" + outputStream + "': " + results,
		messages.length, results.size());

	String[] expectedResults = {
		"elephant,5,45218",
		"elephant,14,54867",
		"squirrel,12,46213",
		"squirrel,34,52444",
		"bee,3,51348",
		"bee,13,53412"
	};

	for (String expectedResult : expectedResults) {
		Assert.assertTrue(expectedResult, results.contains(expectedResult));
	}

	// TODO: main thread needs to create job or CLI fails with:
	// "The program didn't contain a Flink job. Perhaps you forgot to call execute() on the execution environment."
	System.out.println("test finished");
	System.exit(0);
}
 
Example 3
Source File: KinesisExample.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
		inputStream,
		new KafkaEventSchema(),
		parameterTool.getProperties());
	consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

	Properties producerProperties = new Properties(parameterTool.getProperties());
	// producer needs region even when URL is specified
	producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
	// test driver does not deaggregate
	producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

	// KPL does not recognize endpoint URL..
	String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
	if (kinesisUrl != null) {
		URL url = new URL(kinesisUrl);
		producerProperties.put("KinesisEndpoint", url.getHost());
		producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
		producerProperties.put("VerifyCertificate", "false");
	}

	FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
		new KafkaEventSchema(),
		producerProperties);
	producer.setDefaultStream(outputStream);
	producer.setDefaultPartition("fakePartition");

	DataStream<KafkaEvent> input = env
		.addSource(consumer)
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(producer);
	env.execute();
}
 
Example 4
Source File: KinesisExampleTest.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	LOG.info("System properties: {}", System.getProperties());
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	PubsubClient pubsub = new KinesisPubsubClient(parameterTool.getProperties());
	pubsub.createTopic(inputStream, 2, parameterTool.getProperties());
	pubsub.createTopic(outputStream, 2, parameterTool.getProperties());

	// The example job needs to start after streams are created and run in parallel to the validation logic.
	// The thread that runs the job won't terminate, we don't have a job reference to cancel it.
	// Once results are validated, the driver main thread will exit; job/cluster will be terminated from script.
	final AtomicReference<Exception> executeException = new AtomicReference<>();
	Thread executeThread =
		new Thread(
			() -> {
				try {
					KinesisExample.main(args);
					// this message won't appear in the log,
					// job is terminated when shutting down cluster
					LOG.info("executed program");
				} catch (Exception e) {
					executeException.set(e);
				}
			});
	executeThread.start();

	// generate input
	String[] messages = {
		"elephant,5,45218",
		"squirrel,12,46213",
		"bee,3,51348",
		"squirrel,22,52444",
		"bee,10,53412",
		"elephant,9,54867"
	};
	for (String msg : messages) {
		pubsub.sendMessage(inputStream, msg);
	}
	LOG.info("generated records");

	Deadline deadline  = Deadline.fromNow(Duration.ofSeconds(60));
	List<String> results = pubsub.readAllMessages(outputStream);
	while (deadline.hasTimeLeft() && executeException.get() == null && results.size() < messages.length) {
		LOG.info("waiting for results..");
		Thread.sleep(1000);
		results = pubsub.readAllMessages(outputStream);
	}

	if (executeException.get() != null) {
		throw executeException.get();
	}

	LOG.info("results: {}", results);
	Assert.assertEquals("Results received from '" + outputStream + "': " + results,
		messages.length, results.size());

	String[] expectedResults = {
		"elephant,5,45218",
		"elephant,14,54867",
		"squirrel,12,46213",
		"squirrel,34,52444",
		"bee,3,51348",
		"bee,13,53412"
	};

	for (String expectedResult : expectedResults) {
		Assert.assertTrue(expectedResult, results.contains(expectedResult));
	}

	// TODO: main thread needs to create job or CLI fails with:
	// "The program didn't contain a Flink job. Perhaps you forgot to call execute() on the execution environment."
	System.out.println("test finished");
	System.exit(0);
}
 
Example 5
Source File: KinesisExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
		inputStream,
		new KafkaEventSchema(),
		parameterTool.getProperties());
	consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

	Properties producerProperties = new Properties(parameterTool.getProperties());
	// producer needs region even when URL is specified
	producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
	// test driver does not deaggregate
	producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

	// KPL does not recognize endpoint URL..
	String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
	if (kinesisUrl != null) {
		URL url = new URL(kinesisUrl);
		producerProperties.put("KinesisEndpoint", url.getHost());
		producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
		producerProperties.put("VerifyCertificate", "false");
	}

	FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
		new KafkaEventSchema(),
		producerProperties);
	producer.setDefaultStream(outputStream);
	producer.setDefaultPartition("fakePartition");

	DataStream<KafkaEvent> input = env
		.addSource(consumer)
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(producer);
	env.execute();
}
 
Example 6
Source File: ReadFromKafka.java    From flinkDemo with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Map<String, String> properties = new HashMap<>();
    properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667");
    properties.put("group.id", "dec-esc-group-vib-calc");
    properties.put("enable.auto.commit", "true");
    properties.put("auto.commit.interval.ms", "1000");
    properties.put("auto.offset.reset", "earliest");
    properties.put("session.timeout.ms", "30000");
    properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("topic", "dec-vibration-test");
    //KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<String, String>(properties);
    // parse user parameters
    //ParameterTool parameterTool = ParameterTool.fromArgs(args);
    ParameterTool parameterTool = ParameterTool.fromMap(properties);

    FlinkKafkaConsumer010<String> consumer010 = new FlinkKafkaConsumer010<>(
                     parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties());

  //  consumer010.setStartFromEarliest();

    DataStream<String> messageStream = env
            .addSource(consumer010);

    // print() will write the contents of the stream to the TaskManager's standard out stream
    // the rebalance call causes a repartitioning of the data so that all machines
    // see the messages (for example when "num kafka partitions" < "num flink operators")
    messageStream.rebalance().map(new MapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String map(String value) throws Exception {
            return value;

        }
    });


    messageStream.print();

    env.execute();
}
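For comparison with the commented-out ParameterTool.fromArgs(args) line above, a short sketch (argument values are hypothetical) of passing the same settings as program arguments instead of the hard-coded map:

// Hypothetical alternative: supply the settings as program arguments, e.g.
//   --bootstrap.servers 192.168.10.63:6667 --group.id dec-esc-group-vib-calc --topic dec-vibration-test
ParameterTool argsTool = ParameterTool.fromArgs(args);

FlinkKafkaConsumer010<String> consumerFromArgs = new FlinkKafkaConsumer010<>(
        argsTool.getRequired("topic"),
        new SimpleStringSchema(),
        argsTool.getProperties());
DataStream<String> streamFromArgs = env.addSource(consumerFromArgs);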
 
Example 7
Source File: StreamingETL.java    From flink-streaming-etl with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse arguments
	ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

	// create streaming environment
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// enable event time processing
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	// enable fault-tolerance
	env.enableCheckpointing(1000);

	// enable restarts
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));

	env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));

	// run each operator separately
	env.disableOperatorChaining();

	// get data from Kafka
	Properties kParams = params.getProperties();
	kParams.setProperty("group.id", UUID.randomUUID().toString());
	DataStream<ObjectNode> inputStream = env.addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
			@Override
			public long extractTimestamp(ObjectNode jsonNodes) {
				return jsonNodes.get("timestamp_ms").asLong();
			}
		}).name("Timestamp extractor");

	// filter out records without lang field
	DataStream<ObjectNode> tweetsWithLang = inputStream.filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang")).name("Filter records without 'lang' field");

	// select only lang = "en" tweets
	DataStream<ObjectNode> englishTweets = tweetsWithLang.filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en")).name("Select 'lang'=en tweets");

	// write to file system
	RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
	rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // do a bucket for each minute
	englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");

	// build aggregates (count per language) using window (10 seconds tumbling):
	DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang.keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
		.timeWindow(Time.seconds(10))
		.apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter()).name("Count per Language (10 seconds tumbling)");

	// write window aggregate to ElasticSearch
	List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
	ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());

	languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");

	// word-count on the tweet stream
	DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
		// get text from tweets
		.map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
		// split text into (word, 1) tuples
		.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
			@Override
			public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
				String[] splits = s.split(" ");
				for (String sp : splits) {
					collector.collect(new Tuple2<>(sp, 1L));
				}
			}
		}).name("Tokenize words")
		// group by word
		.keyBy(0)
		// build 1 min windows, compute every 10 seconds --> count word frequency
		.timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
		// build top n every 10 seconds
		.timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");

	// write top Ns to Kafka topic
	topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");

	env.execute("Streaming ETL");

}
 
Example 8
Source File: KinesisExampleTest.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	LOG.info("System properties: {}", System.getProperties());
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	KinesisPubsubClient pubsub = new KinesisPubsubClient(parameterTool.getProperties());
	pubsub.createTopic(inputStream, 2, parameterTool.getProperties());
	pubsub.createTopic(outputStream, 2, parameterTool.getProperties());

	// The example job needs to start after streams are created and run in parallel to the validation logic.
	// The thread that runs the job won't terminate, we don't have a job reference to cancel it.
	// Once results are validated, the driver main thread will exit; job/cluster will be terminated from script.
	final AtomicReference<Exception> executeException = new AtomicReference<>();
	Thread executeThread =
		new Thread(
			() -> {
				try {
					KinesisExample.main(args);
					// this message won't appear in the log,
					// job is terminated when shutting down cluster
					LOG.info("executed program");
				} catch (Exception e) {
					executeException.set(e);
				}
			});
	executeThread.start();

	// generate input
	String[] messages = {
		"elephant,5,45218",
		"squirrel,12,46213",
		"bee,3,51348",
		"squirrel,22,52444",
		"bee,10,53412",
		"elephant,9,54867"
	};
	for (String msg : messages) {
		pubsub.sendMessage(inputStream, msg);
	}
	LOG.info("generated records");

	Deadline deadline  = Deadline.fromNow(Duration.ofSeconds(60));
	List<String> results = pubsub.readAllMessages(outputStream);
	while (deadline.hasTimeLeft() && executeException.get() == null && results.size() < messages.length) {
		LOG.info("waiting for results..");
		Thread.sleep(1000);
		results = pubsub.readAllMessages(outputStream);
	}

	if (executeException.get() != null) {
		throw executeException.get();
	}

	LOG.info("results: {}", results);
	Assert.assertEquals("Results received from '" + outputStream + "': " + results,
		messages.length, results.size());

	String[] expectedResults = {
		"elephant,5,45218",
		"elephant,14,54867",
		"squirrel,12,46213",
		"squirrel,34,52444",
		"bee,3,51348",
		"bee,13,53412"
	};

	for (String expectedResult : expectedResults) {
		Assert.assertTrue(expectedResult, results.contains(expectedResult));
	}

	// TODO: main thread needs to create job or CLI fails with:
	// "The program didn't contain a Flink job. Perhaps you forgot to call execute() on the execution environment."
	System.out.println("test finished");
	System.exit(0);
}
 