Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#fromArgs()

The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#fromArgs(). Each example is taken from an open-source project; its source file, originating project, and license are noted in the header above it.
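Before turning to the project examples, a minimal sketch of the API itself may help (this is not taken from any of the projects below; the key names and values are made up). fromArgs() parses arguments of the form --key value, and the resulting ParameterTool offers typed getters with optional defaults. Invoked with, say, --input-topic events --parallelism 4:

// Sketch only; assumes imports org.apache.flink.api.java.utils.ParameterTool and java.util.Properties.
public static void main(String[] args) {
	ParameterTool params = ParameterTool.fromArgs(args);

	String topic = params.getRequired("input-topic");  // throws RuntimeException if the key is absent
	int parallelism = params.getInt("parallelism", 1); // typed getter with a default value
	boolean local = params.has("local");               // presence check for optional flags
	Properties props = params.getProperties();         // all parameters as java.util.Properties
}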
Example 1
Source File: ProduceIntoKinesis.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	DataStream<String> simpleStringStream = see.addSource(new EventsGenerator());

	Properties kinesisProducerConfig = new Properties();
	kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_REGION, pt.getRequired("region"));
	kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
	kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));

	FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(
			new SimpleStringSchema(), kinesisProducerConfig);

	kinesis.setFailOnError(true);
	kinesis.setDefaultStream("flink-test");
	kinesis.setDefaultPartition("0");

	simpleStringStream.addSink(kinesis);

	see.execute();
}
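Because all three lookups use getRequired(), the job fails fast unless the corresponding flags are supplied. A hypothetical invocation (region and credentials are placeholders):

ProduceIntoKinesis.main(new String[] {
		"--region", "eu-west-1",
		"--accessKey", "<access-key>",
		"--secretKey", "<secret-key>"
});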
 
Example 2
Source File: Kafka010Example.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
			.addSource(
				new FlinkKafkaConsumer010<>(
					parameterTool.getRequired("input-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
			.keyBy("word")
			.map(new RollingAdditionMapper());

	input.addSink(
			new FlinkKafkaProducer010<>(
					parameterTool.getRequired("output-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties()));

	env.execute("Kafka 0.10 Example");
}
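Note that parameterTool.getProperties() is handed straight to the Kafka consumer and producer, so every --key value pair on the command line also becomes a Kafka property. A hypothetical invocation, assuming a local broker and ZooKeeper (all addresses are placeholders):

Kafka010Example.main(new String[] {
		"--input-topic", "test-input",
		"--output-topic", "test-output",
		"--bootstrap.servers", "localhost:9092",   // forwarded to Kafka via getProperties()
		"--zookeeper.connect", "localhost:2181",
		"--group.id", "myconsumer"
});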
 
Example 3
Source File: LongRidesExercise.java    From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		final String input = params.get("input", ExerciseBase.pathToRideData);

		final int maxEventDelay = 60;       // events are out of order by at most 60 seconds
		final int servingSpeedFactor = 600; // 10 minutes of events are served every second

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.setParallelism(ExerciseBase.parallelism);

		// start the data generator
		DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

		DataStream<TaxiRide> longRides = rides
				.keyBy(ride -> ride.rideId)
				.process(new MatchFunction());

		printOrTest(longRides);

		env.execute("Long Taxi Rides");
	}
 
Example 4
Source File: Kafka010Example.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
			.addSource(
				new FlinkKafkaConsumer010<>(
					parameterTool.getRequired("input-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
			.keyBy("word")
			.map(new RollingAdditionMapper());

	input.addSink(
			new FlinkKafkaProducer010<>(
					parameterTool.getRequired("output-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties()));

	env.execute("Kafka 0.10 Example");
}
 
Example 5
Source File: ConsumeFromKinesis.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	Properties kinesisConsumerConfig = new Properties();
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
		"flink-test",
		new SimpleStringSchema(),
		kinesisConsumerConfig));

	kinesis.print();

	see.execute();
}
 
Example 6
Source File: AbstractTaskManagerProcessFailureRecoveryTest.java    From flink with Apache License 2.0
public static void main(String[] args) {
	try {
		final ParameterTool parameterTool = ParameterTool.fromArgs(args);
		Configuration cfg = parameterTool.getConfiguration();

		TaskManagerRunner.runTaskManager(cfg, ResourceID.generate());
	}
	catch (Throwable t) {
		LOG.error("Failed to start TaskManager process", t);
		System.exit(1);
	}
}
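Here getConfiguration() converts the parsed arguments into a Flink Configuration, so each --key value pair is interpreted as a Flink configuration option. A minimal sketch of that conversion (the option name is illustrative; assumes an import of org.apache.flink.configuration.Configuration):

Configuration cfg = ParameterTool
		.fromArgs(new String[] {"--taskmanager.numberOfTaskSlots", "2"})
		.getConfiguration();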
 
Example 7
Source File: PeriodicStreamingJob.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool params = ParameterTool.fromArgs(args);
	String outputPath = params.getRequired("outputPath");
	int recordsPerSecond = params.getInt("recordsPerSecond", 10);
	int duration = params.getInt("durationInSecond", 60);
	int offset = params.getInt("offsetInSecond", 0);

	StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	sEnv.enableCheckpointing(4000);
	sEnv.getConfig().setAutoWatermarkInterval(1000);

	// execute a simple pass-through program.
	PeriodicSourceGenerator generator = new PeriodicSourceGenerator(
		recordsPerSecond, duration, offset);
	DataStream<Tuple> rows = sEnv.addSource(generator);

	DataStream<Tuple> result = rows
		.keyBy(1)
		.timeWindow(Time.seconds(5))
		.sum(0);

	result.writeAsText(outputPath + "/result.txt", FileSystem.WriteMode.OVERWRITE)
		.setParallelism(1);

	sEnv.execute();
}
 
Example 8
Source File: KafkaTopicValidator.java    From yahoo-streaming-benchmark with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().setGlobalJobParameters(parameterTool);
	DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
		parameterTool.getRequired("kafka.topic"),
		new SimpleStringSchema(),
		parameterTool.getProperties()));

	rawMessageStream.print();

	env.execute();
}
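Registering the ParameterTool via setGlobalJobParameters() makes the parameters visible in the web interface and retrievable inside any rich user function. A sketch of reading them back (the "prefix" key is illustrative; assumes an import of org.apache.flink.api.common.functions.RichMapFunction):

public static class PrefixMapper extends RichMapFunction<String, String> {
	@Override
	public String map(String value) {
		// ParameterTool extends ExecutionConfig.GlobalJobParameters, so this cast is safe.
		ParameterTool params = (ParameterTool)
				getRuntimeContext().getExecutionConfig().getGlobalJobParameters();
		return params.get("prefix", "") + value;
	}
}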
 
Example 9
Source File: WindowJoin.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse the parameters
	final ParameterTool params = ParameterTool.fromArgs(args);
	final long windowSize = params.getLong("windowSize", 2000);
	final long rate = params.getLong("rate", 3L);

	System.out.println("Using windowSize=" + windowSize + ", data rate=" + rate);
	System.out.println("To customize example, use: WindowJoin [--windowSize <window-size-in-millis>] [--rate <elements-per-second>]");

	// obtain execution environment, run this example in "ingestion time"
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	// make parameters available in the web interface
	env.getConfig().setGlobalJobParameters(params);

	// create the data sources for both grades and salaries
	DataStream<Tuple2<String, Integer>> grades = GradeSource.getSource(env, rate);
	DataStream<Tuple2<String, Integer>> salaries = SalarySource.getSource(env, rate);

	// run the actual window join program
	// for testability, this functionality is in a separate method.
	DataStream<Tuple3<String, Integer, Integer>> joinedStream = runWindowJoin(grades, salaries, windowSize);

	// print the results with a single thread, rather than in parallel
	joinedStream.print().setParallelism(1);

	// execute program
	env.execute("Windowed Join Example");
}
 
Example 10
Source File: KafkaThroughput.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
	final ParameterTool pt = ParameterTool.fromArgs(args);

	TopologyBuilder builder = new TopologyBuilder();
	BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
	SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
	builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));

	builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

	Config conf = new Config();
	conf.setDebug(false);

	if (!pt.has("local")) {
		conf.setNumWorkers(pt.getInt("par", 2));

		StormSubmitter.submitTopologyWithProgressBar("kafka-spout-"+pt.get("name", "no_name"), conf, builder.createTopology());
	} else {
		conf.setMaxTaskParallelism(pt.getInt("par", 2));

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("kafka-spout", conf, builder.createTopology());

		Thread.sleep(300000);

		cluster.shutdown();
	}
}
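Note the inverted flag logic: the topology is submitted to a remote cluster unless --local is present. The two parallelism values use getInt() without a default, so they are required. A hypothetical local run (all values are placeholders):

KafkaThroughput.main(new String[] {
		"--zookeeper", "localhost:2181",
		"--topic", "benchmark",
		"--sourceParallelism", "1",
		"--sinkParallelism", "1",
		"--local", "true"
});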
 
Example 11
Source File: IterateExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up input for the stream of integer pairs

		// obtain execution environment and set setBufferTimeout to 1 to enable
		// continuous flushing of the output buffers (lowest latency)
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
				.setBufferTimeout(1);

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// create input stream of integer pairs
		DataStream<Tuple2<Integer, Integer>> inputStream;
		if (params.has("input")) {
			inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
		} else {
			System.out.println("Executing Iterate example with default input data set.");
			System.out.println("Use --input to specify file input.");
			inputStream = env.addSource(new RandomFibonacciSource());
		}

		// create an iterative data stream from the input with 5 second timeout
		IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
				.iterate(5000L);

		// apply the step function to get the next Fibonacci number
		// increment the counter and split the output with the output selector
		SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
				.split(new MySelector());

		// close the iteration by selecting the tuples that were directed to the
		// 'iterate' channel in the output selector
		it.closeWith(step.select("iterate"));

		// to produce the final output, select the tuples directed to the
		// 'output' channel, then pick the input pairs with the greatest iteration counter
		// over a 1-second sliding window
		DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
				.map(new OutputMap());

		// emit results
		if (params.has("output")) {
			numbers.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			numbers.print();
		}

		// execute the program
		env.execute("Streaming Iteration Example");
	}
 
Example 12
Source File: KinesisExampleTest.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	LOG.info("System properties: {}", System.getProperties());
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	KinesisPubsubClient pubsub = new KinesisPubsubClient(parameterTool.getProperties());
	pubsub.createTopic(inputStream, 2, parameterTool.getProperties());
	pubsub.createTopic(outputStream, 2, parameterTool.getProperties());

	// The example job needs to start after the streams are created and must run in parallel to the validation logic.
	// The thread that runs the job won't terminate, and we have no job reference with which to cancel it.
	// Once the results are validated, the driver's main thread exits; the job and cluster are terminated from the script.
	final AtomicReference<Exception> executeException = new AtomicReference<>();
	Thread executeThread =
		new Thread(
			() -> {
				try {
					KinesisExample.main(args);
					// this message won't appear in the log,
					// job is terminated when shutting down cluster
					LOG.info("executed program");
				} catch (Exception e) {
					executeException.set(e);
				}
			});
	executeThread.start();

	// generate input
	String[] messages = {
		"elephant,5,45218",
		"squirrel,12,46213",
		"bee,3,51348",
		"squirrel,22,52444",
		"bee,10,53412",
		"elephant,9,54867"
	};
	for (String msg : messages) {
		pubsub.sendMessage(inputStream, msg);
	}
	LOG.info("generated records");

	Deadline deadline = Deadline.fromNow(Duration.ofSeconds(60));
	List<String> results = pubsub.readAllMessages(outputStream);
	while (deadline.hasTimeLeft() && executeException.get() == null && results.size() < messages.length) {
		LOG.info("waiting for results..");
		Thread.sleep(1000);
		results = pubsub.readAllMessages(outputStream);
	}

	if (executeException.get() != null) {
		throw executeException.get();
	}

	LOG.info("results: {}", results);
	Assert.assertEquals("Results received from '" + outputStream + "': " + results,
		messages.length, results.size());

	String[] expectedResults = {
		"elephant,5,45218",
		"elephant,14,54867",
		"squirrel,12,46213",
		"squirrel,34,52444",
		"bee,3,51348",
		"bee,13,53412"
	};

	for (String expectedResult : expectedResults) {
		Assert.assertTrue(expectedResult, results.contains(expectedResult));
	}

	// TODO: main thread needs to create job or CLI fails with:
	// "The program didn't contain a Flink job. Perhaps you forgot to call execute() on the execution environment."
	System.out.println("test finished");
	System.exit(0);
}
 
Example 13
Source File: NearestTaxiSolution.java    From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		final String input = params.get("input", ExerciseBase.pathToRideData);

		final int maxEventDelay = 60;       	// events are out of order by at most 60 seconds
		final int servingSpeedFactor = 600; 	// 10 minutes worth of events are served every second

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.setParallelism(ExerciseBase.parallelism);

		DataStream<TaxiRide> rides = env.addSource(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor));

		// add a socket source
		BroadcastStream<Query> queryStream = env.socketTextStream("localhost", 9999)
				.map(new MapFunction<String, Query>() {
					@Override
					public Query map(String msg) throws Exception {
						String[] parts = msg.split(",\\s*");
						return new Query(
								Float.valueOf(parts[0]),	// longitude
								Float.valueOf(parts[1]));	// latitude
					}
				})
				.broadcast(queryDescriptor);

		DataStream<Tuple3<Long, Long, Float>> reports = rides
				.keyBy((TaxiRide ride) -> ride.taxiId)
				.connect(queryStream)
				.process(new QueryFunction());

		DataStream<Tuple3<Long, Long, Float>> nearest = reports
				// key by the queryId
				.keyBy(x -> x.f0)
				// the minimum, for each query, by distance
				.minBy(2);

		nearest.print();

		env.execute("Nearest Available Taxi");
	}
 
Example 14
Source File: KinesisExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
		inputStream,
		new KafkaEventSchema(),
		parameterTool.getProperties());
	consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

	Properties producerProperties = new Properties(parameterTool.getProperties());
	// producer needs region even when URL is specified
	producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
	// test driver does not deaggregate
	producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

	// KPL does not recognize the endpoint URL.
	String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
	if (kinesisUrl != null) {
		URL url = new URL(kinesisUrl);
		producerProperties.put("KinesisEndpoint", url.getHost());
		producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
		producerProperties.put("VerifyCertificate", "false");
	}

	FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
		new KafkaEventSchema(),
		producerProperties);
	producer.setDefaultStream(outputStream);
	producer.setDefaultPartition("fakePartition");

	DataStream<KafkaEvent> input = env
		.addSource(consumer)
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(producer);
	env.execute();
}
 
Example 15
Source File: KinesisExample.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
		inputStream,
		new KafkaEventSchema(),
		parameterTool.getProperties());
	consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

	Properties producerProperties = new Properties(parameterTool.getProperties());
	// producer needs region even when URL is specified
	producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
	// test driver does not deaggregate
	producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

	// KPL does not recognize the endpoint URL.
	String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
	if (kinesisUrl != null) {
		URL url = new URL(kinesisUrl);
		producerProperties.put("KinesisEndpoint", url.getHost());
		producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
		producerProperties.put("VerifyCertificate", "false");
	}

	FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
		new KafkaEventSchema(),
		producerProperties);
	producer.setDefaultStream(outputStream);
	producer.setDefaultPartition("fakePartition");

	DataStream<KafkaEvent> input = env
		.addSource(consumer)
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(producer);
	env.execute();
}
 
Example 16
Source File: WebLogAnalysis.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		env.getConfig().setGlobalJobParameters(params);

		// get input data
		DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
		DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
		DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

		// Retain documents with keywords
		DataSet<Tuple1<String>> filterDocs = documents
				.filter(new FilterDocByKeyWords())
				.project(0);

		// Filter ranks by minimum rank
		DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
				.filter(new FilterByRank());

		// Filter visits by visit date
		DataSet<Tuple1<String>> filterVisits = visits
				.filter(new FilterVisitsByDate())
				.project(0);

		// Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
		DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
				filterDocs.join(filterRanks)
							.where(0).equalTo(1)
							.projectSecond(0, 1, 2);

		// Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
		DataSet<Tuple3<Integer, String, Integer>> result =
				joinDocsRanks.coGroup(filterVisits)
								.where(1).equalTo(0)
								.with(new AntiJoinVisits());

		// emit result
		if (params.has("output")) {
			result.writeAsCsv(params.get("output"), "\n", "|");
			// execute program
			env.execute("WebLogAnalysis Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			result.print();
		}
	}
 
Example 17
Source File: StreamSQLTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		String outputPath = params.getRequired("outputPath");
		String planner = params.get("planner", "old");

		final EnvironmentSettings.Builder builder = EnvironmentSettings.newInstance();
		builder.inStreamingMode();

		if (planner.equals("old")) {
			builder.useOldPlanner();
		} else if (planner.equals("blink")) {
			builder.useBlinkPlanner();
		}

		final EnvironmentSettings settings = builder.build();

		final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
			3,
			Time.of(10, TimeUnit.SECONDS)
		));
		sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		sEnv.enableCheckpointing(4000);
		sEnv.getConfig().setAutoWatermarkInterval(1000);

		final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings);

		tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
		tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

		int overWindowSizeSeconds = 1;
		int tumbleWindowSizeSeconds = 10;

		String overQuery = String.format(
			"SELECT " +
			"  key, " +
			"  rowtime, " +
			"  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
			"FROM table1",
			overWindowSizeSeconds);

		String tumbleQuery = String.format(
			"SELECT " +
			"  key, " +
			"  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
			"  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
			"FROM (%s) " +
			"WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
			"GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
			tumbleWindowSizeSeconds,
			tumbleWindowSizeSeconds,
			overQuery,
			tumbleWindowSizeSeconds);

		String joinQuery = String.format(
			"SELECT " +
			"  t1.key, " +
			"  t2.rowtime AS rowtime, " +
			"  t2.correct," +
			"  t2.wStart " +
			"FROM table2 t1, (%s) t2 " +
			"WHERE " +
			"  t1.key = t2.key AND " +
			"  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
			tumbleQuery,
			tumbleWindowSizeSeconds);

		String finalAgg = String.format(
			"SELECT " +
			"  SUM(correct) AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
			"FROM (%s) " +
			"GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
			joinQuery);

		// get Table for SQL query
		Table result = tEnv.sqlQuery(finalAgg);
		// convert Table into append-only DataStream
		DataStream<Row> resultStream =
			tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

		final StreamingFileSink<Row> sink = StreamingFileSink
			.forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
				PrintStream out = new PrintStream(stream);
				out.println(element.toString());
			})
			.withBucketAssigner(new KeyBucketAssigner())
			.withRollingPolicy(OnCheckpointRollingPolicy.build())
			.build();

		resultStream
			// inject a KillMapper that forwards all records but terminates the first execution attempt
			.map(new KillMapper()).setParallelism(1)
			// add sink function
			.addSink(sink).setParallelism(1);

		sEnv.execute();
	}
 
Example 18
Source File: QsStateClient.java    From flink with Apache License 2.0
public static void main(final String[] args) throws Exception {

		ParameterTool parameters = ParameterTool.fromArgs(args);

		// setup values
		String jobId = parameters.getRequired("job-id");
		String host = parameters.get("host", "localhost");
		int port = parameters.getInt("port", 9069);
		int numIterations = parameters.getInt("iterations", 1500);

		QueryableStateClient client = new QueryableStateClient(host, port);
		client.setExecutionConfig(new ExecutionConfig());

		MapStateDescriptor<EmailId, EmailInformation> stateDescriptor =
				new MapStateDescriptor<>(
						QsConstants.STATE_NAME,
						TypeInformation.of(new TypeHint<EmailId>() {

						}),
						TypeInformation.of(new TypeHint<EmailInformation>() {

						})
				);

		// wait for state to exist
		for (int i = 0; i < BOOTSTRAP_RETRIES; i++) { // ~120s
			try {
				getMapState(jobId, client, stateDescriptor);
				break;
			} catch (ExecutionException e) {
				if (e.getCause() instanceof UnknownKeyOrNamespaceException) {
					System.err.println("State does not exist yet; sleeping 500ms");
					Thread.sleep(500L);
				} else {
					throw e;
				}
			}

			if (i == (BOOTSTRAP_RETRIES - 1)) {
				throw new RuntimeException("Timeout: state doesn't exist after 120s");
			}
		}

		// query state
		for (int iterations = 0; iterations < numIterations; iterations++) {

			MapState<EmailId, EmailInformation> mapState =
				getMapState(jobId, client, stateDescriptor);

			int counter = 0;
			for (Map.Entry<EmailId, EmailInformation> entry: mapState.entries()) {
				// this is to force deserialization
				entry.getKey();
				entry.getValue();
				counter++;
			}
			System.out.println("MapState has " + counter + " entries"); // we look for it in the test

			Thread.sleep(100L);
		}
	}
 
Example 19
Source File: StatefulFunctionsJob.java    From stateful-functions with Apache License 2.0
public static void main(String... args) throws Exception {
  ParameterTool parameterTool = ParameterTool.fromArgs(args);
  Configuration configuration = parameterTool.getConfiguration();

  main(configuration);
}
 
Example 20
Source File: KeyedJob.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	String savepointsPath = pt.getRequired("savepoint-path");

	Configuration config = new Configuration();
	config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointsPath);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(config);
	env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);
	env.setRestartStrategy(RestartStrategies.noRestart());

	env.setStateBackend(new MemoryStateBackend());

	/**
	 * Source -> keyBy -> C(Window -> StatefulMap1 -> StatefulMap2)
	 */

	SingleOutputStreamOperator<Tuple2<Integer, Integer>> source = createIntegerTupleSource(env, ExecutionMode.GENERATE);

	SingleOutputStreamOperator<Integer> window = createWindowFunction(ExecutionMode.GENERATE, source);

	SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.GENERATE, window);

	SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.GENERATE, first);

	env.execute("job");
}