Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#getRequired()

The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#getRequired(). Each example is taken from an open source project; the project and license are noted above the code. getRequired(key) returns the value for the given key and throws a RuntimeException if the key was not supplied, so a job fails fast on a missing mandatory parameter instead of starting with incomplete configuration.
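Before the project examples, here is a minimal, hypothetical sketch of that behavior (the class name and the input/host/port flags are invented for illustration; only the ParameterTool calls are real API):

import org.apache.flink.api.java.utils.ParameterTool;

public class GetRequiredSketch {
	public static void main(String[] args) {
		ParameterTool params = ParameterTool.fromArgs(args);

		// getRequired aborts with a RuntimeException naming the key if --input is absent
		String input = params.getRequired("input");

		// get/getInt return a fallback value instead of throwing when the key is absent
		String host = params.get("host", "localhost");
		int port = params.getInt("port", 9069);

		System.out.println("input=" + input + ", endpoint=" + host + ":" + port);
	}
}

Run with --input /tmp/data and the host/port defaults are used; run without --input and the job fails before any pipeline is built.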
Example 1
Source File: KafkaItemTransactionJob.java    From flink-tutorials with Apache License 2.0
public DataStream<ItemTransaction> readTransactionStream(ParameterTool params, StreamExecutionEnvironment env) {
	// We read the ItemTransaction objects directly using the schema
	FlinkKafkaConsumer<ItemTransaction> transactionSource = new FlinkKafkaConsumer<>(
			params.getRequired(TRANSACTION_INPUT_TOPIC_KEY), new TransactionSchema(),
			Utils.readKafkaProperties(params, true));

	transactionSource.setCommitOffsetsOnCheckpoints(true);
	transactionSource.setStartFromEarliest();

	// If event time processing is enabled, we assign trailing watermarks for each partition
	transactionSource.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ItemTransaction>(Time.minutes(1)) {
		@Override
		public long extractTimestamp(ItemTransaction transaction) {
			return transaction.ts;
		}
	});

	return env.addSource(transactionSource)
			.name("Kafka Transaction Source")
			.uid("Kafka Transaction Source");
}
 
Example 2
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0
private static void setupStateBackend(final StreamExecutionEnvironment env, final ParameterTool pt) throws IOException {
	final String stateBackend = pt.get(
		STATE_BACKEND.key(),
		STATE_BACKEND.defaultValue());

	final String checkpointDir = pt.getRequired(STATE_BACKEND_CHECKPOINT_DIR.key());

	if ("file".equalsIgnoreCase(stateBackend)) {
		boolean asyncCheckpoints = pt.getBoolean(
			STATE_BACKEND_FILE_ASYNC.key(),
			STATE_BACKEND_FILE_ASYNC.defaultValue());

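		// the cast selects the non-deprecated setStateBackend(StateBackend) overload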
		env.setStateBackend((StateBackend) new FsStateBackend(checkpointDir, asyncCheckpoints));
	} else if ("rocks".equalsIgnoreCase(stateBackend)) {
		boolean incrementalCheckpoints = pt.getBoolean(
			STATE_BACKEND_ROCKS_INCREMENTAL.key(),
			STATE_BACKEND_ROCKS_INCREMENTAL.defaultValue());

		env.setStateBackend((StateBackend) new RocksDBStateBackend(checkpointDir, incrementalCheckpoints));
	} else {
		throw new IllegalArgumentException("Unknown backend requested: " + stateBackend);
	}
}
 
Example 3
Source File: ConsumeFromDynamoDBStreams.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

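	// stream name, region and credentials are mandatory; getRequired aborts with a clear error if any flag is missing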
	Properties dynamodbStreamsConsumerConfig = new Properties();
	final String streamName = pt.getRequired(DYNAMODB_STREAM_NAME);
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> dynamodbStreams = see.addSource(new FlinkDynamoDBStreamsConsumer<>(
			streamName,
			new SimpleStringSchema(),
			dynamodbStreamsConsumerConfig));

	dynamodbStreams.print();

	see.execute();
}
 
Example 4
Source File: KafkaToHDFSSimpleJob.java    From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = Utils.parseArgs(args);
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

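		// consume the configured topic as plain strings; getRequired fails fast if --kafkaTopic is missing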
		FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(params.getRequired("kafkaTopic"), new SimpleStringSchema(), Utils.readKafkaProperties(params));
		DataStream<String> source = env.addSource(consumer).name("Kafka Source").uid("Kafka Source");

		StreamingFileSink<String> sink = StreamingFileSink
				.forRowFormat(new Path(params.getRequired("hdfsOutput")), new SimpleStringEncoder<String>("UTF-8"))
				.build();

		source.addSink(sink).name("FS Sink").uid("FS Sink");
		source.print();

		env.execute("Flink Streaming Secured Job Sample");
	}
 
Example 5
Source File: SummarizationJobParameters.java    From timely with Apache License 2.0
public SummarizationJobParameters(ParameterTool params) {
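    // settings without sensible defaults use getRequired, so construction fails fast when they are missing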
    timelyHostname = params.getRequired("timelyHostname");
    timelyTcpPort = params.getInt("timelyTcpPort", 4241);
    timelyHttpsPort = params.getInt("timelyHttpsPort", 4242);
    timelyWssPort = params.getInt("timelyWssPort", 4243);
    clientAuth = params.getBoolean("clientAuth", false);
    doLogin = params.getBoolean("doLogin", false);
    timelyUsername = params.get("timelyUsername", null);
    timelyPassword = params.get("timelyPassword", null);
    keyStoreFile = params.getRequired("keyStoreFile");
    keyStoreType = params.get("keyStoreType", "JKS");
    keyStorePass = params.getRequired("keyStorePass");
    trustStoreFile = params.getRequired("trustStoreFile");
    trustStoreType = params.get("trustStoreType", "JKS");
    trustStorePass = params.getRequired("trustStorePass");
    hostVerificationEnabled = params.getBoolean("hostVerificationEnabled", true);
    bufferSize = params.getInt("bufferSize", 10485760);
    // getRequired throws when the key is missing and never returns null, so the value can be split directly
    String metricNames = params.getRequired("metrics");
    metrics = metricNames.split(",");
    startTime = params.getLong("startTime", 0L);
    endTime = params.getLong("endTime", 0L);
    interval = params.getRequired("interval");
    intervalUnits = params.getRequired("intervalUnits");
}
 
Example 6
Source File: PeriodicStreamingJob.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool params = ParameterTool.fromArgs(args);
	String outputPath = params.getRequired("outputPath");
	int recordsPerSecond = params.getInt("recordsPerSecond", 10);
	int duration = params.getInt("durationInSecond", 60);
	int offset = params.getInt("offsetInSecond", 0);

	StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	sEnv.enableCheckpointing(4000);
	sEnv.getConfig().setAutoWatermarkInterval(1000);

	// execute a simple pass-through program.
	PeriodicSourceGenerator generator = new PeriodicSourceGenerator(
		recordsPerSecond, duration, offset);
	DataStream<Tuple> rows = sEnv.addSource(generator);

	DataStream<Tuple> result = rows
		.keyBy(1)
		.timeWindow(Time.seconds(5))
		.sum(0);

	result.writeAsText(outputPath + "/result.txt", FileSystem.WriteMode.OVERWRITE)
		.setParallelism(1);

	sEnv.execute();
}
 
Example 7
Source File: KafkaToHDFSAvroJob.java    From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {

        ParameterTool params = Utils.parseArgs(args);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        KafkaDeserializationSchema<Message> schema = ClouderaRegistryKafkaDeserializationSchema
                .builder(Message.class)
                .setConfig(Utils.readSchemaRegistryProperties(params))
                .build();

        FlinkKafkaConsumer<Message> consumer = new FlinkKafkaConsumer<Message>(params.getRequired(K_KAFKA_TOPIC), schema, Utils.readKafkaProperties(params));

        DataStream<String> source = env.addSource(consumer)
                .name("Kafka Source")
                .uid("Kafka Source")
                .map(record -> record.getId() + "," + record.getName() + "," + record.getDescription())
                .name("ToOutputString");

        StreamingFileSink<String> sink = StreamingFileSink
                .forRowFormat(new Path(params.getRequired(K_HDFS_OUTPUT)), new SimpleStringEncoder<String>("UTF-8"))
                .build();

        source.addSink(sink)
                .name("FS Sink")
                .uid("FS Sink");

        source.print();

        env.execute("Flink Streaming Secured Job Sample");
    }
 
Example 8
Source File: QsStateClient.java    From Flink-CEPplus with Apache License 2.0
public static void main(final String[] args) throws Exception {

		ParameterTool parameters = ParameterTool.fromArgs(args);

		// setup values
		String jobId = parameters.getRequired("job-id");
		String host = parameters.get("host", "localhost");
		int port = parameters.getInt("port", 9069);
		int numIterations = parameters.getInt("iterations", 1500);

		QueryableStateClient client = new QueryableStateClient(host, port);
		client.setExecutionConfig(new ExecutionConfig());

		MapStateDescriptor<EmailId, EmailInformation> stateDescriptor =
				new MapStateDescriptor<>(
						QsConstants.STATE_NAME,
						TypeInformation.of(new TypeHint<EmailId>() {

						}),
						TypeInformation.of(new TypeHint<EmailInformation>() {

						})
				);

		// wait for state to exist
		for (int i = 0; i < BOOTSTRAP_RETRIES; i++) { // ~120s
			try {
				getMapState(jobId, client, stateDescriptor);
				break;
			} catch (ExecutionException e) {
				if (e.getCause() instanceof UnknownKeyOrNamespaceException) {
					System.err.println("State does not exist yet; sleeping 500ms");
					Thread.sleep(500L);
				} else {
					throw e;
				}
			}

			if (i == (BOOTSTRAP_RETRIES - 1)) {
				throw new RuntimeException("Timeout: state doesn't exist after 120s");
			}
		}

		// query state
		for (int iterations = 0; iterations < numIterations; iterations++) {

			MapState<EmailId, EmailInformation> mapState =
				getMapState(jobId, client, stateDescriptor);

			int counter = 0;
			for (Map.Entry<EmailId, EmailInformation> entry: mapState.entries()) {
				// this is to force deserialization
				entry.getKey();
				entry.getValue();
				counter++;
			}
			System.out.println("MapState has " + counter + " entries"); // we look for it in the test

			Thread.sleep(100L);
		}
	}
 
Example 9
Source File: FlinkPulsarBatchCsvSinkExample.java    From pulsar with Apache License 2.0
public static void main(String[] args) throws Exception {

        // parse input arguments
        final ParameterTool parameterTool = ParameterTool.fromArgs(args);

        if (parameterTool.getNumberOfParameters() < 2) {
            System.out.println("Missing parameters!");
            System.out.println("Usage: pulsar --service-url <pulsar-service-url> --topic <topic>");
            return;
        }

        // set up the execution environment
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setGlobalJobParameters(parameterTool);

        String serviceUrl = parameterTool.getRequired("service-url");
        String topic = parameterTool.getRequired("topic");

        System.out.println("Parameters:");
        System.out.println("\tServiceUrl:\t" + serviceUrl);
        System.out.println("\tTopic:\t" + topic);

        // create PulsarCsvOutputFormat instance
        final OutputFormat<Tuple4<Integer, String, Integer, Integer>> pulsarCsvOutputFormat =
                new PulsarCsvOutputFormat<>(serviceUrl, topic, new AuthenticationDisabled());

        // create DataSet
        DataSet<Tuple4<Integer, String, Integer, Integer>> nasaMissionDS = env.fromCollection(nasaMissions);
        // map nasa mission names to upper-case
        nasaMissionDS.map(
            new MapFunction<Tuple4<Integer, String, Integer, Integer>, Tuple4<Integer, String, Integer, Integer>>() {
                           @Override
                           public Tuple4<Integer, String, Integer, Integer> map(
                                   Tuple4<Integer, String, Integer, Integer> nasaMission) throws Exception {
                               return new Tuple4(
                                       nasaMission.f0,
                                       nasaMission.f1.toUpperCase(),
                                       nasaMission.f2,
                                       nasaMission.f3);
                           }
                       }
        )
        // filter missions which started after 1970
        .filter(nasaMission -> nasaMission.f2 > 1970)
        // write batch data to Pulsar
        .output(pulsarCsvOutputFormat);

        // set parallelism to write Pulsar in parallel (optional)
        env.setParallelism(2);

        // execute program
        env.execute("Flink - Pulsar Batch Csv");

    }
 
Example 10
Source File: KinesisExampleTest.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	LOG.info("System properties: {}", System.getProperties());
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	KinesisPubsubClient pubsub = new KinesisPubsubClient(parameterTool.getProperties());
	pubsub.createTopic(inputStream, 2, parameterTool.getProperties());
	pubsub.createTopic(outputStream, 2, parameterTool.getProperties());

	// The example job needs to start after streams are created and run in parallel to the validation logic.
	// The thread that runs the job won't terminate, we don't have a job reference to cancel it.
	// Once results are validated, the driver main thread will exit; job/cluster will be terminated from script.
	final AtomicReference<Exception> executeException = new AtomicReference<>();
	Thread executeThread =
		new Thread(
			() -> {
				try {
					KinesisExample.main(args);
					// this message won't appear in the log,
					// job is terminated when shutting down cluster
					LOG.info("executed program");
				} catch (Exception e) {
					executeException.set(e);
				}
			});
	executeThread.start();

	// generate input
	String[] messages = {
		"elephant,5,45218",
		"squirrel,12,46213",
		"bee,3,51348",
		"squirrel,22,52444",
		"bee,10,53412",
		"elephant,9,54867"
	};
	for (String msg : messages) {
		pubsub.sendMessage(inputStream, msg);
	}
	LOG.info("generated records");

	Deadline deadline  = Deadline.fromNow(Duration.ofSeconds(60));
	List<String> results = pubsub.readAllMessages(outputStream);
	while (deadline.hasTimeLeft() && executeException.get() == null && results.size() < messages.length) {
		LOG.info("waiting for results..");
		Thread.sleep(1000);
		results = pubsub.readAllMessages(outputStream);
	}

	if (executeException.get() != null) {
		throw executeException.get();
	}

	LOG.info("results: {}", results);
	Assert.assertEquals("Results received from '" + outputStream + "': " + results,
		messages.length, results.size());

	String[] expectedResults = {
		"elephant,5,45218",
		"elephant,14,54867",
		"squirrel,12,46213",
		"squirrel,34,52444",
		"bee,3,51348",
		"bee,13,53412"
	};

	for (String expectedResult : expectedResults) {
		Assert.assertTrue(expectedResult, results.contains(expectedResult));
	}

	// TODO: main thread needs to create job or CLI fails with:
	// "The program didn't contain a Flink job. Perhaps you forgot to call execute() on the execution environment."
	System.out.println("test finished");
	System.exit(0);
}
 
Example 11
Source File: FlinkPulsarBatchAvroSinkExample.java    From pulsar with Apache License 2.0
public static void main(String[] args) throws Exception {

        // parse input arguments
        final ParameterTool parameterTool = ParameterTool.fromArgs(args);

        if (parameterTool.getNumberOfParameters() < 2) {
            System.out.println("Missing parameters!");
            System.out.println("Usage: pulsar --service-url <pulsar-service-url> --topic <topic>");
            return;
        }

        // set up the execution environment
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setGlobalJobParameters(parameterTool);

        String serviceUrl = parameterTool.getRequired("service-url");
        String topic = parameterTool.getRequired("topic");

        System.out.println("Parameters:");
        System.out.println("\tServiceUrl:\t" + serviceUrl);
        System.out.println("\tTopic:\t" + topic);

        // create PulsarAvroOutputFormat instance
        final OutputFormat<NasaMission> pulsarAvroOutputFormat = new PulsarAvroOutputFormat<>(serviceUrl, topic, new AuthenticationDisabled());

        // create DataSet
        DataSet<NasaMission> nasaMissionDS = env.fromCollection(nasaMissions);
        // map nasa mission names to upper-case
        nasaMissionDS.map(nasaMission -> new NasaMission(
                nasaMission.getId(),
                nasaMission.getName(),
                nasaMission.getStartYear(),
                nasaMission.getEndYear()))
                // filter missions which started after 1970
                .filter(nasaMission -> nasaMission.getStartYear() > 1970)
                // write batch data to Pulsar
                .output(pulsarAvroOutputFormat);

        // set parallelism to write Pulsar in parallel (optional)
        env.setParallelism(2);

        // execute program
        env.execute("Flink - Pulsar Batch Avro");
    }
 
Example 12
Source File: KinesisExample.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
		inputStream,
		new KafkaEventSchema(),
		parameterTool.getProperties());
	consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

	Properties producerProperties = new Properties(parameterTool.getProperties());
	// producer needs region even when URL is specified
	producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
	// test driver does not deaggregate
	producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

	// KPL does not recognize the endpoint URL
	String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
	if (kinesisUrl != null) {
		URL url = new URL(kinesisUrl);
		producerProperties.put("KinesisEndpoint", url.getHost());
		producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
		producerProperties.put("VerifyCertificate", "false");
	}

	FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
		new KafkaEventSchema(),
		producerProperties);
	producer.setDefaultStream(outputStream);
	producer.setDefaultPartition("fakePartition");

	DataStream<KafkaEvent> input = env
		.addSource(consumer)
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(producer);
	env.execute();
}
 
Example 13
Source File: PopularPlacesSql.java    From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {

		// read parameters
		ParameterTool params = ParameterTool.fromArgs(args);
		String input = params.getRequired("input");

		final int maxEventDelay = 60;       // events are out of order by max 60 seconds
		final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		// create a TableEnvironment
		StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

		// register TaxiRideTableSource as table "TaxiRides"
		tEnv.registerTableSource(
				"TaxiRides",
				new TaxiRideTableSource(
						input,
						maxEventDelay,
						servingSpeedFactor));

		// register user-defined functions
		tEnv.registerFunction("isInNYC", new GeoUtils.IsInNYC());
		tEnv.registerFunction("toCellId", new GeoUtils.ToCellId());
		tEnv.registerFunction("toCoords", new GeoUtils.ToCoords());

		Table results = tEnv.sqlQuery(
			"SELECT " +
				"toCoords(cell), wstart, wend, isStart, popCnt " +
			"FROM " +
				"(SELECT " +
					"cell, " +
					"isStart, " +
					"HOP_START(eventTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wstart, " +
					"HOP_END(eventTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wend, " +
					"COUNT(isStart) AS popCnt " +
				"FROM " +
					"(SELECT " +
						"eventTime, " +
						"isStart, " +
						"CASE WHEN isStart THEN toCellId(startLon, startLat) ELSE toCellId(endLon, endLat) END AS cell " +
					"FROM TaxiRides " +
					"WHERE isInNYC(startLon, startLat) AND isInNYC(endLon, endLat)) " +
				"GROUP BY cell, isStart, HOP(eventTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE)) " +
			"WHERE popCnt > 20"
			);

		// convert Table into an append stream and print it
		// (if instead we needed a retraction stream we would use tEnv.toRetractStream)
		tEnv.toAppendStream(results, Row.class).print();

		// execute query
		env.execute();
	}
 
Example 14
Source File: KinesisExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	String inputStream = parameterTool.getRequired("input-stream");
	String outputStream = parameterTool.getRequired("output-stream");

	FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
		inputStream,
		new KafkaEventSchema(),
		parameterTool.getProperties());
	consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

	Properties producerProperties = new Properties(parameterTool.getProperties());
	// producer needs region even when URL is specified
	producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
	// test driver does not deaggregate
	producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

	// KPL does not recognize the endpoint URL
	String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
	if (kinesisUrl != null) {
		URL url = new URL(kinesisUrl);
		producerProperties.put("KinesisEndpoint", url.getHost());
		producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
		producerProperties.put("VerifyCertificate", "false");
	}

	FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
		new KafkaEventSchema(),
		producerProperties);
	producer.setDefaultStream(outputStream);
	producer.setDefaultPartition("fakePartition");

	DataStream<KafkaEvent> input = env
		.addSource(consumer)
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(producer);
	env.execute();
}
 
Example 15
Source File: ManualExactlyOnceTest.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool pt = ParameterTool.fromArgs(args);
	LOG.info("Starting exactly once test");

	final String streamName = "flink-test-" + UUID.randomUUID().toString();
	final String accessKey = pt.getRequired("accessKey");
	final String secretKey = pt.getRequired("secretKey");
	final String region = pt.getRequired("region");

	Properties configProps = new Properties();
	configProps.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, accessKey);
	configProps.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, secretKey);
	configProps.setProperty(AWSConfigConstants.AWS_REGION, region);
	AmazonKinesis client = AWSUtil.createKinesisClient(configProps);

	// create a stream for the test:
	client.createStream(streamName, 1);

	// wait until stream has been created
	DescribeStreamResult status = client.describeStream(streamName);
	LOG.info("status {}" , status);
	while (!status.getStreamDescription().getStreamStatus().equals("ACTIVE")) {
		status = client.describeStream(streamName);
		LOG.info("Status of stream {}", status);
		Thread.sleep(1000);
	}

	final Configuration flinkConfig = new Configuration();
	flinkConfig.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("16m"));
	flinkConfig.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "0 s");

	MiniClusterResource flink = new MiniClusterResource(new MiniClusterResourceConfiguration.Builder()
		.setNumberTaskManagers(1)
		.setNumberSlotsPerTaskManager(8)
		.setConfiguration(flinkConfig)
		.build());
	flink.before();

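	// note: getRestAddres (sic) is the method's actual name on MiniClusterResource in this Flink version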
	final int flinkPort = flink.getRestAddres().getPort();

	try {
		final AtomicReference<Throwable> producerError = new AtomicReference<>();
		Thread producerThread = KinesisEventsGeneratorProducerThread.create(
			TOTAL_EVENT_COUNT, 2,
			accessKey, secretKey, region, streamName,
			producerError, flinkPort, flinkConfig);
		producerThread.start();

		final AtomicReference<Throwable> consumerError = new AtomicReference<>();
		Thread consumerThread = ExactlyOnceValidatingConsumerThread.create(
			TOTAL_EVENT_COUNT, 200, 2, 500, 500,
			accessKey, secretKey, region, streamName,
			consumerError, flinkPort, flinkConfig);
		consumerThread.start();

		boolean deadlinePassed = false;
		long deadline = System.currentTimeMillis() + (1000 * 2 * 60); // wait at most for two minutes
		// wait until both producer and consumer finishes, or an unexpected error is thrown
		while ((consumerThread.isAlive() || producerThread.isAlive()) &&
			(producerError.get() == null && consumerError.get() == null)) {
			Thread.sleep(1000);
			if (System.currentTimeMillis() >= deadline) {
				LOG.warn("Deadline passed");
				deadlinePassed = true;
				break; // enough waiting
			}
		}

		if (producerThread.isAlive()) {
			producerThread.interrupt();
		}

		if (consumerThread.isAlive()) {
			consumerThread.interrupt();
		}

		if (producerError.get() != null) {
			LOG.info("+++ TEST failed! +++");
			throw new RuntimeException("Producer failed", producerError.get());
		}
		if (consumerError.get() != null) {
			LOG.info("+++ TEST failed! +++");
			throw new RuntimeException("Consumer failed", consumerError.get());
		}

		if (!deadlinePassed) {
			LOG.info("+++ TEST passed! +++");
		} else {
			LOG.info("+++ TEST failed! +++");
		}

	} finally {
		client.deleteStream(streamName);
		client.shutdown();

		// stopping flink
		flink.after();
	}
}
 
Example 16
Source File: FlinkPulsarBatchJsonSinkExample.java    From pulsar with Apache License 2.0
public static void main(String[] args) throws Exception {

        // parse input arguments
        final ParameterTool parameterTool = ParameterTool.fromArgs(args);

        if (parameterTool.getNumberOfParameters() < 2) {
            System.out.println("Missing parameters!");
            System.out.println("Usage: pulsar --service-url <pulsar-service-url> --topic <topic>");
            return;
        }

        // set up the execution environment
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setGlobalJobParameters(parameterTool);

        String serviceUrl = parameterTool.getRequired("service-url");
        String topic = parameterTool.getRequired("topic");

        System.out.println("Parameters:");
        System.out.println("\tServiceUrl:\t" + serviceUrl);
        System.out.println("\tTopic:\t" + topic);

        // create PulsarJsonOutputFormat instance
        final OutputFormat<NasaMission> pulsarJsonOutputFormat = new PulsarJsonOutputFormat<>(serviceUrl, topic, new AuthenticationDisabled());

        // create DataSet
        DataSet<NasaMission> nasaMissionDS = env.fromCollection(nasaMissions);
        // map nasa mission names to upper-case
        nasaMissionDS.map(nasaMission -> new NasaMission(
                nasaMission.id,
                nasaMission.missionName.toUpperCase(),
                nasaMission.startYear,
                nasaMission.endYear))
        // filter missions which started after 1970
        .filter(nasaMission -> nasaMission.startYear > 1970)
        // write batch data to Pulsar
        .output(pulsarJsonOutputFormat);

        // set parallelism to write Pulsar in parallel (optional)
        env.setParallelism(2);

        // execute program
        env.execute("Flink - Pulsar Batch Json");
    }
 
Example 17
Source File: StreamSQLTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		String outputPath = params.getRequired("outputPath");
		String planner = params.get("planner", "old");

		final EnvironmentSettings.Builder builder = EnvironmentSettings.newInstance();
		builder.inStreamingMode();

		if (planner.equals("old")) {
			builder.useOldPlanner();
		} else if (planner.equals("blink")) {
			builder.useBlinkPlanner();
		}

		final EnvironmentSettings settings = builder.build();

		final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
			3,
			Time.of(10, TimeUnit.SECONDS)
		));
		sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		sEnv.enableCheckpointing(4000);
		sEnv.getConfig().setAutoWatermarkInterval(1000);

		final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings);

		tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
		tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

		int overWindowSizeSeconds = 1;
		int tumbleWindowSizeSeconds = 10;

		String overQuery = String.format(
			"SELECT " +
			"  key, " +
			"  rowtime, " +
			"  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
			"FROM table1",
			overWindowSizeSeconds);

		String tumbleQuery = String.format(
			"SELECT " +
			"  key, " +
			"  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
			"  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
			"FROM (%s) " +
			"WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
			"GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
			tumbleWindowSizeSeconds,
			tumbleWindowSizeSeconds,
			overQuery,
			tumbleWindowSizeSeconds);

		String joinQuery = String.format(
			"SELECT " +
			"  t1.key, " +
			"  t2.rowtime AS rowtime, " +
			"  t2.correct," +
			"  t2.wStart " +
			"FROM table2 t1, (%s) t2 " +
			"WHERE " +
			"  t1.key = t2.key AND " +
			"  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
			tumbleQuery,
			tumbleWindowSizeSeconds);

		String finalAgg = String.format(
			"SELECT " +
			"  SUM(correct) AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
			"FROM (%s) " +
			"GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
			joinQuery);

		// get Table for SQL query
		Table result = tEnv.sqlQuery(finalAgg);
		// convert Table into append-only DataStream
		DataStream<Row> resultStream =
			tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

		final StreamingFileSink<Row> sink = StreamingFileSink
			.forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
				PrintStream out = new PrintStream(stream);
				out.println(element.toString());
			})
			.withBucketAssigner(new KeyBucketAssigner())
			.withRollingPolicy(OnCheckpointRollingPolicy.build())
			.build();

		resultStream
			// inject a KillMapper that forwards all records but terminates the first execution attempt
			.map(new KillMapper()).setParallelism(1)
			// add sink function
			.addSink(sink).setParallelism(1);

		sEnv.execute();
	}
 
Example 18
Source File: QsStateClient.java    From flink with Apache License 2.0
public static void main(final String[] args) throws Exception {

		ParameterTool parameters = ParameterTool.fromArgs(args);

		// setup values
		String jobId = parameters.getRequired("job-id");
		String host = parameters.get("host", "localhost");
		int port = parameters.getInt("port", 9069);
		int numIterations = parameters.getInt("iterations", 1500);

		QueryableStateClient client = new QueryableStateClient(host, port);
		client.setExecutionConfig(new ExecutionConfig());

		MapStateDescriptor<EmailId, EmailInformation> stateDescriptor =
				new MapStateDescriptor<>(
						QsConstants.STATE_NAME,
						TypeInformation.of(new TypeHint<EmailId>() {

						}),
						TypeInformation.of(new TypeHint<EmailInformation>() {

						})
				);

		// wait for state to exist
		for (int i = 0; i < BOOTSTRAP_RETRIES; i++) { // ~120s
			try {
				getMapState(jobId, client, stateDescriptor);
				break;
			} catch (ExecutionException e) {
				if (e.getCause() instanceof UnknownKeyOrNamespaceException) {
					System.err.println("State does not exist yet; sleeping 500ms");
					Thread.sleep(500L);
				} else {
					throw e;
				}
			}

			if (i == (BOOTSTRAP_RETRIES - 1)) {
				throw new RuntimeException("Timeout: state doesn't exist after 120s");
			}
		}

		// query state
		for (int iterations = 0; iterations < numIterations; iterations++) {

			MapState<EmailId, EmailInformation> mapState =
				getMapState(jobId, client, stateDescriptor);

			int counter = 0;
			for (Map.Entry<EmailId, EmailInformation> entry: mapState.entries()) {
				// this is to force deserialization
				entry.getKey();
				entry.getValue();
				counter++;
			}
			System.out.println("MapState has " + counter + " entries"); // we look for it in the test

			Thread.sleep(100L);
		}
	}
 
Example 19
Source File: TestAvroConsumerConfluent.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);

	if (parameterTool.getNumberOfParameters() < 6) {
		System.out.println("Missing parameters!\n" +
			"Usage: Kafka --input-topic <topic> --output-string-topic <topic> --output-avro-topic <topic> " +
			"--bootstrap.servers <kafka brokers> " +
			"--schema-registry-url <confluent schema registry> --group.id <some id>");
		return;
	}
	Properties config = new Properties();
	config.setProperty("bootstrap.servers", parameterTool.getRequired("bootstrap.servers"));
	config.setProperty("group.id", parameterTool.getRequired("group.id"));
	String schemaRegistryUrl = parameterTool.getRequired("schema-registry-url");

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<User> input = env
		.addSource(
			new FlinkKafkaConsumer010<>(
				parameterTool.getRequired("input-topic"),
				ConfluentRegistryAvroDeserializationSchema.forSpecific(User.class, schemaRegistryUrl),
				config).setStartFromEarliest());

	SingleOutputStreamOperator<String> mapToString = input
		.map((MapFunction<User, String>) SpecificRecordBase::toString);

	FlinkKafkaProducer010<String> stringFlinkKafkaProducer010 = new FlinkKafkaProducer010<>(
		parameterTool.getRequired("output-string-topic"),
		new SimpleStringSchema(),
		config);
	mapToString.addSink(stringFlinkKafkaProducer010);

	FlinkKafkaProducer010<User> avroFlinkKafkaProducer010 = new FlinkKafkaProducer010<>(
			parameterTool.getRequired("output-avro-topic"),
			ConfluentRegistryAvroSerializationSchema.forSpecific(User.class, parameterTool.getRequired("output-subject"), schemaRegistryUrl),
			config);
	input.addSink(avroFlinkKafkaProducer010);

	env.execute("Kafka 0.10 Confluent Schema Registry AVRO Example");
}