Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#execute()

The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#execute(). They are taken from open-source projects; the source file and originating project are noted above each example.
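All of the examples below follow the same basic pattern: obtain a StreamExecutionEnvironment, build a pipeline of sources, transformations, and sinks, and only then call execute() (optionally with a job name) to submit and run the job. The following minimal sketch illustrates that pattern; the class name, job name, and printing sink are arbitrary choices for illustration.

import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ExecuteSketch {

	public static void main(String[] args) throws Exception {
		// Obtain the execution environment (local or cluster, depending on where the job runs).
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Define a pipeline; nothing runs until execute() is called.
		env.fromElements(1, 2, 3)
			.map(x -> x * 2)
			.print();

		// execute() builds the job graph from the defined operators, submits it,
		// and blocks until the job finishes; it returns a JobExecutionResult.
		JobExecutionResult result = env.execute("ExecuteSketch");
		System.out.println("Job ran for " + result.getNetRuntime() + " ms");
	}
}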
Example 1
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0
/**
 * @see <a href="https://docs.wso2.com/display/CEP300/Patterns">https://docs.wso2.com/display/CEP300/Patterns</a>
 */
@Test
public void testUnboundedPojoStreamSimplePatternMatch() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Event> input1 = env.addSource(new RandomEventSource(5).closeDelay(1500), "input1");
    DataStream<Event> input2 = env.addSource(new RandomEventSource(5).closeDelay(1500), "input2");

    DataStream<Map<String, Object>> output = SiddhiCEP
        .define("inputStream1", input1.keyBy("name"), "id", "name", "price", "timestamp")
        .union("inputStream2", input2.keyBy("name"), "id", "name", "price", "timestamp")
        .cql(
            "from every s1 = inputStream1[id == 2] "
                + " -> s2 = inputStream2[id == 3] "
                + "select s1.id as id_1, s1.name as name_1, s2.id as id_2, s2.name as name_2 "
                + "insert into outputStream"
        )
        .returnAsMap("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(1, getLineCount(resultPath));
    compareResultsByLinesInMemory("{id_1=2, name_1=test_event, id_2=3, name_2=test_event}", resultPath);
}
 
Example 2
Source File: TestFilterEdges.java    From gelly-streaming with Apache License 2.0
@Test
public void testWithEmptyFilter() throws Exception {
	/*
	 * Test filterEdges() with a filter that constantly returns true
	 */
	final String resultPath = getTempDirPath("result");
	final String expectedResult = "1,2,12\n" +
		"1,3,13\n" +
		"2,3,23\n" +
		"3,4,34\n" +
		"3,5,35\n" +
		"4,5,45\n" +
		"5,1,51\n";

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	GraphStream<Long, NullValue, Long> graph = new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);
	graph.filterEdges(new EmptyFilter())
		.getEdges().writeAsCsv(resultPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	compareResultsByLinesInMemory(expectedResult, resultPath);
}
 
Example 3
Source File: KinesisConsumeFromDynamoDBStreams.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    Properties dynamodbStreamsConsumerConfig = new Properties();
    final String streamName = parameterTool.getRequired("stream.name");

    dynamodbStreamsConsumerConfig.setProperty("aws.region", parameterTool.getRequired("aws.region"));
    dynamodbStreamsConsumerConfig.setProperty("aws.credentials.provider.basic.accesskeyid", parameterTool.getRequired("aws.accesskey"));
    dynamodbStreamsConsumerConfig.setProperty("aws.credentials.provider.basic.secretkey", parameterTool.getRequired("aws.secretkey"));


    DataStream<String> dynamodbStreams = env.addSource(new FlinkDynamoDBStreamsConsumer<>(
            streamName,
            new SimpleStringSchema(),
            dynamodbStreamsConsumerConfig));

    dynamodbStreams.print();

    env.execute();
}
 
Example 4
Source File: CassandraTupleWriteAheadSinkExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(1000);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
	env.setStateBackend(new FsStateBackend("file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend"));

	CassandraSink<Tuple2<String, Integer>> sink = CassandraSink.addSink(env.addSource(new MySource()))
		.setQuery("INSERT INTO example.values (id, counter) values (?, ?);")
		.enableWriteAheadLog()
		.setClusterBuilder(new ClusterBuilder() {

			private static final long serialVersionUID = 2793938419775311824L;

			@Override
			public Cluster buildCluster(Cluster.Builder builder) {
				return builder.addContactPoint("127.0.0.1").build();
			}
		})
		.build();

	sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello");

	env.execute();
}
 
Example 5
Source File: CompressionFactoryITCase.java    From flink with Apache License 2.0
@Test
public void testWriteCompressedFile() throws Exception {
	final File folder = TEMPORARY_FOLDER.newFolder();
	final Path testPath = Path.fromLocalFile(folder);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	DataStream<String> stream = env.addSource(
			new FiniteTestSource<>(testData),
			TypeInformation.of(String.class)
	);

	stream.map(str -> str).addSink(
			StreamingFileSink.forBulkFormat(
					testPath,
					CompressWriters.forExtractor(new DefaultExtractor<String>()).withHadoopCompression(TEST_CODEC_NAME)
			).build());

	env.execute();

	validateResults(folder, testData, new CompressionCodecFactory(configuration).getCodecByName(TEST_CODEC_NAME));
}
 
Example 6
Source File: CassandraTupleSinkExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<String, Integer>> source = env.fromCollection(collection);

	CassandraSink.addSink(source)
		.setQuery(INSERT)
		.setClusterBuilder(new ClusterBuilder() {
			@Override
			protected Cluster buildCluster(Builder builder) {
				return builder.addContactPoint("127.0.0.1").build();
			}
		})
		.build();

	env.execute("WriteTupleIntoCassandra");
}
 
Example 7
Source File: WordCount.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// get input data
		DataStream<String> text;
		if (params.has("input")) {
			// read the text file from given input path
			text = env.readTextFile(params.get("input"));
		} else {
			System.out.println("Executing WordCount example with default input data set.");
			System.out.println("Use --input to specify file input.");
			// get default test text data
			text = env.fromElements(WordCountData.WORDS);
		}

		DataStream<Tuple2<String, Integer>> counts =
			// split up the lines in pairs (2-tuples) containing: (word,1)
			text.flatMap(new Tokenizer())
			// group by the tuple field "0" and sum up tuple field "1"
			.keyBy(0).sum(1);

		// emit result
		if (params.has("output")) {
			counts.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			counts.print();
		}

		// execute program
		env.execute("Streaming WordCount");
	}
 
Example 8
Source File: TestUserClassLoaderJob.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStreamSource<Integer> source = env.fromElements(new TestUserClassLoaderJobLib().getValue(), 1, 2, 3, 4);
	final SingleOutputStreamOperator<Integer> mapper = source.map(element -> 2 * element);
	mapper.addSink(new DiscardingSink<>());

	ParameterTool parameterTool = ParameterTool.fromArgs(args);
	env.execute(TestUserClassLoaderJob.class.getCanonicalName() + "-" + parameterTool.getRequired("arg"));
}
 
Example 9
Source File: DistributedCacheTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testStreamingDistributedCache() throws Exception {
	String textPath = createTempFile("count.txt", DATA);
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.registerCachedFile(textPath, "cache_test");
	env.readTextFile(textPath).flatMap(new WordChecker());
	env.execute();
}
 
Example 10
Source File: CheckpointedLongRidesSolution.java    From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		final String input = params.get("input", ExerciseBase.pathToRideData);
		final int servingSpeedFactor = 1800; // 30 minutes worth of events are served every second

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.setParallelism(ExerciseBase.parallelism);

		// set up checkpointing
		env.setStateBackend(new FsStateBackend("file:///tmp/checkpoints"));
		env.enableCheckpointing(1000);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(60, Time.of(10, TimeUnit.SECONDS)));

		DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new CheckpointedTaxiRideSource(input, servingSpeedFactor)));

		DataStream<TaxiRide> longRides = rides
				.filter(new NYCFilter())
				.keyBy((TaxiRide ride) -> ride.rideId)
				.process(new MatchFunction());

		printOrTest(longRides);

		env.execute("Long Taxi Rides (checkpointed)");
	}
 
Example 11
Source File: DataStreamPojoITCase.java    From flink with Apache License 2.0
@Test
public void testNestedPojoFieldAccessor() throws Exception {
	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.getConfig().disableObjectReuse();
	see.setParallelism(4);

	DataStream<Data> dataStream = see.fromCollection(elements);

	DataStream<Data> summedStream = dataStream
		.keyBy("aaa")
		.sum("stats.count")
		.keyBy("aaa")
		.flatMap(new FlatMapFunction<Data, Data>() {
			Data[] first = new Data[3];
			@Override
			public void flatMap(Data value, Collector<Data> out) throws Exception {
				if (first[value.aaa] == null) {
					first[value.aaa] = value;
					if (value.stats.count != 123) {
						throw new RuntimeException("Expected stats.count to be 123");
					}
				} else {
					if (value.stats.count != 2 * 123) {
						throw new RuntimeException("Expected stats.count to be 2 * 123");
					}
				}
			}
		});

	summedStream.print();

	see.execute();
}
 
Example 12
Source File: JavaSqlITCase.java    From flink with Apache License 2.0
@Test
public void testRowRegisterRowWithNames() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
	StreamITCase.clear();

	List<Row> data = new ArrayList<>();
	data.add(Row.of(1, 1L, "Hi"));
	data.add(Row.of(2, 2L, "Hello"));
	data.add(Row.of(3, 2L, "Hello world"));

	TypeInformation<?>[] types = {
			BasicTypeInfo.INT_TYPE_INFO,
			BasicTypeInfo.LONG_TYPE_INFO,
			BasicTypeInfo.STRING_TYPE_INFO};
	String[] names = {"a", "b", "c"};

	RowTypeInfo typeInfo = new RowTypeInfo(types, names);

	DataStream<Row> ds = env.fromCollection(data).returns(typeInfo);

	Table in = tableEnv.fromDataStream(ds, "a,b,c");
	tableEnv.registerTable("MyTableRow", in);

	String sqlQuery = "SELECT a,c FROM MyTableRow";
	Table result = tableEnv.sqlQuery(sqlQuery);

	DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
	resultSet.addSink(new StreamITCase.StringSink<Row>());
	env.execute();

	List<String> expected = new ArrayList<>();
	expected.add("1,Hi");
	expected.add("2,Hello");
	expected.add("3,Hello world");

	StreamITCase.compareWithList(expected);
}
 
Example 13
Source File: FlinkPravegaTableITCase.java    From flink-connectors with Apache License 2.0
private void testTableSourceStreamingDescriptor(Stream stream, PravegaConfig pravegaConfig) throws Exception {
    final StreamExecutionEnvironment execEnvRead = StreamExecutionEnvironment.getExecutionEnvironment();
    execEnvRead.setParallelism(1);
    execEnvRead.enableCheckpointing(100);
    execEnvRead.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    StreamTableEnvironment tableEnv = StreamTableEnvironment.create(execEnvRead,
            EnvironmentSettings.newInstance()
                    // watermark is only supported in blink planner
                    .useBlinkPlanner()
                    .inStreamingMode()
                    .build());
    RESULTS.clear();

    // read data from the stream using Table reader
    Schema schema = new Schema()
            .field("user", DataTypes.STRING())
            .field("uri", DataTypes.STRING())
            .field("accessTime", DataTypes.TIMESTAMP(3)).rowtime(
                    new Rowtime().timestampsFromField("accessTime").watermarksPeriodicBounded(30000L));

    Pravega pravega = new Pravega();
    pravega.tableSourceReaderBuilder()
            .withReaderGroupScope(stream.getScope())
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(new Json().failOnMissingField(true))
            .withSchema(schema)
            .inAppendMode();

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSource<?> source = TableFactoryService.find(StreamTableSourceFactory.class, propertiesMap)
            .createStreamTableSource(propertiesMap);

    String tableSourcePath = tableEnv.getCurrentDatabase() + "." + "MyTableRow";

    ConnectorCatalogTable<?, ?> connectorCatalogSourceTable = ConnectorCatalogTable.source(source, false);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSourcePath),
            connectorCatalogSourceTable, false);

    String sqlQuery = "SELECT user, " +
            "TUMBLE_END(accessTime, INTERVAL '5' MINUTE) AS accessTime, " +
            "COUNT(uri) AS cnt " +
            "from MyTableRow GROUP BY " +
            "user, TUMBLE(accessTime, INTERVAL '5' MINUTE)";
    Table result = tableEnv.sqlQuery(sqlQuery);

    DataStream<Tuple2<Boolean, Row>> resultSet = tableEnv.toRetractStream(result, Row.class);
    StringSink2 stringSink = new StringSink2(8);
    resultSet.addSink(stringSink);

    try {
        execEnvRead.execute("ReadRowData");
    } catch (Exception e) {
        if (!(ExceptionUtils.getRootCause(e) instanceof SuccessException)) {
            throw e;
        }
    }

    log.info("results: {}", RESULTS);
    boolean compare = compare(RESULTS, getExpectedResultsAppend());
    assertTrue("Output does not match expected result", compare);
}
 
Example 14
Source File: IterateExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final ParameterTool params = ParameterTool.fromArgs(args);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment().setBufferTimeout(1);
        env.getConfig().setGlobalJobParameters(params);

        IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = env.addSource(new RandomFibonacciSource())
                .map(new InputMap())
                .iterate(5000);

        SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
                .split(new MySelector());

        it.closeWith(step.select("iterate"));

        step.select("output")
                .map(new OutputMap())
                .print();

        env.execute("Streaming Iteration Example");
    }
 
Example 15
Source File: ContinuousFileProcessingITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testProgram() throws Exception {

	/*
	* This test checks the interplay between the monitor and the reader
	* and also the failExternally() functionality. To test the latter we
	* set the parallelism to 1 so that we have the chaining between the sink,
	* which throws the SuccessException to signal the end of the test, and the
	* reader.
	* */

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// create the stream execution environment with a parallelism > 1 to test
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY,
			env.getParallelism(), INTERVAL);

	// the monitor has always DOP 1
	DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
	Assert.assertEquals(1, splits.getParallelism());

	ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
	TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

	// the readers can be multiple
	DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader);
	Assert.assertEquals(PARALLELISM, content.getParallelism());

	// finally for the sink we set the parallelism to 1 so that we can verify the output
	TestingSinkFunction sink = new TestingSinkFunction();
	content.addSink(sink).setParallelism(1);

	Thread job = new Thread() {

		@Override
		public void run() {
			try {
				env.execute("ContinuousFileProcessingITCase Job.");
			} catch (Exception e) {
				Throwable th = e;
				for (int depth = 0; depth < 20; depth++) {
					if (th instanceof SuccessException) {
						return;
					} else if (th.getCause() != null) {
						th = th.getCause();
					} else {
						break;
					}
				}
				e.printStackTrace();
				Assert.fail(e.getMessage());
			}
		}
	};
	job.start();

	// The modification time of the last created file.
	long lastCreatedModTime = Long.MIN_VALUE;

	// create the files to be read
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
		long modTime;
		do {

			// give it some time so that the files have
			// different modification timestamps.
			Thread.sleep(50);

			tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");

			modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
			if (modTime <= lastCreatedModTime) {
				// delete the last created file to recreate it with a different timestamp
				hdfs.delete(tmpFile.f0, false);
			}
		} while (modTime <= lastCreatedModTime);
		lastCreatedModTime = modTime;

		// put the contents in the expected results list before the reader picks them
		// this is to guarantee that they are in before the reader finishes (avoid race conditions)
		expectedContents.put(i, tmpFile.f1);

		org.apache.hadoop.fs.Path file =
			new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
		hdfs.rename(tmpFile.f0, file);
		Assert.assertTrue(hdfs.exists(file));
	}

	// wait for the job to finish.
	job.join();
}
 
Example 16
Source File: CoGroupJoinITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSelfJoin() throws Exception {

	testResults = new ArrayList<>();

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setParallelism(1);

	DataStream<Tuple3<String, String, Integer>> source1 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
		private static final long serialVersionUID = 1L;

		@Override
		public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
			ctx.collect(Tuple3.of("a", "x", 0));
			ctx.collect(Tuple3.of("a", "y", 1));
			ctx.collect(Tuple3.of("a", "z", 2));

			ctx.collect(Tuple3.of("b", "u", 3));
			ctx.collect(Tuple3.of("b", "w", 5));

			ctx.collect(Tuple3.of("a", "i", 6));
			ctx.collect(Tuple3.of("a", "j", 7));
			ctx.collect(Tuple3.of("a", "k", 8));

			// source is finite, so it will have an implicit MAX watermark when it finishes
		}

		@Override
		public void cancel() {
		}
	}).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

	source1.join(source1)
			.where(new Tuple3KeyExtractor())
			.equalTo(new Tuple3KeyExtractor())
			.window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
			.apply(new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {
				@Override
				public String join(Tuple3<String, String, Integer> first, Tuple3<String, String, Integer> second) throws Exception {
					return first + ":" + second;
				}
			})
			.addSink(new SinkFunction<String>() {
				@Override
				public void invoke(String value) throws Exception {
					testResults.add(value);
				}
			});

	env.execute("Self-Join Test");

	List<String> expectedResult = Arrays.asList(
			"(a,x,0):(a,x,0)",
			"(a,x,0):(a,y,1)",
			"(a,x,0):(a,z,2)",
			"(a,y,1):(a,x,0)",
			"(a,y,1):(a,y,1)",
			"(a,y,1):(a,z,2)",
			"(a,z,2):(a,x,0)",
			"(a,z,2):(a,y,1)",
			"(a,z,2):(a,z,2)",
			"(b,u,3):(b,u,3)",
			"(b,u,3):(b,w,5)",
			"(b,w,5):(b,u,3)",
			"(b,w,5):(b,w,5)",
			"(a,i,6):(a,i,6)",
			"(a,i,6):(a,j,7)",
			"(a,i,6):(a,k,8)",
			"(a,j,7):(a,i,6)",
			"(a,j,7):(a,j,7)",
			"(a,j,7):(a,k,8)",
			"(a,k,8):(a,i,6)",
			"(a,k,8):(a,j,7)",
			"(a,k,8):(a,k,8)");

	Collections.sort(expectedResult);
	Collections.sort(testResults);

	Assert.assertEquals(expectedResult, testResults);
}
 
Example 17
Source File: HistoryServerTest.java    From Flink-CEPplus with Apache License 2.0
private static void runJob() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.fromElements(1, 2, 3).addSink(new DiscardingSink<>());

	env.execute();
}
 
Example 18
Source File: Kafka09ITCase.java    From flink with Apache License 2.0
/**
 * Kafka09 specific RateLimiter test. This test produces 100 bytes of data to a test topic
 * and then runs a job with {@link FlinkKafkaConsumer09} as the source and a {@link GuavaFlinkConnectorRateLimiter} with
 * a desired rate of 10 bytes / second (globalRate in the code below). Based on the execution time, the test asserts that this rate was not surpassed.
 * If no rate limiter is set on the consumer, the test should fail.
 */
@Test(timeout = 60000)
public void testRateLimitedConsumer() throws Exception {
	final String testTopic = "testRateLimitedConsumer";
	createTestTopic(testTopic, 3, 1);

	// ---------- Produce a stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	DataStream<String> stream = env.addSource(new SourceFunction<String>() {
		private static final long serialVersionUID = 1L;
		boolean running = true;

		@Override
		public void run(SourceContext<String> ctx) {
			long i = 0;
			while (running) {
				byte[] data = new byte[] {1};
				synchronized (ctx.getCheckpointLock()) {
					ctx.collect(new String(data)); // 1 byte
				}
				if (i++ == 100L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	Properties producerProperties = new Properties();
	producerProperties.putAll(standardProps);
	producerProperties.putAll(secureProps);
	producerProperties.put("retries", 3);

	stream.addSink(new FlinkKafkaProducer09<>(testTopic, new SimpleStringSchema(), producerProperties));
	env.execute("Produce 100 bytes of data to test topic");

	// ---------- Consumer from Kafka in a ratelimited way -----------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	// ---------- RateLimiter config -------------
	final long globalRate = 10; // bytes/second
	FlinkKafkaConsumer09<String> consumer09 = new FlinkKafkaConsumer09<>(testTopic,
		new StringDeserializer(globalRate), standardProps);
	FlinkConnectorRateLimiter rateLimiter = new GuavaFlinkConnectorRateLimiter();
	rateLimiter.setRate(globalRate);
	consumer09.setRateLimiter(rateLimiter);

	DataStream<String> stream1 = env.addSource(consumer09);
	stream1.addSink(new DiscardingSink<>());
	env.execute("Consume 100 bytes of data from test topic");

	// ------- Assertions --------------
	Assert.assertNotNull(consumer09.getRateLimiter());
	Assert.assertEquals(globalRate, consumer09.getRateLimiter().getRate());

	deleteTestTopic(testTopic);
}
 
Example 19
Source File: TopSpeedWindowing.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.getConfig().setGlobalJobParameters(params);

		@SuppressWarnings({"rawtypes", "serial"})
		DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
		if (params.has("input")) {
			carData = env.readTextFile(params.get("input")).map(new ParseCarData());
		} else {
			System.out.println("Executing TopSpeedWindowing example with default input data set.");
			System.out.println("Use --input to specify file input.");
			carData = env.addSource(CarSource.create(2));
		}

		int evictionSec = 10;
		double triggerMeters = 50;
		DataStream<Tuple4<Integer, Integer, Double, Long>> topSpeeds = carData
				.assignTimestampsAndWatermarks(new CarTimestamp())
				.keyBy(0)
				.window(GlobalWindows.create())
				.evictor(TimeEvictor.of(Time.of(evictionSec, TimeUnit.SECONDS)))
				.trigger(DeltaTrigger.of(triggerMeters,
						new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
							private static final long serialVersionUID = 1L;

							@Override
							public double getDelta(
									Tuple4<Integer, Integer, Double, Long> oldDataPoint,
									Tuple4<Integer, Integer, Double, Long> newDataPoint) {
								return newDataPoint.f2 - oldDataPoint.f2;
							}
						}, carData.getType().createSerializer(env.getConfig())))
				.maxBy(1);

		if (params.has("output")) {
			topSpeeds.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			topSpeeds.print();
		}

		env.execute("CarTopSpeedWindowingExample");
	}
 
Example 20
Source File: PopularDestinationQuery.java    From pravega-samples with Apache License 2.0
@Override
public void handleRequest() {

    TableSchema tableSchema = TripRecord.getTableSchema();

    FlinkPravegaJsonTableSource source = FlinkPravegaJsonTableSource.builder()
            .forStream(Stream.of(getScope(), getStream()).getScopedName())
            .withPravegaConfig(getPravegaConfig())
            .failOnMissingField(true)
            .withRowtimeAttribute("pickupTime",
                    new ExistingField("pickupTime"),
                    new BoundedOutOfOrderTimestamps(30000L))
            .withSchema(tableSchema)
            .withReaderGroupScope(getScope())
            .build();

    StreamExecutionEnvironment env = getStreamExecutionEnvironment();

    // create a TableEnvironment
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
    tEnv.registerTableSource("TaxiRide", source);

    String query =
            "SELECT " +
                    "destLocationId, wstart, wend, cnt " +
                    "FROM " +
                    "(SELECT " +
                    "destLocationId, " +
                    "HOP_START(pickupTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wstart, " +
                    "HOP_END(pickupTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wend, " +
                    "COUNT(destLocationId) AS cnt " +
                    "FROM " +
                    "(SELECT " +
                    "pickupTime, " +
                    "destLocationId " +
                    "FROM TaxiRide) " +
                    "GROUP BY destLocationId, HOP(pickupTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE)) " +
                    "WHERE cnt > " + getLimit();

    Table results = tEnv.sqlQuery(query);

    tEnv.toAppendStream(results, Row.class).print();

    try {
        env.execute("Popular-Destination");
    } catch (Exception e) {
        log.error("Application Failed", e);
    }
}