Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#writeAsText()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#writeAsText(). All of them are drawn from open-source projects; the source file and project for each are noted above the example.
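Before the project examples, here is a minimal, self-contained sketch of the method itself (the output path and class name are illustrative): writeAsText() writes each element's toString() as one line of a text file, and the overload that takes a FileSystem.WriteMode controls whether an existing target is replaced. Newer Flink releases deprecate writeAsText() in favor of dedicated file sinks, so the examples below reflect the older API.

import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class WriteAsTextSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Any DataStream works the same way; a small in-memory stream keeps the sketch runnable.
        DataStream<String> words = env.fromElements("alpha", "beta", "gamma");

        // Each element becomes one line of the target file. Without OVERWRITE,
        // the write fails if the target already exists.
        words.writeAsText("/tmp/words.txt", FileSystem.WriteMode.OVERWRITE);

        env.execute("writeAsText sketch");
    }
}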
Example 1
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0
@Test(expected = UndefinedStreamException.class)
public void testTriggerUndefinedStreamException() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input1 = env.addSource(new RandomEventSource(5), "input1");

    SiddhiCEP cep = SiddhiCEP.getSiddhiEnvironment(env);
    cep.registerStream("inputStream1", input1.keyBy("id"), "id", "name", "price", "timestamp");

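    // "inputStream2" is referenced in the union below but never registered,
    // which is what triggers the expected UndefinedStreamException.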
    DataStream<Map<String, Object>> output = cep
        .from("inputStream1").union("inputStream2")
        .cql(
            "from inputStream1#window.length(5) as s1 "
                + "join inputStream2#window.time(500) as s2 "
                + "on s1.id == s2.id "
                + "select s1.timestamp as t, s1.name as n, s1.price as p1, s2.price as p2 "
                + "insert into JoinStream;"
        )
        .returnAsMap("JoinStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
}
 
Example 2
Source File: SiddhiCEPITCase.java    From flink-siddhi with Apache License 2.0
@Test
public void testUnboundedPojoSourceAndReturnTuple() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input = env.addSource(new RandomEventSource(5));

    DataStream<Tuple4<Long, Integer, String, Double>> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream");

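    // Project just the id field (f1) so the output file contains one integer per event.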
    DataStream<Integer> following = output.map(new MapFunction<Tuple4<Long, Integer, String, Double>, Integer>() {
        @Override
        public Integer map(Tuple4<Long, Integer, String, Double> value) throws Exception {
            return value.f1;
        }
    });
    String resultPath = tempFolder.newFile().toURI().toString();
    following.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
 
Example 3
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0
/**
 * @see <a href="https://docs.wso2.com/display/CEP300/Sequences">https://docs.wso2.com/display/CEP300/Sequences</a>
 */
@Test
public void testUnboundedPojoStreamSimpleSequences() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input1 = env.addSource(new RandomEventSource(5).closeDelay(1500), "input1");
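    // Siddhi sequence: one or more inputStream1 events with id == 2 ("+"),
    // optionally followed ("?") by an inputStream2 event with id == 3, within 1000 seconds.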
    DataStream<Map<String, Object>> output = SiddhiCEP
        .define("inputStream1", input1.keyBy("name"), "id", "name", "price", "timestamp")
        .union("inputStream2", input1.keyBy("name"), "id", "name", "price", "timestamp")
        .cql(
            "from every s1 = inputStream1[id == 2]+ , "
                + "s2 = inputStream2[id == 3]? "
                + "within 1000 second "
                + "select s1[0].name as n1, s2.name as n2 "
                + "insert into outputStream"
        )
        .returnAsMap("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(1, getLineCount(resultPath));
}
 
Example 4
Source File: SiddhiCEPITCase.java    From flink-siddhi with Apache License 2.0
@Test
public void testUnboundedTupleSourceAndReturnTuple() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple4<Integer, String, Double, Long>> input = env
        .addSource(new RandomTupleSource(5).closeDelay(1500)).keyBy(1);
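    // keyBy(1) partitions the tuple source by its second field, the event name.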

    DataStream<Tuple4<Long, Integer, String, Double>> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
 
Example 5
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0
@Test(expected = InvalidTypesException.class)
public void testUnboundedPojoSourceButReturnInvalidTupleType() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input = env.addSource(new RandomEventSource(5).closeDelay(1500));

    DataStream<Tuple5<Long, Integer, String, Double, Long>> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream");

    DataStream<Long> following = output.map(new MapFunction<Tuple5<Long, Integer, String, Double, Long>, Long>() {
        @Override
        public Long map(Tuple5<Long, Integer, String, Double, Long> value) throws Exception {
            return value.f0;
        }
    });

    String resultPath = tempFolder.newFile().toURI().toString();
    following.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
 
Example 6
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0
@Test
public void testMultipleUnboundedPojoStreamSimpleUnion() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input1 = env.addSource(new RandomEventSource(2), "input1");
    DataStream<Event> input2 = env.addSource(new RandomEventSource(2), "input2");
    DataStream<Event> input3 = env.addSource(new RandomEventSource(2), "input3");
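    // All three queries below insert into the same output stream, so the
    // three two-event sources yield six lines in total.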
    DataStream<Event> output = SiddhiCEP
        .define("inputStream1", input1, "id", "name", "price", "timestamp")
        .union("inputStream2", input2, "id", "name", "price", "timestamp")
        .union("inputStream3", input3, "id", "name", "price", "timestamp")
        .cql(
            "from inputStream1 select timestamp, id, name, price insert into outputStream;"
                + "from inputStream2 select timestamp, id, name, price insert into outputStream;"
                + "from inputStream3 select timestamp, id, name, price insert into outputStream;"
        )
        .returns("outputStream", Event.class);

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(6, getLineCount(resultPath));
}
 
Example 7
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0
/**
 * @see <a href="https://docs.wso2.com/display/CEP300/Patterns">https://docs.wso2.com/display/CEP300/Patterns</a>
 */
@Test
public void testUnboundedPojoStreamSimplePatternMatch() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Event> input1 = env.addSource(new RandomEventSource(5).closeDelay(1500), "input1");
    DataStream<Event> input2 = env.addSource(new RandomEventSource(5).closeDelay(1500), "input2");

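    // Siddhi pattern: an inputStream1 event with id == 2 followed by an
    // inputStream2 event with id == 3; matched fields are emitted as a map.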
    DataStream<Map<String, Object>> output = SiddhiCEP
        .define("inputStream1", input1.keyBy("name"), "id", "name", "price", "timestamp")
        .union("inputStream2", input2.keyBy("name"), "id", "name", "price", "timestamp")
        .cql(
            "from every s1 = inputStream1[id == 2] "
                + " -> s2 = inputStream2[id == 3] "
                + "select s1.id as id_1, s1.name as name_1, s2.id as id_2, s2.name as name_2 "
                + "insert into outputStream"
        )
        .returnAsMap("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(1, getLineCount(resultPath));
    compareResultsByLinesInMemory("{id_1=2, name_1=test_event, id_2=3, name_2=test_event}", resultPath);
}
 
Example 8
Source File: SiddhiCEPITCase.java    From flink-siddhi with Apache License 2.0
@Test
public void testUnboundedPojoStreamAndReturnMap() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    DataStream<Event> input = env.addSource(new RandomEventSource(5));

    DataStream<Map<String,Object>> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returnAsMap("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
 
Example 9
Source File: WordCount.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// get input data
		DataStream<String> text;
		if (params.has("input")) {
			// read the text file from given input path
			text = env.readTextFile(params.get("input"));
		} else {
			System.out.println("Executing WordCount example with default input data set.");
			System.out.println("Use --input to specify file input.");
			// get default test text data
			text = env.fromElements(WordCountData.WORDS);
		}

		DataStream<Tuple2<String, Integer>> counts =
			// split up the lines into pairs (2-tuples) containing: (word,1)
			text.flatMap(new Tokenizer())
			// group by the tuple field "0" and sum up tuple field "1"
			.keyBy(0).sum(1);

		// emit result
		if (params.has("output")) {
			counts.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			counts.print();
		}

		// execute program
		env.execute("Streaming WordCount");
	}
 
Example 10
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0
@Test
public void testRegisterStreamAndExtensionWithSiddhiCEPEnvironment() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input1 = env.addSource(new RandomEventSource(5), "input1");
    DataStream<Event> input2 = env.addSource(new RandomEventSource(5), "input2");

    SiddhiCEP cep = SiddhiCEP.getSiddhiEnvironment(env);
    cep.registerExtension("custom:plus", CustomPlusFunctionExtension.class);
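    // Expose a custom Siddhi function extension, addressable as custom:plus in CQL.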

    cep.registerStream("inputStream1", input1.keyBy("id"), "id", "name", "price", "timestamp");
    cep.registerStream("inputStream2", input2.keyBy("id"), "id", "name", "price", "timestamp");

    DataStream<Tuple4<Long, String, Double, Double>> output = cep
        .from("inputStream1").union("inputStream2")
        .cql(
            "from inputStream1#window.length(5) as s1 "
                + "join inputStream2#window.time(500) as s2 "
                + "on s1.id == s2.id "
                + "select s1.timestamp as t, s1.name as n, s1.price as p1, s2.price as p2 "
                + "insert into JoinStream;"
        )
        .returns("JoinStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
 
Example 11
Source File: TwitterExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);
		System.out.println("Usage: TwitterExample [--output <path>] " +
				"[--twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> --twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret>]");

		// set up the execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		env.setParallelism(params.getInt("parallelism", 1));

		// get input data
		DataStream<String> streamSource;
		if (params.has(TwitterSource.CONSUMER_KEY) &&
				params.has(TwitterSource.CONSUMER_SECRET) &&
				params.has(TwitterSource.TOKEN) &&
				params.has(TwitterSource.TOKEN_SECRET)
				) {
			streamSource = env.addSource(new TwitterSource(params.getProperties()));
		} else {
			System.out.println("Executing TwitterStream example with default props.");
			System.out.println("Use --twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> " +
					"--twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret> specify the authentication info.");
			// get default test text data
			streamSource = env.fromElements(TwitterExampleData.TEXTS);
		}

		DataStream<Tuple2<String, Integer>> tweets = streamSource
				// selecting English tweets and splitting to (word, 1)
				.flatMap(new SelectEnglishAndTokenizeFlatMap())
				// group by words and sum their occurrences
				.keyBy(0).sum(1);

		// emit result
		if (params.has("output")) {
			tweets.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			tweets.print();
		}

		// execute program
		env.execute("Twitter Streaming Example");
	}
 
Example 12
Source File: WindowWordCount.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataStream<String> text;
		if (params.has("input")) {
			// read the text file from given input path
			text = env.readTextFile(params.get("input"));
		} else {
			System.out.println("Executing WindowWordCount example with default input data set.");
			System.out.println("Use --input to specify file input.");
			// get default test text data
			text = env.fromElements(WordCountData.WORDS);
		}

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		final int windowSize = params.getInt("window", 10);
		final int slideSize = params.getInt("slide", 5);

		DataStream<Tuple2<String, Integer>> counts =
		// split up the lines into pairs (2-tuples) containing: (word,1)
		text.flatMap(new WordCount.Tokenizer())
				// create windows of windowSize records, sliding every slideSize records
				.keyBy(0)
				.countWindow(windowSize, slideSize)
				// group by the tuple field "0" and sum up tuple field "1"
				.sum(1);

		// emit result
		if (params.has("output")) {
			counts.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			counts.print();
		}

		// execute program
		env.execute("WindowWordCount");
	}
 
Example 13
Source File: TopSpeedWindowing.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.getConfig().setGlobalJobParameters(params);

		@SuppressWarnings({"rawtypes", "serial"})
		DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
		if (params.has("input")) {
			carData = env.readTextFile(params.get("input")).map(new ParseCarData());
		} else {
			System.out.println("Executing TopSpeedWindowing example with default input data set.");
			System.out.println("Use --input to specify file input.");
			carData = env.addSource(CarSource.create(2));
		}

		int evictionSec = 10;
		double triggerMeters = 50;
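		// Evict events older than 10 seconds and fire the window whenever a car
		// has traveled 50 meters more than at the previous trigger firing.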
		DataStream<Tuple4<Integer, Integer, Double, Long>> topSpeeds = carData
				.assignTimestampsAndWatermarks(new CarTimestamp())
				.keyBy(0)
				.window(GlobalWindows.create())
				.evictor(TimeEvictor.of(Time.of(evictionSec, TimeUnit.SECONDS)))
				.trigger(DeltaTrigger.of(triggerMeters,
						new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
							private static final long serialVersionUID = 1L;

							@Override
							public double getDelta(
									Tuple4<Integer, Integer, Double, Long> oldDataPoint,
									Tuple4<Integer, Integer, Double, Long> newDataPoint) {
								return newDataPoint.f2 - oldDataPoint.f2;
							}
						}, carData.getType().createSerializer(env.getConfig())))
				.maxBy(1);

		if (params.has("output")) {
			topSpeeds.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			topSpeeds.print();
		}

		env.execute("CarTopSpeedWindowingExample");
	}
 
Example 14
Source File: TextOutputFormatITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testProgram() throws Exception {
	String resultPath = getTempDirPath("result");

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> text = env.fromElements(WordCountData.TEXT);

	DataStream<Tuple2<String, Integer>> counts = text
			.flatMap(new Tokenizer())
			.keyBy(0).sum(1);

	counts.writeAsText(resultPath);

	env.execute("WriteAsTextTest");

	compareResultsByLinesInMemory(WordCountData.STREAMING_COUNTS_AS_TUPLES, resultPath);
}
 
Example 15
Source File: SessionWindowing.java    From flink with Apache License 2.0
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {

	final ParameterTool params = ParameterTool.fromArgs(args);
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	env.getConfig().setGlobalJobParameters(params);
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setParallelism(1);

	final boolean fileOutput = params.has("output");

	final List<Tuple3<String, Long, Integer>> input = new ArrayList<>();

	input.add(new Tuple3<>("a", 1L, 1));
	input.add(new Tuple3<>("b", 1L, 1));
	input.add(new Tuple3<>("b", 3L, 1));
	input.add(new Tuple3<>("b", 5L, 1));
	input.add(new Tuple3<>("c", 6L, 1));
	// We expect to detect the session "a" earlier than this point (the old
	// functionality can only detect here when the next starts)
	input.add(new Tuple3<>("a", 10L, 1));
	// We expect to detect session "b" and "c" at this point as well
	input.add(new Tuple3<>("c", 11L, 1));

	DataStream<Tuple3<String, Long, Integer>> source = env
			.addSource(new SourceFunction<Tuple3<String, Long, Integer>>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void run(SourceContext<Tuple3<String, Long, Integer>> ctx) throws Exception {
					for (Tuple3<String, Long, Integer> value : input) {
						ctx.collectWithTimestamp(value, value.f1);
						ctx.emitWatermark(new Watermark(value.f1 - 1));
					}
					ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
				}

				@Override
				public void cancel() {
				}
			});

	// We create sessions for each id with max timeout of 3 time units
	DataStream<Tuple3<String, Long, Integer>> aggregated = source
			.keyBy(0)
			.window(EventTimeSessionWindows.withGap(Time.milliseconds(3L)))
			.sum(2);

	if (fileOutput) {
		aggregated.writeAsText(params.get("output"));
	} else {
		System.out.println("Printing result to stdout. Use --output to specify output path.");
		aggregated.print();
	}

	env.execute();
}
 
Example 16
Source File: ConsumerSample.java    From aliyun-log-flink-connector with Apache License 2.0
public static void main(String[] args) throws Exception {
        final ParameterTool params = ParameterTool.fromArgs(args);
//        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // For local testing
        Configuration conf = new Configuration();
        conf.setString(CheckpointingOptions.CHECKPOINTS_DIRECTORY,
                "file:///Users/kel/Github/flink3/aliyun-log-flink-connector/flink2");
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(1, conf);
        env.getConfig().setGlobalJobParameters(params);
        env.setParallelism(1);
        env.enableCheckpointing(5000);
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        env.setStateBackend(new FsStateBackend("file:///Users/kel/Github/flink3/aliyun-log-flink-connector/flink"));
        Properties configProps = new Properties();
        configProps.put(ConfigConstants.LOG_ENDPOINT, SLS_ENDPOINT);
        configProps.put(ConfigConstants.LOG_ACCESSSKEYID, ACCESS_KEY_ID);
        configProps.put(ConfigConstants.LOG_ACCESSKEY, ACCESS_KEY_SECRET);
        configProps.put(ConfigConstants.LOG_MAX_NUMBER_PER_FETCH, "10");
        configProps.put(ConfigConstants.LOG_CONSUMER_BEGIN_POSITION, Consts.LOG_FROM_CHECKPOINT);
        configProps.put(ConfigConstants.LOG_CONSUMERGROUP, "23_ots_sla_etl_product1");
        configProps.put(ConfigConstants.LOG_CHECKPOINT_MODE, CheckpointMode.ON_CHECKPOINTS.name());
        configProps.put(ConfigConstants.LOG_COMMIT_INTERVAL_MILLIS, "10000");

        FastLogGroupDeserializer deserializer = new FastLogGroupDeserializer();
        DataStream<FastLogGroupList> stream = env.addSource(
                new FlinkLogConsumer<>(SLS_PROJECT, SLS_LOGSTORE, deserializer, configProps));

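        // Note: the result of this flatMap is discarded; only the raw stream is written below.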
        stream.flatMap((FlatMapFunction<FastLogGroupList, String>) (value, out) -> {
            for (FastLogGroup logGroup : value.getLogGroups()) {
                int logCount = logGroup.getLogsCount();
                for (int i = 0; i < logCount; i++) {
                    FastLog log = logGroup.getLogs(i);
                    // processing log
                }
            }
        });
        stream.writeAsText("log-" + System.nanoTime());
        env.execute("Flink consumer");
    }
 
Example 17
Source File: WordCount.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final MultipleParameterTool params = MultipleParameterTool.fromArgs(args);

		// set up the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// get input data
		DataStream<String> text = null;
		if (params.has("input")) {
			// union all the inputs from text files
			for (String input : params.getMultiParameterRequired("input")) {
				if (text == null) {
					text = env.readTextFile(input);
				} else {
					text = text.union(env.readTextFile(input));
				}
			}
			Preconditions.checkNotNull(text, "Input DataStream should not be null.");
		} else {
			System.out.println("Executing WordCount example with default input data set.");
			System.out.println("Use --input to specify file input.");
			// get default test text data
			text = env.fromElements(WordCountData.WORDS);
		}

		DataStream<Tuple2<String, Integer>> counts =
			// split up the lines into pairs (2-tuples) containing: (word,1)
			text.flatMap(new Tokenizer())
			// group by the tuple field "0" and sum up tuple field "1"
			.keyBy(0).sum(1);

		// emit result
		if (params.has("output")) {
			counts.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			counts.print();
		}
		// execute program
		env.execute("Streaming WordCount");
	}
 
Example 18
Source File: IterateExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up input for the stream of integer pairs

		// obtain execution environment and set the buffer timeout to 1 ms to enable
		// continuous flushing of the output buffers (lowest latency)
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
				.setBufferTimeout(1);

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// create input stream of integer pairs
		DataStream<Tuple2<Integer, Integer>> inputStream;
		if (params.has("input")) {
			inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
		} else {
			System.out.println("Executing Iterate example with default input data set.");
			System.out.println("Use --input to specify file input.");
			inputStream = env.addSource(new RandomFibonacciSource());
		}

		// create an iterative data stream from the input with 5 second timeout
		IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
				.iterate(5000L);

		// apply the step function to get the next Fibonacci number
		// increment the counter and split the output with the output selector
		SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
				.split(new MySelector());

		// close the iteration by selecting the tuples that were directed to the
		// 'iterate' channel in the output selector
		it.closeWith(step.select("iterate"));

		// to produce the final output select the tuples directed to the
		// 'output' channel then get the input pairs that have the greatest iteration counter
		// on a 1 second sliding window
		DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
				.map(new OutputMap());

		// emit results
		if (params.has("output")) {
			numbers.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			numbers.print();
		}

		// execute the program
		env.execute("Streaming Iteration Example");
	}