org.apache.flink.streaming.api.environment.StreamExecutionEnvironment Java Examples

The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment. You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: TumblingWindow.java    From flink-simple-tutorial with Apache License 2.0 6 votes
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Build the input data
    List<Tuple2<String, Long>> data = new ArrayList<>();
    Tuple2<String, Long> a = new Tuple2<>("first event", 1L);
    Tuple2<String, Long> b = new Tuple2<>("second event", 2L);
    data.add(a);
    data.add(b);
    DataStreamSource<Tuple2<String, Long>> input = env.fromCollection(data);

    // Use a processing-time tumbling window, 10 seconds per window
    input.keyBy(x -> x.f1)
            .window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
            .reduce(new MyWindowFunction());

    env.execute();
}
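
The MyWindowFunction used above is not shown in this snippet. A minimal sketch of what it might look like, assuming it is a plain ReduceFunction that merges the two events sharing a key (the class name is kept from the snippet; the merge logic is an assumption):

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical reconstruction -- the original MyWindowFunction is not part of the example.
public class MyWindowFunction implements ReduceFunction<Tuple2<String, Long>> {
    @Override
    public Tuple2<String, Long> reduce(Tuple2<String, Long> a, Tuple2<String, Long> b) {
        // concatenate the event descriptions and keep the shared key (f1)
        return new Tuple2<>(a.f0 + " | " + b.f0, a.f1);
    }
}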
 
Example #2
Source File: TestFilterEdges.java    From gelly-streaming with Apache License 2.0 6 votes
@Test
public void testWithEmptyFilter() throws Exception {
	/*
	 * Test filterEdges() with a filter that constantly returns true.
	 */
	final String resultPath = getTempDirPath("result");
	final String expectedResult = "1,2,12\n" +
			"1,3,13\n" +
			"2,3,23\n" +
			"3,4,34\n" +
			"3,5,35\n" +
			"4,5,45\n" +
			"5,1,51\n";

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	GraphStream<Long, NullValue, Long> graph = new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);
	graph.filterEdges(new EmptyFilter())
			.getEdges().writeAsCsv(resultPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	compareResultsByLinesInMemory(expectedResult, resultPath);
}
 
Example #3
Source File: IntervalJoinITCase.java    From flink with Apache License 2.0 6 votes
@Test(expected = UnsupportedTimeCharacteristicException.class)
public void testExecutionFailsInProcessingTime() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	env.setParallelism(1);

	DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1));
	DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1));

	streamOne.keyBy(new Tuple2KeyExtractor())
		.intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
		.between(Time.milliseconds(0), Time.milliseconds(0))
		.process(new ProcessJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
			@Override
			public void processElement(Tuple2<String, Integer> left,
				Tuple2<String, Integer> right, Context ctx,
				Collector<String> out) throws Exception {
				out.collect(left + ":" + right);
			}
		});
}
 
Example #4
Source File: AllWindowTranslationTest.java    From flink with Apache License 2.0 6 votes
@Test
public void testSessionWithFoldFails() throws Exception {
	// verify that fold does not work with merging windows

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	AllWindowedStream<String, TimeWindow> windowedStream = env.fromElements("Hello", "Ciao")
			.windowAll(EventTimeSessionWindows.withGap(Time.seconds(5)));

	try {
		windowedStream.fold("", new FoldFunction<String, String>() {
			private static final long serialVersionUID = -4567902917104921706L;

			@Override
			public String fold(String accumulator, String value) throws Exception {
				return accumulator;
			}
		});
	} catch (UnsupportedOperationException e) {
		// expected
		// use a catch to ensure that the exception is thrown by the fold
		return;
	}

	fail("The fold call should fail.");
}
 
Example #5
Source File: AbstractNonKeyedOperatorRestoreTestBase.java    From flink with Apache License 2.0 6 votes
@Override
public void createMigrationJob(StreamExecutionEnvironment env) {
	/**
	 * Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
	 */
	DataStream<Integer> source = createSource(env, ExecutionMode.MIGRATE);

	SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.MIGRATE, source);
	first.startNewChain();

	SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.MIGRATE, first);
	second.startNewChain();

	SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);

	SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.MIGRATE, stateless);
}
 
Example #6
Source File: SocketWindowWordCount.java    From 163-bigdate-note with GNU General Public License v3.0 6 votes
public static void main(String[] args) throws Exception{
        // Create the execution environment
        final StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment();
        // Read input data by connecting to a socket; here we connect to port 9000 on localhost (pick another port if 9000 is already in use)
        DataStream<String> text = env.socketTextStream("localhost", 9000, "\n");
        // Parse the data, group by word, window, and aggregate
        DataStream<Tuple2<String, Integer>> windowCounts = text.
                flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String value, Collector<Tuple2<String,
                            Integer>> out) {
                        for (String word : value.split("\\s")) {
                            out.collect(Tuple2.of(word, 1));
                        }
                    }
                })
                .keyBy(0)
                .timeWindow(Time.seconds(5))
                .sum(1);
        // Print the result to the console; note that this print is single-threaded, not parallel
        windowCounts.print().setParallelism(1);
        env.execute("Socket Window WordCount");
    }
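
Note that keyBy(int) and timeWindow(...) as used above are deprecated since Flink 1.12. A sketch of the same windowing with the replacement API, reusing the text stream from above (the TumblingProcessingTimeWindows import is assumed):

DataStream<Tuple2<String, Integer>> windowCounts2 = text
        .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
                for (String word : value.split("\\s")) {
                    out.collect(Tuple2.of(word, 1));
                }
            }
        })
        .keyBy(t -> t.f0)                                           // key selector instead of field index
        .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))  // replaces timeWindow(Time.seconds(5))
        .sum(1);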
 
Example #7
Source File: StreamingJobGraphGeneratorNodeHashTest.java    From Flink-CEPplus with Apache License 2.0 6 votes
/**
 * Tests that a changed operator name does not affect the hash.
 */
@Test
public void testChangedOperatorName() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.addSource(new NoOpSourceFunction(), "A").map(new NoOpMapFunction());
	JobGraph jobGraph = env.getStreamGraph().getJobGraph();

	JobVertexID expected = jobGraph.getVerticesAsArray()[0].getID();

	env = StreamExecutionEnvironment.createLocalEnvironment();
	env.addSource(new NoOpSourceFunction(), "B").map(new NoOpMapFunction());
	jobGraph = env.getStreamGraph().getJobGraph();

	JobVertexID actual = jobGraph.getVerticesAsArray()[0].getID();

	assertEquals(expected, actual);
}
 
Example #8
Source File: WindowTranslationTest.java    From flink with Apache License 2.0 6 votes
@Test
@SuppressWarnings("rawtypes")
public void testReduceEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.reduce(new DummyReducer());

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
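
The TupleKeySelector and DummyReducer helpers live elsewhere in WindowTranslationTest and are not shown here. A plausible minimal version of each (the exact originals may differ):

private static class TupleKeySelector implements KeySelector<Tuple2<String, Integer>, String> {
	@Override
	public String getKey(Tuple2<String, Integer> value) {
		// key by the String field
		return value.f0;
	}
}

private static class DummyReducer implements ReduceFunction<Tuple2<String, Integer>> {
	@Override
	public Tuple2<String, Integer> reduce(Tuple2<String, Integer> a, Tuple2<String, Integer> b) {
		// stand-in reduce logic: keep the most recent element
		return b;
	}
}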
 
Example #9
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0 6 votes
/**
 * Test slot sharing is enabled.
 */
@Test
public void testEnableSlotSharing() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);
	DataStream<Integer> mapDataStream = sourceDataStream.map(x -> x + 1);

	final List<Transformation<?>> transformations = new ArrayList<>();
	transformations.add(sourceDataStream.getTransformation());
	transformations.add(mapDataStream.getTransformation());

	// all stream nodes share default group by default
	StreamGraph streamGraph = new StreamGraphGenerator(
			transformations, env.getConfig(), env.getCheckpointConfig())
		.generate();

	Collection<StreamNode> streamNodes = streamGraph.getStreamNodes();
	for (StreamNode streamNode : streamNodes) {
		assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, streamNode.getSlotSharingGroup());
	}
}
 
Example #10
Source File: AggregateFunctionDemo.java    From flink-simple-tutorial with Apache License 2.0 6 votes
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Build the input data
        List<Tuple2<String, Long>> data = new ArrayList<>();
        Tuple2<String, Long> a = new Tuple2<>("first event", 1L);
        Tuple2<String, Long> b = new Tuple2<>("second event", 2L);
        data.add(a);
        data.add(b);
        DataStreamSource<Tuple2<String, Long>> input = env.fromCollection(data);


        input.keyBy(x -> x.f1)
                .timeWindow(Time.seconds(10), Time.seconds(1))
                // A custom AggregateFunction that concatenates the f0 string fields of elements sharing the same f1 key
                // ("hello", 1L) + ("world", 1L) = ("hello world", 1L)
                .aggregate(new MyAggregateFunction());

        env.execute();
    }
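
The MyAggregateFunction above is not included in the snippet. A minimal sketch that matches the comment, i.e. concatenating the f0 strings of elements with the same f1 key (the accumulator layout and separator are assumptions):

import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical reconstruction: ("hello", 1L) + ("world", 1L) => ("hello world", 1L)
public class MyAggregateFunction
        implements AggregateFunction<Tuple2<String, Long>, Tuple2<String, Long>, Tuple2<String, Long>> {

    @Override
    public Tuple2<String, Long> createAccumulator() {
        return new Tuple2<>("", 0L);
    }

    @Override
    public Tuple2<String, Long> add(Tuple2<String, Long> value, Tuple2<String, Long> acc) {
        // append this element's string to the accumulator, keeping the key
        String merged = acc.f0.isEmpty() ? value.f0 : acc.f0 + " " + value.f0;
        return new Tuple2<>(merged, value.f1);
    }

    @Override
    public Tuple2<String, Long> getResult(Tuple2<String, Long> acc) {
        return acc;
    }

    @Override
    public Tuple2<String, Long> merge(Tuple2<String, Long> a, Tuple2<String, Long> b) {
        return new Tuple2<>((a.f0 + " " + b.f0).trim(), Math.max(a.f1, b.f1));
    }
}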
 
Example #11
Source File: StreamingJob.java    From blog_demos with Apache License 2.0 6 votes
public static void main(String[] args) throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<String> text = env.socketTextStream("127.0.0.1", 18081, "\n");

	DataStream<WordWithCount> windowCount = text.flatMap(new FlatMapFunction<String, WordWithCount>() {
		public void flatMap(String value, Collector<WordWithCount> out) throws Exception {
			String[] splits = value.split("\\s");
			for (String word:splits) {
				out.collect(new WordWithCount(word,1L));
			}
		}
	})
			.keyBy("word")
			.timeWindow(Time.seconds(5),Time.seconds(1))
			.sum("count");
	windowCount.print().setParallelism(1);
	env.execute("Flink Streaming Java API Skeleton");
}
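
The WordWithCount type referenced by keyBy("word") and sum("count") is not shown. A minimal POJO satisfying those field references (hypothetical; Flink POJOs need a public no-argument constructor and public fields or getters/setters):

public static class WordWithCount {
	public String word;
	public long count;

	public WordWithCount() {
	}

	public WordWithCount(String word, long count) {
		this.word = word;
		this.count = count;
	}

	@Override
	public String toString() {
		return word + " : " + count;
	}
}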
 
Example #12
Source File: CassandraTupleWriteAheadSinkExample.java    From Flink-CEPplus with Apache License 2.0 6 votes
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(1000);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
	env.setStateBackend(new FsStateBackend("file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend"));

	CassandraSink<Tuple2<String, Integer>> sink = CassandraSink.addSink(env.addSource(new MySource()))
		.setQuery("INSERT INTO example.values (id, counter) values (?, ?);")
		.enableWriteAheadLog()
		.setClusterBuilder(new ClusterBuilder() {

			private static final long serialVersionUID = 2793938419775311824L;

			@Override
			public Cluster buildCluster(Cluster.Builder builder) {
				return builder.addContactPoint("127.0.0.1").build();
			}
		})
		.build();

	sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello");

	env.execute();
}
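
MySource is not included in this snippet. A purely illustrative source that would fit the (id, counter) insert query above:

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

// Hypothetical stand-in for MySource: emits (id, counter) pairs for the Cassandra sink.
public static class MySource implements SourceFunction<Tuple2<String, Integer>> {
	private volatile boolean running = true;

	@Override
	public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
		int counter = 0;
		while (running && counter < 100) {
			ctx.collect(Tuple2.of("key-" + (counter % 10), counter));
			counter++;
			Thread.sleep(50);
		}
	}

	@Override
	public void cancel() {
		running = false;
	}
}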
 
Example #13
Source File: SavepointTestBase.java    From flink with Apache License 2.0 6 votes
public <T> String takeSavepoint(Collection<T> data, Function<SourceFunction<T>, StreamExecutionEnvironment> jobGraphFactory) throws Exception {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().disableClosureCleaner();

		WaitingSource<T> waitingSource = createSource(data);

		JobGraph jobGraph = jobGraphFactory.apply(waitingSource).getStreamGraph().getJobGraph();
		JobID jobId = jobGraph.getJobID();

		ClusterClient<?> client = miniClusterResource.getClusterClient();

		try {
			JobSubmissionResult result = ClientUtils.submitJob(client, jobGraph);

			return CompletableFuture
				.runAsync(waitingSource::awaitSource)
				.thenCompose(ignore -> triggerSavepoint(client, result.getJobID()))
				.get(5, TimeUnit.MINUTES);
		} catch (Exception e) {
			throw new RuntimeException("Failed to take savepoint", e);
		} finally {
			client.cancel(jobId);
		}
	}
 
Example #14
Source File: PulsarTableSource.java    From pulsar-flink with Apache License 2.0 6 votes
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment execEnv) {
    FlinkPulsarRowSource source = new FlinkPulsarRowSource(serviceUrl, adminUrl, properties);
    switch (startupMode) {
        case EARLIEST:
            source.setStartFromEarliest();
            break;
        case LATEST:
            source.setStartFromLatest();
            break;
        case SPECIFIC_OFFSETS:
            source.setStartFromSpecificOffsets(specificStartupOffsets);
            break;
        case EXTERNAL_SUBSCRIPTION:
            source.setStartFromSubscription(externalSubscriptionName);
    }

    return execEnv.addSource(source).name(explainSource());
}
 
Example #15
Source File: BasicTopicStreamingSample.java    From solace-integration-guides with Apache License 2.0 6 votes
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        final Hashtable<String, String> jmsEnv = new Hashtable<>();
        jmsEnv.put(InitialContext.INITIAL_CONTEXT_FACTORY, "com.solacesystems.jndi.SolJNDIInitialContextFactory");
        jmsEnv.put(InitialContext.PROVIDER_URL, "smf://192.168.56.101");
        jmsEnv.put(Context.SECURITY_PRINCIPAL, "test@poc_vpn");
        jmsEnv.put(Context.SECURITY_CREDENTIALS, "password");

        env.addSource(new JMSTopicSource<String>(jmsEnv,
                "flink_cf",
                "flink/topic",
                new JMSTextTranslator()))
                .print();

        env.execute();
    }
 
Example #16
Source File: ChainLengthIncreaseTest.java    From Flink-CEPplus with Apache License 2.0 6 votes
@Override
public void createRestoredJob(StreamExecutionEnvironment env) {
	/**
	 * Original job: Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
	 * Modified job: Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3 -> StatefulMap4)
	 */
	DataStream<Integer> source = createSource(env, ExecutionMode.RESTORE);

	SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.RESTORE, source);
	first.startNewChain();

	SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.RESTORE, first);
	second.startNewChain();

	SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);

	SingleOutputStreamOperator<Integer> stateless2 = createStatelessMap(stateless);

	SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.RESTORE, stateless2);
}
 
Example #17
Source File: KafkaBaseSource.java    From sylph with Apache License 2.0 5 votes
/**
 * Initialization (executed in the driver phase)
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode(); // "latest" or "earliest"

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //latest   earliest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}
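
getKafkaConsumerBase is the hook through which a subclass supplies the actual consumer. One possible implementation, assuming the universal Kafka connector is available (the concrete consumer class is an assumption):

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

// Sketch of the factory hook; version-specific subclasses may return a different consumer.
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(
        List<String> topicSets,
        KafkaDeserializationSchema<Row> deserializationSchema,
        Properties properties)
{
    return new FlinkKafkaConsumer<>(topicSets, deserializationSchema, properties);
}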
 
Example #18
Source File: WindowJoin.java    From Flink-CEPplus with Apache License 2.0 5 votes
public static void main(String[] args) throws Exception {
	// parse the parameters
	final ParameterTool params = ParameterTool.fromArgs(args);
	final long windowSize = params.getLong("windowSize", 2000);
	final long rate = params.getLong("rate", 3L);

	System.out.println("Using windowSize=" + windowSize + ", data rate=" + rate);
	System.out.println("To customize example, use: WindowJoin [--windowSize <window-size-in-millis>] [--rate <elements-per-second>]");

	// obtain execution environment, run this example in "ingestion time"
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	// make parameters available in the web interface
	env.getConfig().setGlobalJobParameters(params);

	// create the data sources for both grades and salaries
	DataStream<Tuple2<String, Integer>> grades = GradeSource.getSource(env, rate);
	DataStream<Tuple2<String, Integer>> salaries = SalarySource.getSource(env, rate);

	// run the actual window join program
	// for testability, this functionality is in a separate method.
	DataStream<Tuple3<String, Integer, Integer>> joinedStream = runWindowJoin(grades, salaries, windowSize);

	// print the results with a single thread, rather than in parallel
	joinedStream.print().setParallelism(1);

	// execute program
	env.execute("Windowed Join Example");
}
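
The join itself lives in runWindowJoin, which is not shown in this snippet. A sketch modeled on Flink's WindowJoin example, joining the two streams on the name field within tumbling windows of the given size (details may differ from the original):

public static DataStream<Tuple3<String, Integer, Integer>> runWindowJoin(
		DataStream<Tuple2<String, Integer>> grades,
		DataStream<Tuple2<String, Integer>> salaries,
		long windowSize) {

	return grades.join(salaries)
			.where(new NameKeySelector())
			.equalTo(new NameKeySelector())
			.window(TumblingEventTimeWindows.of(Time.milliseconds(windowSize)))
			.apply(new JoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple3<String, Integer, Integer>>() {
				@Override
				public Tuple3<String, Integer, Integer> join(Tuple2<String, Integer> first, Tuple2<String, Integer> second) {
					// (name, grade, salary)
					return new Tuple3<>(first.f0, first.f1, second.f1);
				}
			});
}

private static class NameKeySelector implements KeySelector<Tuple2<String, Integer>, String> {
	@Override
	public String getKey(Tuple2<String, Integer> value) {
		return value.f0;
	}
}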
 
Example #19
Source File: BravoTestPipeline.java    From bravo with Apache License 2.0 5 votes
private StreamExecutionEnvironment createJobGraph(int parallelism,
		Function<DataStream<String>, DataStream<String>> pipelinerBuilder) throws Exception {
	final Path checkpointDir = getCheckpointDir();
	final Path savepointRootDir = getSavepointDir();

	checkpointDir.getFileSystem().mkdirs(checkpointDir);
	savepointRootDir.getFileSystem().mkdirs(savepointRootDir);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();
	env.getCheckpointConfig().enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setBufferTimeout(0);
	env.setParallelism(parallelism);
	env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);

	env.setStateBackend((StateBackend) new RocksDBStateBackend(checkpointDir.toString(), true));

	DataStream<String> sourceData = env
			.addSource(new TestPipelineSource())
			.uid("TestSource")
			.name("TestSource")
			.setParallelism(1);

	pipelinerBuilder.apply(sourceData)
			.addSink(new CollectingSink()).name("Output").uid("Output")
			.setParallelism(1);

	return env;
}
 
Example #20
Source File: AllWindowTranslationTest.java    From Flink-CEPplus with Apache License 2.0 5 votes
@Test
@SuppressWarnings("rawtypes")
public void testReduceWithEvictorAndProcessFunction() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DummyReducer reducer = new DummyReducer();

	DataStream<Tuple2<String, Integer>> window1 = source
			.windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.evictor(CountEvictor.of(100))
			.reduce(
					reducer,
					new ProcessAllWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, TimeWindow>() {
						@Override
						public void process(
								Context context,
								Iterable<Tuple2<String, Integer>> elements,
								Collector<Tuple2<String, Integer>> out) throws Exception {
							for (Tuple2<String, Integer> in : elements) {
								out.collect(in);
							}
						}
					});

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof EvictingWindowOperator);
	EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?> winOperator = (EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getEvictor() instanceof CountEvictor);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #21
Source File: MLEnvironmentTest.java    From Alink with Apache License 2.0 5 votes
@Test
public void testConstructWithStreamEnv() {
	StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment streamTableEnvironment = StreamTableEnvironment.create(streamExecutionEnvironment);

	MLEnvironment mlEnvironment = new MLEnvironment(streamExecutionEnvironment, streamTableEnvironment);

	Assert.assertSame(mlEnvironment.getStreamExecutionEnvironment(), streamExecutionEnvironment);
	Assert.assertSame(mlEnvironment.getStreamTableEnvironment(), streamTableEnvironment);
}
 
Example #22
Source File: UnalignedCheckpointCompatibilityITCase.java    From flink with Apache License 2.0 5 votes
private StreamExecutionEnvironment externalCheckpointEnv(boolean isAligned, File dir, int checkpointingInterval) {
	Map<ConfigOption<?>, String> cfg = new HashMap<>();
	cfg.put(CHECKPOINTS_DIRECTORY, dir.toURI().toString());
	cfg.put(MAX_RETAINED_CHECKPOINTS, Integer.toString(Integer.MAX_VALUE)); // prevent deletion of checkpoint files while they are being checked and used
	StreamExecutionEnvironment env = env(isAligned, checkpointingInterval, cfg);
	env.getCheckpointConfig().enableExternalizedCheckpoints(RETAIN_ON_CANCELLATION);
	return env;
}
 
Example #23
Source File: ContinuousFileReaderOperatorBenchmark.java    From flink-benchmarks with Apache License 2.0 5 votes
@Benchmark
public void readFileSplit(FlinkEnvironmentContext context) throws Exception {
    StreamExecutionEnvironment env = context.env;
    env.setRestartStrategy(new RestartStrategies.NoRestartStrategyConfiguration());
    env
            .enableCheckpointing(100)
            .setParallelism(1)
            .addSource(new MockSourceFunction())
            .transform("fileReader", TypeInformation.of(String.class),
                    new ContinuousFileReaderOperatorFactory<>(new MockInputFormat()))
            .addSink(new LimitedSink());

    env.execute();
}
 
Example #24
Source File: WindowTranslationTest.java    From flink with Apache License 2.0 5 votes
@Test
@SuppressWarnings("rawtypes")
public void testReduceWithProcessWindowFunctionEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DummyReducer reducer = new DummyReducer();

	DataStream<Tuple3<String, String, Integer>> window = source
			.keyBy(new TupleKeySelector())
			.window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.reduce(reducer, new ProcessWindowFunction<Tuple2<String, Integer>, Tuple3<String, String, Integer>, String, TimeWindow>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void process(String key,
						Context ctx,
						Iterable<Tuple2<String, Integer>> values,
						Collector<Tuple3<String, String, Integer>> out) throws Exception {
					for (Tuple2<String, Integer> in : values) {
						out.collect(new Tuple3<>(in.f0, in.f0, in.f1));
					}
				}
			});

	OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>> transform =
			(OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>>) window.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple3<String, String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

	processElementAndEnsureOutput(operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #25
Source File: DistributedCacheViaBlobTestProgram.java    From Flink-CEPplus with Apache License 2.0 5 votes
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final Path inputFile = Paths.get(params.getRequired("inputFile"));
		final Path inputDir = Paths.get(params.getRequired("inputDir"));

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(1);

		env.registerCachedFile(inputFile.toString(), "test_data", false);
		env.registerCachedFile(inputDir.toString(), "test_dir", false);

		final Path containedFile;
		try (Stream<Path> files = Files.list(inputDir)) {
			containedFile = files.findAny().orElseThrow(() -> new RuntimeException("Input directory must not be empty."));
		}

		env.fromElements(1)
			.map(new TestMapFunction(
				inputFile.toAbsolutePath().toString(),
				Files.size(inputFile),
				inputDir.toAbsolutePath().toString(),
				containedFile.getFileName().toString()))
			.writeAsText(params.getRequired("output"), FileSystem.WriteMode.OVERWRITE);

		env.execute("Distributed Cache Via Blob Test Program");
	}
 
Example #26
Source File: FlinkPulsarTableITest.java    From pulsar-flink with Apache License 2.0 5 votes
@Test
public void testStructTypesWithJavaArray() throws Exception {
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(see);

    String table = newTopic();

    sendTypedMessages(table, SchemaType.AVRO, faList, Optional.empty(), SchemaData.FA.class);

    tEnv
            .connect(getPulsarDescriptor(table))
            .inAppendMode()
            .registerTableSource(table);

    Table t = tEnv.scan(table).select("l");
    tEnv.toAppendStream(t, t.getSchema().toRowType())
            .map(new FailingIdentityMapper<Row>(faList.size()))
            .addSink(new SingletonStreamSink.StringSink<>()).setParallelism(1);

    try {
        see.execute("test struct in avro");
    } catch (Exception e) {
        // expected: the pipeline contains a FailingIdentityMapper that deliberately fails the job
    }
    SingletonStreamSink.compareWithList(
            faList.subList(0, faList.size() - 1).stream().map(Objects::toString).collect(Collectors.toList()));
}
 
Example #27
Source File: BroadcastStream.java    From flink with Apache License 2.0 5 votes
protected BroadcastStream(
		final StreamExecutionEnvironment env,
		final DataStream<T> input,
		final MapStateDescriptor<?, ?>... broadcastStateDescriptors) {

	this.environment = requireNonNull(env);
	this.inputStream = requireNonNull(input);
	this.broadcastStateDescriptors = Arrays.asList(requireNonNull(broadcastStateDescriptors));
}
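
A BroadcastStream is normally obtained via DataStream#broadcast(MapStateDescriptor...) rather than through this constructor. A minimal usage sketch, assuming two String streams and the usual imports (the stream names and state contents are illustrative):

DataStream<String> mainStream = env.fromElements("a", "b", "c");   // hypothetical input
DataStream<String> ruleStream = env.fromElements("a");             // hypothetical rules

final MapStateDescriptor<String, String> ruleStateDescriptor = new MapStateDescriptor<>(
		"rules", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

BroadcastStream<String> ruleBroadcast = ruleStream.broadcast(ruleStateDescriptor);

mainStream.keyBy(v -> v)
		.connect(ruleBroadcast)
		.process(new KeyedBroadcastProcessFunction<String, String, String, String>() {
			@Override
			public void processElement(String value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
				// the element side gets read-only access to the broadcast state
				String rule = ctx.getBroadcastState(ruleStateDescriptor).get(value);
				out.collect(rule == null ? value : rule);
			}

			@Override
			public void processBroadcastElement(String rule, Context ctx, Collector<String> out) throws Exception {
				// only the broadcast side may write the broadcast state
				ctx.getBroadcastState(ruleStateDescriptor).put(rule, rule.toUpperCase());
			}
		});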
 
Example #28
Source File: AllWindowTranslationTest.java    From Flink-CEPplus with Apache License 2.0 5 votes
@Test
public void testAggregateEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.aggregate(new DummyAggregationFunction());

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
			(OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();

	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();

	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
			(WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;

	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor);

	processElementAndEnsureOutput(
			winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #29
Source File: FromCollection.java    From blog_demos with Apache License 2.0 5 votes
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Set the parallelism to 1
    env.setParallelism(1);

    // Create a List holding two Tuple2 elements
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2("aaa", 1));
    list.add(new Tuple2("bbb", 1));

    // Create a DataStream from the List
    DataStream<Tuple2<String, Integer>> fromCollectionDataStream = env.fromCollection(list);

    // Create a DataStream from several Tuple2 elements
    DataStream<Tuple2<String, Integer>> fromElementDataStream = env.fromElements(
            new Tuple2("ccc", 1),
            new Tuple2("ddd", 1),
            new Tuple2("aaa", 1)
    );

    // Merge the two DataStreams into one with union
    DataStream<Tuple2<String, Integer>> unionDataStream = fromCollectionDataStream.union(fromElementDataStream);

    // Count the occurrences of each word
    unionDataStream
            .keyBy(0)
            .sum(1)
            .print();

    env.execute("API DataSource demo : collection");
}
 
Example #30
Source File: SideOutputEvent.java    From flink-learning with Apache License 2.0 5 votes
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);  // get the complete data stream from Kafka
    SingleOutputStreamOperator<MetricEvent> sideOutputData = data.process(new ProcessFunction<MetricEvent, MetricEvent>() {
        @Override
        public void processElement(MetricEvent metricEvent, Context context, Collector<MetricEvent> collector) throws Exception {
            String type = metricEvent.getTags().get("type");
            switch (type) {
                // break after each case; otherwise a match falls through into every following tag
                case "machine":
                    context.output(machineTag, metricEvent);
                    break;
                case "docker":
                    context.output(dockerTag, metricEvent);
                    break;
                case "application":
                    context.output(applicationTag, metricEvent);
                    break;
                case "middleware":
                    context.output(middlewareTag, metricEvent);
                    break;
                default:
                    collector.collect(metricEvent);
            }
        }
    });
    DataStream<MetricEvent> machine = sideOutputData.getSideOutput(machineTag);
    DataStream<MetricEvent> docker = sideOutputData.getSideOutput(dockerTag);
    DataStream<MetricEvent> application = sideOutputData.getSideOutput(applicationTag);
    DataStream<MetricEvent> middleware = sideOutputData.getSideOutput(middlewareTag);

    env.execute("side output demo");  // hypothetical job name; without execute() the pipeline above never runs
}
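
The four OutputTag fields used above are defined elsewhere in the class. Their likely shape (the anonymous-subclass braces are required so Flink can capture the element type; the tag names are taken from the switch cases):

// Hypothetical declarations matching the tags referenced in the example.
private static final OutputTag<MetricEvent> machineTag = new OutputTag<MetricEvent>("machine") {};
private static final OutputTag<MetricEvent> dockerTag = new OutputTag<MetricEvent>("docker") {};
private static final OutputTag<MetricEvent> applicationTag = new OutputTag<MetricEvent>("application") {};
private static final OutputTag<MetricEvent> middlewareTag = new OutputTag<MetricEvent>("middleware") {};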