package com.example.flink;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;

/**
 * Word count example implemented with Java 8 lambdas.
 *
 * <p>A fixed set of text lines is split into tokens on runs of non-word
 * characters, each token is emitted as a {@code (token, 1)} pair, and the
 * pairs are grouped by token and summed to produce per-token counts.
 */
public class Java8WordCount {

    /**
     * Builds the word count pipeline over the hard-coded sample text and prints
     * the resulting {@code (word, count)} pairs.
     *
     * @param args command-line arguments; not used
     * @throws Exception if building or running the pipeline fails
     */
    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSource<String> lines = env.fromElements(
                "Apache Flink is a community-driven open source framework for distributed big data analytics,",
                "like Hadoop and Spark. The core of Apache Flink is a distributed streaming dataflow engine written",
                " in Java and Scala.[1][2] It aims to bridge the gap between MapReduce-like systems and shared-nothing",
                "parallel database systems. Therefore, Flink executes arbitrary dataflow programs in a data-parallel and",
                "pipelined manner.[3] Flink's pipelined runtime system enables the execution of bulk/batch and stream",
                "processing programs.[4][5] Furthermore, Flink's runtime supports the execution of iterative algorithms natively.[6]"
        );

        // The explicit type witness gives the lambda's collector a concrete element
        // type, so the pipeline is typed as Tuple2<String, Integer> instead of Object.
        lines.<Tuple2<String, Integer>>flatMap((line, out) -> {
                    for (String word : line.split("\\W+")) {
                        // String.split yields a leading empty token when the line starts
                        // with a non-word character (e.g. a space); do not count it as a word.
                        if (!word.isEmpty()) {
                            out.collect(new Tuple2<>(word, 1));
                        }
                    }
                })
                // Explicit result type information for the lambda-based flatMap.
                .returns(new TupleTypeInfo<Tuple2<String, Integer>>(
                        TypeInformation.of(String.class),
                        TypeInformation.of(Integer.class)))
                .groupBy(0) // group by the word (tuple field 0)
                .sum(1)     // sum the per-word ones (tuple field 1)
                .print();
    }
}