Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment.getExecutionEnvironment()

The following are Java code examples showing how to use the getExecutionEnvironment() method of the org.apache.flink.api.java.ExecutionEnvironment class. You can vote up the examples you find useful; votes help surface better examples.
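Before the project examples, here is a minimal, self-contained sketch of the usual pattern: obtain the environment via getExecutionEnvironment(), build a DataSet pipeline, and trigger execution. The class name and the input values are illustrative placeholders, not taken from any of the projects below.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class MinimalBatchJob {

	public static void main(String[] args) throws Exception {
		// getExecutionEnvironment() returns a local environment when run from an IDE
		// and the cluster environment when the job is submitted to a cluster
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// small in-memory data set, doubled with a map transformation
		DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5);
		DataSet<Integer> doubled = numbers.map(n -> n * 2);

		// print() triggers execution of the plan and writes the result to stdout
		doubled.print();
	}
}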
Example 1
Project: Mastering-Apache-Flink   File: BatchJob.java   View Source Code Vote up 7 votes
public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

	DataSet<Record> csvInput = env
			.readCsvFile("D://NOTBACKEDUP//dataflow//flink-table//src//main//resources//data//olympic-athletes.csv")
			.pojoType(Record.class, "playerName", "country", "year", "game", "gold", "silver", "bronze", "total");
	// register the DataSet athletes as table "athletes" with fields derived
	// from the dataset
	Table athletes = tableEnv.fromDataSet(csvInput);
	tableEnv.registerTable("athletes", athletes);
	// run a SQL query on the Table and retrieve the result as a new Table
	Table groupedByCountry = tableEnv.sql("SELECT country, SUM(total) as frequency FROM athletes group by country");

	DataSet<Result> result = tableEnv.toDataSet(groupedByCountry, Result.class);

	result.print();

	Table groupedByGame = athletes.groupBy("game").select("game, total.sum as frequency");

	DataSet<GameResult> gameResult = tableEnv.toDataSet(groupedByGame, GameResult.class);

	gameResult.print();

}
 
Example 2
Project: flink-examples   File: RatingsDistribution.java   View Source Code Vote up 6 votes
public static void main(String[] args) throws Exception {

        // parse parameters
        ParameterTool params = ParameterTool.fromArgs(args);
        // path to ratings.csv file
        String ratingsCsvPath = params.getRequired("input");

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSource<String> file = env.readTextFile(ratingsCsvPath);
        file.flatMap(new ExtractRating())
            .groupBy(0)
            // .reduceGroup(new SumRatingCount())
            .sum(1)
            .print();
    }
 
Example 3
Project: flink-examples   File: Java8WordCount.java   View Source Code Vote up 6 votes
public static void main(String[] args) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<String> lines = env.fromElements(
        "Apache Flink is a community-driven open source framework for distributed big data analytics,",
        "like Hadoop and Spark. The core of Apache Flink is a distributed streaming dataflow engine written",
        " in Java and Scala.[1][2] It aims to bridge the gap between MapReduce-like systems and shared-nothing",
        "parallel database systems. Therefore, Flink executes arbitrary dataflow programs in a data-parallel and",
        "pipelined manner.[3] Flink's pipelined runtime system enables the execution of bulk/batch and stream",
        "processing programs.[4][5] Furthermore, Flink's runtime supports the execution of iterative algorithms natively.[6]"
    );

    lines.flatMap((line, out) -> {
        String[] words = line.split("\\W+");
        for (String word : words) {
            out.collect(new Tuple2<>(word, 1));
        }
    })
    .returns(new TupleTypeInfo(TypeInformation.of(String.class), TypeInformation.of(Integer.class)))
    .groupBy(0)
    .sum(1)
    .print();
}
 
Example 4
Project: flink-examples   File: GlobExample.java   View Source Code Vote up 6 votes
public static void main(String... args) throws  Exception {
    File txtFile = new File("/tmp/test/file.txt");
    File csvFile = new File("/tmp/test/file.csv");
    File binFile = new File("/tmp/test/file.bin");

    writeToFile(txtFile, "txt");
    writeToFile(csvFile, "csv");
    writeToFile(binFile, "bin");

    final ExecutionEnvironment env =
            ExecutionEnvironment.getExecutionEnvironment();
    final TextInputFormat format = new TextInputFormat(new Path("/tmp/test"));

    GlobFilePathFilter filesFilter = new GlobFilePathFilter(
            Collections.singletonList("**"),
            Arrays.asList("**/file.bin")
    );
    System.out.println(Arrays.toString(GlobFilePathFilter.class.getDeclaredFields()));
    format.setFilesFilter(filesFilter);

    DataSet<String> result = env.readFile(format, "/tmp");
    result.writeAsText("/tmp/out");
    env.execute("GlobFilePathFilter-Test");
}
 
Example 5
Project: flink-java-project   File: WordCount.java   View Source Code Vote up 6 votes
public static void main(String[] args) throws Exception {

		// set up the execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<String> text = env.fromElements(
				"To be, or not to be,--that is the question:--",
				"Whether 'tis nobler in the mind to suffer",
				"The slings and arrows of outrageous fortune",
				"Or to take arms against a sea of troubles,"
				);

		DataSet<Tuple2<String, Integer>> counts =
				// split up the lines in pairs (2-tuples) containing: (word,1)
				text.flatMap(new LineSplitter())
				// group by the tuple field "0" and sum up tuple field "1"
				.groupBy(0)
				.sum(1);

		// execute and print result
		counts.print();

	}
 
Example 6
Project: flink   File: DataSinkTest.java   View Source Code Vote up 6 votes
@Test
public void testTupleTwoOrderExp() {

	final ExecutionEnvironment env = ExecutionEnvironment
			.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env
			.fromCollection(emptyTupleData, tupleTypeInfo);

	// should work
	try {
		tupleDs.writeAsText("/tmp/willNotHappen")
			.sortLocalOutput("f1", Order.ASCENDING)
			.sortLocalOutput("f4", Order.DESCENDING);
	} catch (Exception e) {
		Assert.fail();
	}
}
 
Example 7
Project: flink   File: FilterITCase.java   View Source Code Vote up 6 votes
@Test
public void testDisjunctivePreds() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	Table table = tableEnv.fromDataSet(input, "a, b, c");

	Table result = table
		.filter("a < 2 || a > 20");

	DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
	List<Row> results = ds.collect();
	String expected = "1,1,Hi\n" + "21,6,Comment#15\n";
	compareResultAsText(results, expected);
}
 
Example 8
Project: flink   File: TableSourceITCase.java   View Source Code Vote up 6 votes
@Test
public void testBatchTableSourceSQL() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());
	BatchTableSource csvTable = CommonTestData.getCsvTableSource();

	tableEnv.registerTableSource("persons", csvTable);

	Table result = tableEnv
		.sql("SELECT `last`, FLOOR(id), score * 2 FROM persons WHERE score < 20");

	DataSet<Row> resultSet = tableEnv.toDataSet(result, Row.class);
	List<Row> results = resultSet.collect();

	String expected = "Smith,1,24.6\n" +
		"Miller,3,15.78\n" +
		"Smith,4,0.24\n" +
		"Miller,6,13.56\n" +
		"Williams,8,4.68\n";

	compareResultAsText(results, expected);
}
 
Example 9
Project: flink   File: OuterJoinITCase.java   View Source Code Vote up 6 votes
@Test
public void testJoinWithAtomicType2() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Integer> ds1 = env.fromElements(1, 2);
	DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.getSmall3TupleDataSet(env);

	DataSet<Tuple2<Integer, Tuple3<Integer, Long, String>>> joinDs = ds1
			.fullOuterJoin(ds2)
			.where("*")
			.equalTo(0)
			.with(new ProjectBothFunction<Integer, Tuple3<Integer, Long, String>>())
			.returns("Tuple2<java.lang.Object,java.lang.Object>");

	List<Tuple2<Integer, Tuple3<Integer, Long, String>>> result = joinDs.collect();

	String expected = "1,(1,1,Hi)\n" +
			"2,(2,2,Hello)\n" +
			"null,(3,2,Hello world)\n";

	compareResultAsTuples(result, expected);
}
 
Example 10
Project: flink   File: FlatMapITCase.java   View Source Code Vote up 6 votes
@Test
public void testNonPassingFlatMap() throws Exception {
	/*
	 * Test non-passing flatmap
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
	DataSet<String> nonPassingFlatMapDs = ds.
			flatMap(new FlatMapper1());

	List<String> result = nonPassingFlatMapDs.collect();

	String expected = "\n";

	compareResultAsText(result, expected);
}
 
Example 11
Project: flink   File: CalcITCase.java   View Source Code Vote up 6 votes
@Test
public void testIntegerBiggerThan128() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	DataSet<Tuple3<Integer, Long, String>> input = env.fromElements(new Tuple3<>(300, 1L, "Hello"));
	Table table = tableEnv.fromDataSet(input, "a, b, c");

	Table result = table
		.filter("a = 300 ");

	DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
	List<Row> results = ds.collect();
	String expected = "300,1,Hello\n";
	compareResultAsText(results, expected);
}
 
Example 12
Project: flink   File: ExplainTest.java   View Source Code Vote up 6 votes
@Test
public void testFilterWithoutExtended() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

	DataSet<Tuple2<Integer, String>> input = env.fromElements(new Tuple2<>(1,"d"));
	Table table = tableEnv
		.fromDataSet(input, "a, b")
		.filter("a % 2 = 0");

	String result = tableEnv.explain(table).replaceAll("\\r\\n", "\n");
	try (Scanner scanner = new Scanner(new File(testFilePath +
		"../../src/test/scala/resources/testFilter0.out"))){
		String source = scanner.useDelimiter("\\A").next().replaceAll("\\r\\n", "\n");
		assertEquals(source, result);
	}
}
 
Example 13
Project: flink   File: JoinITCase.java   View Source Code Vote up 6 votes
@Test(expected = ValidationException.class)
public void testJoinWithNonMatchingKeyTypes() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

	DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);

	Table in1 = tableEnv.fromDataSet(ds1, "a, b, c");
	Table in2 = tableEnv.fromDataSet(ds2, "d, e, f, g, h");

	Table result = in1.join(in2)
		// Must fail. Types of join fields are not compatible (Integer and String)
		.where("a === g").select("c, g");

	tableEnv.toDataSet(result, Row.class).collect();
}
 
Example 14
Project: flink   File: ConnectedComponentsITCase.java   View Source Code Vote up 6 votes
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 15
Project: flink-examples   File: PregelShortestPath.java   View Source Code Vote up 5 votes
public static void main(String... args) throws Exception {

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        List<Vertex<Integer, String>> vertices = Arrays.asList(
                new Vertex<>(1, "1"),
                new Vertex<>(2, "2"),
                new Vertex<>(3, "3"),
                new Vertex<>(4, "4"),
                new Vertex<>(5, "5")
        );

        List<Edge<Integer, Double>> edges = Arrays.asList(
                new Edge<>(1, 2, 2.0),
                new Edge<>(1, 3, 7.0),
                new Edge<>(2, 3, 2.0),
                new Edge<>(3, 2, 5.0),
                new Edge<>(2, 4, 4.0),
                new Edge<>(3, 4, 6.0),
                new Edge<>(3, 5, 3.0),
                new Edge<>(4, 5, 4.0),
                new Edge<>(5, 4, 1.0),
                new Edge<>(5, 1, 8.0)
        );

        Graph<Integer, String, Double> graph = Graph.fromCollection(vertices, edges, env);

        graph.run(new ShortestPath<>(1, 10)).print();
    }
 
Example 16
Project: flink-java-project   File: BatchJob.java   View Source Code Vote up 5 votes
public static void main(String[] args) throws Exception {
	// set up the batch execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	/**
	 * Here, you can start creating your execution plan for Flink.
	 *
	 * Start with getting some data from the environment, like
	 * 	env.readTextFile(textPath);
	 *
	 * then, transform the resulting DataSet<String> using operations
	 * like
	 * 	.filter()
	 * 	.flatMap()
	 * 	.join()
	 * 	.coGroup()
	 *
	 * and many more.
	 * Have a look at the programming guide for the Java API:
	 *
	 * http://flink.apache.org/docs/latest/apis/batch/index.html
	 *
	 * and the examples
	 *
	 * http://flink.apache.org/docs/latest/apis/batch/examples.html
	 *
	 */

	// execute program
	env.execute("Flink Batch Java API Skeleton");
}
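The comment block in the skeleton above only describes the intended workflow (read a text file, then transform the resulting DataSet<String> with operations such as filter() and flatMap()). A minimal sketch of what such a plan might look like is shown below; the input path and the split pattern are made-up placeholders, and org.apache.flink.util.Collector is assumed to be imported.

public static void main(String[] args) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// hypothetical input path; substitute your own file
	DataSet<String> text = env.readTextFile("/path/to/input.txt");

	// drop empty lines, then split the remaining lines into words
	DataSet<String> words = text
			.filter(line -> !line.isEmpty())
			.flatMap((String line, Collector<String> out) -> {
				for (String word : line.split("\\W+")) {
					out.collect(word);
				}
			})
			// lambdas erase the output type, so declare it explicitly
			.returns(String.class);

	// print() triggers execution, so no explicit env.execute() call is needed here
	words.print();
}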
 
Example 17
Project: big-data-benchmark   File: FlinkWordCount.java   View Source Code Vote up 5 votes
public static void main(String[] args) throws Exception {
    String inputPath = args[0];
    String outputPath = args[1] + "_" + System.currentTimeMillis();

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // get input data
    DataSet<String> text = env.readTextFile(inputPath);
    DataSet<Tuple2<String, Long>> counts = text
            .<Tuple2<String, Long>>flatMap((line, out) -> {
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    out.collect(new Tuple2<>(tokenizer.nextToken(), 1L));
                }
            })
            .returns(new TypeHint<Tuple2<String, Long>>() {
            })
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .sum(1);

    // emit result
    counts.writeAsCsv(outputPath);
    // execute program
    long t = System.currentTimeMillis();
    env.execute("Streaming WordCount Example");
    System.out.println("Time=" + (System.currentTimeMillis() - t));
}
 
Example 18
Project: flink   File: JavaTableEnvironmentITCase.java   View Source Code Vote up 5 votes
@Test
public void testFromNonAtomicAndNonComposite() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	List<Either<String, Integer>> data = new ArrayList<>();
	data.add(new Either.Left<>("Hello"));
	data.add(new Either.Right<>(42));
	data.add(new Either.Left<>("World"));

	Table table = tableEnv
		.fromDataSet(
			env.fromCollection(
				data,
				TypeInformation.of(new TypeHint<Either<String, Integer>>() { })
			),
			"either")
		.select("either");

	DataSet<Row> ds = tableEnv.toDataSet(table, Row.class);
	List<Row> results = ds.collect();
	String expected =
		"Left(Hello)\n" +
		"Left(World)\n" +
		"Right(42)\n";
	compareResultAsText(results, expected);
}
 
Example 19
Project: flink   File: PartitionOperatorTest.java   View Source Code Vote up 5 votes
@Test(expected = IllegalArgumentException.class)
public void testRangePartitionWithEmptyIndicesKey() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
		new Tuple2<>(new Tuple2<>(1, 1), 1),
		new Tuple2<>(new Tuple2<>(2, 2), 2),
		new Tuple2<>(new Tuple2<>(2, 2), 2)
	);
	ds.partitionByRange(new int[]{});
}
 
Example 20
Project: flink   File: DataSinkITCase.java   View Source Code Vote up 5 votes
@Test
public void testTupleSortingSingleDescParallelism1() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	ds.writeAsCsv(resultPath).sortLocalOutput(0, Order.DESCENDING).setParallelism(1);

	env.execute();

	String expected = "21,6,Comment#15\n" +
			"20,6,Comment#14\n" +
			"19,6,Comment#13\n" +
			"18,6,Comment#12\n" +
			"17,6,Comment#11\n" +
			"16,6,Comment#10\n" +
			"15,5,Comment#9\n" +
			"14,5,Comment#8\n" +
			"13,5,Comment#7\n" +
			"12,5,Comment#6\n" +
			"11,5,Comment#5\n" +
			"10,4,Comment#4\n" +
			"9,4,Comment#3\n" +
			"8,4,Comment#2\n" +
			"7,4,Comment#1\n" +
			"6,3,Luke Skywalker\n" +
			"5,3,I am fine.\n" +
			"4,3,Hello world, how are you?\n" +
			"3,2,Hello world\n" +
			"2,2,Hello\n" +
			"1,1,Hi\n";

	compareResultsByLinesInMemoryWithStrictOrder(expected, resultPath);
}
 
Example 21
Project: flink   File: MapITCase.java   View Source Code Vote up 5 votes
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Integer> stringDs = env.fromElements(11, 12, 13, 14);
	DataSet<String> mappedDs = stringDs
		.map(Object::toString)
		.map (s -> s.replace("1", "2"))
		.map(Trade::new)
		.map(Trade::toString);
	mappedDs.writeAsText(resultPath);
	env.execute();
}
 
Example 22
Project: flink   File: JoinOperatorTest.java   View Source Code Vote up 5 votes
@Test
public void testJoinKeyExpressionsNested() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<NestedCustomType> ds1 = env.fromCollection(customNestedTypeData);
	DataSet<NestedCustomType> ds2 = env.fromCollection(customNestedTypeData);

	// should work
	try {
		ds1.join(ds2).where("myInt").equalTo("myInt");
	} catch (Exception e) {
		Assert.fail();
	}
}
 
Example 23
Project: flink   File: JoinWithEdgesITCase.java   View Source Code Vote up 5 votes
@Test
public void testWithNoCommonKeys() throws Exception {
	/*
	 * Test joinWithEdges with the input DataSet containing different keys than the edge DataSet
	 * - the iterator becomes empty.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env), env);

	Graph<Long, Long, Long> res = graph.joinWithEdges(TestGraphUtils.getLongLongLongTuple3Data(env),
		new DoubleValueMapper());

	DataSet<Edge<Long, Long>> data = res.getEdges();
	List<Edge<Long, Long>> result = data.collect();

	expectedResult = "1,2,24\n" +
		"1,3,26\n" +
		"2,3,46\n" +
		"3,4,68\n" +
		"3,5,35\n" +
		"4,5,45\n" +
		"5,1,51\n";

	compareResultAsTuples(result, expectedResult);
}
 
Example 24
Project: flink   File: PageRankITCase.java   View Source Code Vote up 5 votes
@Test
public void testGSAPageRankWithThreeIterationsAndNumOfVertices() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Double, Double> inputGraph = Graph.fromDataSet(
		PageRankData.getDefaultEdgeDataSet(env), new InitMapper(), env);

	List<Vertex<Long, Double>> result = inputGraph.run(new GSAPageRank<Long>(0.85, 3))
		.collect();

	compareWithDelta(result, 0.01);
}
 
Example 25
Project: flink   File: ReduceOnNeighborMethodsITCase.java   View Source Code Vote up 5 votes
@Test
public void testSumOfOutNeighborsMultipliedByTwo() throws Exception {
	/*
	 * Get the sum of out-neighbor values
	 * for each vertex as well as the sum of out-neighbor values multiplied by two.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env), env);

	DataSet<Tuple2<Long, Long>> verticesWithSumOfOutNeighborValues =
		graph.groupReduceOnNeighbors(new SumOutNeighborsMultipliedByTwo(), EdgeDirection.OUT);
	List<Tuple2<Long, Long>> result = verticesWithSumOfOutNeighborValues.collect();

	expectedResult = "1,5\n" +
		"1,10\n" +
		"2,3\n" +
		"2,6\n" +
		"3,9\n" +
		"3,18\n" +
		"4,5\n" +
		"4,10\n" +
		"5,1\n" +
		"5,2";

	compareResultAsTuples(result, expectedResult);
}
 
Example 26
Project: flink   File: SelectITCase.java   View Source Code Vote up 5 votes
@Test(expected = ValidationException.class)
public void testSelectInvalidField() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

	tableEnv.fromDataSet(ds, "a, b, c")
		// Must fail. Field foo does not exist
		.select("a + 1, foo + 2");
}
 
Example 27
Project: flink-cassandra-connector-examples   File: FileWordCount.java   View Source Code Vote up 4 votes
public static void main(String[] args) throws Exception {

		// get the execution environment
		final ExecutionEnvironment job = ExecutionEnvironment.getExecutionEnvironment();
		String inputPath, outputPath = null;
		try {
			final ParameterTool params = ParameterTool.fromArgs(args);
			inputPath = params.get("input");

			if (params.has("output")) {
				outputPath = params.get("output");
			}
			// make parameters available in the web interface
			job.getConfig().setGlobalJobParameters(params);
		} catch (Exception e) {
			System.err.println("No input specified. Please run '" + org.apache.flink.connectors.cassandra.streaming.tuple.wordcount.FileWordCount.class.getSimpleName() +
					" --input <file-path>', where 'input' is the path to a text file");
			return;
		}

		DataServiceFacade dataService = new DataServiceFacade(DataEntityType.WORD_COUNT);

		dataService.setUpEmbeddedCassandra();
		dataService.setUpDataModel();

		LOG.info("Example starts!");

		// get input data by reading content from file
		DataSet<String> text = job.readTextFile(inputPath);

		DataSet<Tuple2<String, Long>> result =
				// split up the lines in pairs (2-tuples) containing: (word,1)
				text.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {

					@Override
					public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception {
						// normalize and split the line
						String[] words = value.toLowerCase().split("\\W+");

						// emit the pairs
						for (String word : words) {
							//Do not accept empty word, since word is defined as primary key in C* table
							if (!word.isEmpty()) {
								out.collect(new Tuple2<String, Long>(word, 1L));
							}
						}
					}
				})
                // group by the tuple field "0" and sum up tuple field "1"
                .groupBy(0)
                .sum(1);

		//Update the results to C* sink
		CassandraOutputFormat sink = new CassandraOutputFormat("INSERT INTO " + WordCount.CQL_KEYSPACE_NAME + "." + WordCount.CQL_TABLE_NAME + "(word, count) " +
				"values (?, ?);", new ClusterBuilder() {
			@Override
			protected Cluster buildCluster(Cluster.Builder builder) {
				builder.addContactPoint("127.0.0.1");
				return builder.build();
			}
		});

		result.output(sink);

		// emit result
		if (outputPath != null) {
			result.writeAsText(outputPath);
		}

		// execute program
		job.execute("[BATCH] FileWordCount w/ C* Sink");

		LOG.info("20 sec sleep ...");
		Thread.sleep(20 * 1000);
		LOG.info("20 sec sleep ... DONE");
	}
 
Example 28
Project: flink-connectors   File: FlinkPravegaInputFormatITCase.java   View Source Code Vote up 4 votes
/**
 * Verifies that the input format:
 *  - correctly reads all records in a given set of multiple Pravega streams
 *  - allows multiple executions
 */
@Test
public void testBatchInput() throws Exception {
    final int numElements1 = 100;
    final int numElements2 = 300;

    // set up the stream
    final String streamName1 = RandomStringUtils.randomAlphabetic(20);
    final String streamName2 = RandomStringUtils.randomAlphabetic(20);

    final Set<String> streams = new HashSet<>();
    streams.add(streamName1);
    streams.add(streamName2);

    SETUP_UTILS.createTestStream(streamName1, 3);
    SETUP_UTILS.createTestStream(streamName2, 5);

    try (
            final EventStreamWriter<Integer> eventWriter1 = SETUP_UTILS.getIntegerWriter(streamName1);
            final EventStreamWriter<Integer> eventWriter2 = SETUP_UTILS.getIntegerWriter(streamName2);

            // create the producer that writes to the stream
            final ThrottledIntegerWriter producer1 = new ThrottledIntegerWriter(
                    eventWriter1,
                    numElements1,
                    numElements1 + 1, // no need to block writer for a batch test
                    0
            );

            final ThrottledIntegerWriter producer2 = new ThrottledIntegerWriter(
                    eventWriter2,
                    numElements2,
                    numElements2 + 1, // no need to block writer for a batch test
                    0
            )
    ) {
        // write batch input
        producer1.start();
        producer2.start();

        producer1.sync();
        producer2.sync();

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);

        // simple pipeline that reads from Pravega and collects the events
        DataSet<Integer> integers = env.createInput(
                new FlinkPravegaInputFormat<>(
                        SETUP_UTILS.getControllerUri(),
                        SETUP_UTILS.getScope(),
                        streams,
                        new IntDeserializer()),
                BasicTypeInfo.INT_TYPE_INFO
        );

        // verify that all events were read
        Assert.assertEquals(numElements1 + numElements2, integers.collect().size());

        // this verifies that the input format allows multiple passes
        Assert.assertEquals(numElements1 + numElements2, integers.collect().size());
    }
}
 
Example 29
Project: flink-connectors   File: FlinkPravegaInputFormatITCase.java   View Source Code Vote up 4 votes
/**
 * Verifies that the input format reads all records exactly-once in the presence of job failures.
 */
@Test
public void testBatchInputWithFailure() throws Exception {
    final int numElements = 100;

    // set up the stream
    final String streamName = RandomStringUtils.randomAlphabetic(20);
    SETUP_UTILS.createTestStream(streamName, 3);

    try (
            final EventStreamWriter<Integer> eventWriter = SETUP_UTILS.getIntegerWriter(streamName);

            // create the producer that writes to the stream
            final ThrottledIntegerWriter producer = new ThrottledIntegerWriter(
                    eventWriter,
                    numElements,
                    numElements + 1, // no need to block writer for a batch test
                    0
            )
    ) {
        // write batch input
        producer.start();
        producer.sync();

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000L));
        env.setParallelism(3);

        // simple pipeline that reads from Pravega and collects the events
        List<Integer> integers = env.createInput(
                new FlinkPravegaInputFormat<>(
                        SETUP_UTILS.getControllerUri(),
                        SETUP_UTILS.getScope(),
                        Collections.singleton(streamName),
                        new IntDeserializer()),
                BasicTypeInfo.INT_TYPE_INFO
        ).map(new FailOnceMapper(numElements / 2)).collect();

        // verify that the job did fail, and all events were still read
        Assert.assertTrue(FailOnceMapper.hasFailed());
        Assert.assertEquals(numElements, integers.size());

        FailOnceMapper.reset();
    }
}
 
Example 30
Project: flink   File: SortPartialReuseTest.java   View Source Code Vote up 4 votes
@Test
public void testCustomPartitioningNotReused() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
		
		input
			.partitionCustom(new Partitioner<Long>() {
				@Override
				public int partition(Long key, int numPartitions) { return 0; }
			}, 0)
			.map(new IdentityMapper<Tuple3<Long,Long,Long>>()).withForwardedFields("0", "1", "2")
			
			.groupBy(0, 1)
			.reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long,Long,Long>>()).withForwardedFields("0", "1", "2")
			
			.groupBy(1)
			.reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long,Long,Long>>())

			.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer2 = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer2.getInput().getSource();
		SingleInputPlanNode reducer1 = (SingleInputPlanNode) combiner.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

		// should be locally forwarding, reusing sort and partitioning
		assertEquals(ShipStrategyType.PARTITION_HASH, reducer2.getInput().getShipStrategy());
		assertEquals(LocalStrategy.COMBININGSORT, reducer2.getInput().getLocalStrategy());
		
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
		assertEquals(LocalStrategy.NONE, combiner.getInput().getLocalStrategy());
		
		assertEquals(ShipStrategyType.FORWARD, reducer1.getInput().getShipStrategy());
		assertEquals(LocalStrategy.COMBININGSORT, reducer1.getInput().getLocalStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 31
Project: flink   File: PageRank.java   View Source Code Vote up 4 votes
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);

		final int numPages = params.getInt("numPages", PageRankData.getNumberOfPages());
		final int maxIterations = params.getInt("iterations", 10);

		// set up execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make the parameters available to the web ui
		env.getConfig().setGlobalJobParameters(params);

		// get input data
		DataSet<Long> pagesInput = getPagesDataSet(env, params);
		DataSet<Tuple2<Long, Long>> linksInput = getLinksDataSet(env, params);

		// assign initial rank to pages
		DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput.
				map(new RankAssigner((1.0d / numPages)));

		// build adjacency list from link input
		DataSet<Tuple2<Long, Long[]>> adjacencyListInput =
				linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());

		// set iterative data set
		IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(maxIterations);

		DataSet<Tuple2<Long, Double>> newRanks = iteration
				// join pages with outgoing edges and distribute rank
				.join(adjacencyListInput).where(0).equalTo(0).flatMap(new JoinVertexWithEdgesMatch())
				// collect and sum ranks
				.groupBy(0).aggregate(SUM, 1)
				// apply dampening factor
				.map(new Dampener(DAMPENING_FACTOR, numPages));

		DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(
				newRanks,
				newRanks.join(iteration).where(0).equalTo(0)
				// termination condition
				.filter(new EpsilonFilter()));

		// emit result
		if (params.has("output")) {
			finalPageRanks.writeAsCsv(params.get("output"), "\n", " ");
			// execute program
			env.execute("Basic Page Rank Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			finalPageRanks.print();
		}
	}
 
Example 32
Project: Mastering-Apache-Flink   File: BatchJob.java   View Source Code Vote up 3 votes
public static void main(String[] args) throws Exception {
	// set up the batch execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Create graph by reading from CSV files
	DataSet<Tuple2<String, Double>> airportVertices = env
			.readCsvFile("D://work//Mastering Flink//Chapter 7//data//nodes.csv").types(String.class, Double.class);

	DataSet<Tuple3<String, String, Double>> airportEdges = env
			.readCsvFile("D://work//Mastering Flink//Chapter 7//data//edges.csv")
			.types(String.class, String.class, Double.class);

	Graph<String, Double, Double> graph = Graph.fromTupleDataSet(airportVertices, airportEdges, env);

	// Find out no. of airports and routes
	System.out.println("No. of Routes in Graph:" + graph.numberOfEdges());
	System.out.println("No. of Airports in Graph:" + graph.numberOfVertices());

	// define the maximum number of iterations
	int maxIterations = 10;

	// Execute the vertex-centric iteration
	Graph<String, Double, Double> result = graph.runVertexCentricIteration(new SSSPComputeFunction(),
			new SSSPCombiner(), maxIterations);

	// Extract the vertices as the result
	DataSet<Vertex<String, Double>> singleSourceShortestPaths = result.getVertices();
	
	singleSourceShortestPaths.print();
}