Java Code Examples for org.apache.flink.api.java.DataSet#writeAsCsv()

The following examples show how to use org.apache.flink.api.java.DataSet#writeAsCsv(). Each example notes its source file and the project it comes from above the code.
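Before the project examples, a minimal self-contained sketch of the method may help (the class name WriteAsCsvSketch and the output path /tmp/csv-out are placeholders, not taken from any project below). writeAsCsv() is only defined for Tuple DataSets, and because file sinks are lazy, the job has to be triggered with env.execute().

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.fs.FileSystem.WriteMode;

public class WriteAsCsvSketch {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// writeAsCsv() works on Tuple DataSets; each tuple becomes one CSV row
		DataSet<Tuple2<String, Integer>> data = env.fromElements(
				new Tuple2<>("a", 1),
				new Tuple2<>("b", 2));

		// row delimiter "\n", field delimiter ",", overwrite any existing output
		data.writeAsCsv("/tmp/csv-out", "\n", ",", WriteMode.OVERWRITE)
				.setParallelism(1); // a single file instead of a directory of part files

		// file sinks are lazy, so the write only happens when the job is executed
		env.execute("writeAsCsv sketch");
	}
}

With the default parallelism, writeAsCsv() produces a directory of part files at the given path; setting the sink parallelism to 1, as in the sketch, yields a single output file.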
Example 1
Source File: ExecutionPlanCreationTest.java    From flink with Apache License 2.0
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {
	if (args.length < 2) {
		System.err.println("Usage: TestOptimizerPlan <input-file-path> <output-file-path>");
		return;
	}

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Long, Long>> input = env.readCsvFile(args[0])
			.fieldDelimiter("\t").types(Long.class, Long.class);

	// increment the second field of each tuple
	DataSet<Tuple2<Long, Long>> result = input.map(
			new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
					return new Tuple2<Long, Long>(value.f0, value.f1 + 1);
				}
	});
	result.writeAsCsv(args[1], "\n", "\t");
	env.execute();
}
 
Example 2
Source File: IncrementalSSSPITCase.java    From flink with Apache License 2.0
@Test
public void testIncrementalSSSPNonSPEdge() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Vertex<Long, Double>> vertices = IncrementalSSSPData.getDefaultVertexDataSet(env);
	DataSet<Edge<Long, Double>> edges = IncrementalSSSPData.getDefaultEdgeDataSet(env);
	DataSet<Edge<Long, Double>> edgesInSSSP = IncrementalSSSPData.getDefaultEdgesInSSSP(env);
	// the edge to be removed is a non-SP edge
	Edge<Long, Double> edgeToBeRemoved = new Edge<>(3L, 5L, 5.0);

	Graph<Long, Double, Double> graph = Graph.fromDataSet(vertices, edges, env);
	// Assumption: all minimum weight paths are kept
	Graph<Long, Double, Double> ssspGraph = Graph.fromDataSet(vertices, edgesInSSSP, env);
	// remove the edge
	graph.removeEdge(edgeToBeRemoved);

	// configure the iteration
	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

	if (IncrementalSSSP.isInSSSP(edgeToBeRemoved, edgesInSSSP)) {

		parameters.setDirection(EdgeDirection.IN);
		parameters.setOptDegrees(true);

		// run the scatter gather iteration to propagate info
		Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(
				new IncrementalSSSP.InvalidateMessenger(edgeToBeRemoved),
				new IncrementalSSSP.VertexDistanceUpdater(),
				IncrementalSSSPData.NUM_VERTICES, parameters);

		DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

		resultedVertices.writeAsCsv(resultPath, "\n", ",");
		env.execute();
	} else {
		vertices.writeAsCsv(resultPath, "\n", ",");
		env.execute();
	}

	expected = IncrementalSSSPData.VERTICES;
}
 
Example 3
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
			.join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 4
Source File: ConnectedComponentsWithObjectMapITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
											.flatMap(new UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
	iteration.setSolutionSetUnManaged(true);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 5
Source File: SingleSourceShortestPaths.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

		Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(srcVertexId), env);

		// Execute the scatter-gather iteration
		Graph<Long, Double, Double> result = graph.runScatterGatherIteration(
				new MinDistanceMessenger(), new VertexDistanceUpdater(), maxIterations);

		// Extract the vertices as the result
		DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

		// emit result
		if (fileOutput) {
			singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute("Single Source Shortest Paths Example");
		} else {
			singleSourceShortestPaths.print();
		}

	}
 
Example 6
Source File: EmptyFieldsCountAccumulator.java    From flink with Apache License 2.0
public static void main(final String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// get the data set
		final DataSet<StringTriple> file = getDataSet(env, params);

		// filter lines with empty fields
		final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

		// Here, we could do further processing with the filtered lines...
		JobExecutionResult result;
		// output the filtered lines
		if (params.has("output")) {
			filteredLines.writeAsCsv(params.get("output"));
			// execute program
			result = env.execute("Accumulator example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			filteredLines.print();
			result = env.getLastJobExecutionResult();
		}

		// get the accumulator result via its registration key
		final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
		System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
	}
 
Example 7
Source File: GSASingleSourceShortestPaths.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Edge<Long, Double>> edges = getEdgeDataSet(env);

		Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(srcVertexId), env);

		// Execute the GSA iteration
		Graph<Long, Double, Double> result = graph.runGatherSumApplyIteration(
				new CalculateDistances(), new ChooseMinDistance(), new UpdateDistance(), maxIterations);

		// Extract the vertices as the result
		DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

		// emit result
		if (fileOutput) {
			singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute("GSA Single Source Shortest Paths");
		} else {
			singleSourceShortestPaths.print();
		}

	}
 
Example 8
Source File: WordCount.java    From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {

        final ParameterTool params = ParameterTool.fromArgs(args);

        // set up the execution environment
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // make parameters available in the web interface
        env.getConfig().setGlobalJobParameters(params);

        // get input data
        DataSet<String> text;
        if (params.has("input")) {
            // read the text file from given input path
            text = env.readTextFile(params.get("input"));
        } else {
            // get default test text data
            System.out.println("Executing WordCount example with default input data set.");
            System.out.println("Use --input to specify file input.");
            text = WordCountData.getDefaultTextLineDataSet(env);
        }

        DataSet<Tuple2<String, Integer>> counts =
                // split up the lines in pairs (2-tuples) containing: (word,1)
                text.flatMap(new Tokenizer())
                        // group by the tuple field "0" and sum up tuple field "1"
                        .groupBy(0)
                        .sum(1);

        // emit result
        if (params.has("output")) {
            counts.writeAsCsv(params.get("output"), "\n", " ");
            // execute program
            env.execute("WordCount Example");
        } else {
            System.out.println("Printing result to stdout. Use --output to specify output path.");
            counts.print();
        }

    }
 
Example 9
Source File: PregelSSSP.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

		Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(), env);

		// Execute the vertex-centric iteration
		Graph<Long, Double, Double> result = graph.runVertexCentricIteration(
				new SSSPComputeFunction(srcVertexId), new SSSPCombiner(),
				maxIterations);

		// Extract the vertices as the result
		DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

		// emit result
		if (fileOutput) {
			singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");
			env.execute("Pregel Single Source Shortest Paths Example");
		} else {
			singleSourceShortestPaths.print();
		}

	}
 
Example 10
Source File: ConnectedComponentsWithDeferredUpdateITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
			.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new UpdateComponentIdMatchNonPreserving());

	DataSet<Tuple2<Long, Long>> delta;
	if (extraMapper) {
		delta = changes.map(
				// ID Mapper
				new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
					private static final long serialVersionUID = -3929364091829757322L;

					@Override
					public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
						return v;
					}
				});
	}
	else {
		delta = changes;
	}

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 11
Source File: WebLogAnalysis.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		env.getConfig().setGlobalJobParameters(params);

		// get input data
		DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
		DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
		DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

		// Retain documents with keywords
		DataSet<Tuple1<String>> filterDocs = documents
				.filter(new FilterDocByKeyWords())
				.project(0);

		// Filter ranks by minimum rank
		DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
				.filter(new FilterByRank());

		// Filter visits by visit date
		DataSet<Tuple1<String>> filterVisits = visits
				.filter(new FilterVisitsByDate())
				.project(0);

		// Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
		DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
				filterDocs.join(filterRanks)
							.where(0).equalTo(1)
							.projectSecond(0, 1, 2);

		// Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
		DataSet<Tuple3<Integer, String, Integer>> result =
				joinDocsRanks.coGroup(filterVisits)
								.where(1).equalTo(0)
								.with(new AntiJoinVisits());

		// emit result
		if (params.has("output")) {
			result.writeAsCsv(params.get("output"), "\n", "|");
			// execute program
			env.execute("WebLogAnalysis Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			result.print();
		}
	}
 
Example 12
Source File: IncrementalSSSP.java    From flink with Apache License 2.0
public static void main(String [] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		Edge<Long, Double> edgeToBeRemoved = getEdgeToBeRemoved();

		Graph<Long, Double, Double> graph = IncrementalSSSP.getGraph(env);

		// Assumption: all minimum weight paths are kept
		Graph<Long, Double, Double> ssspGraph = IncrementalSSSP.getSSSPGraph(env);

		// remove the edge
		graph.removeEdge(edgeToBeRemoved);

		// configure the iteration
		ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

		if (isInSSSP(edgeToBeRemoved, ssspGraph.getEdges())) {

			parameters.setDirection(EdgeDirection.IN);
			parameters.setOptDegrees(true);

			// run the scatter-gather iteration to propagate info
			Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(new InvalidateMessenger(edgeToBeRemoved),
					new VertexDistanceUpdater(), maxIterations, parameters);

			DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

			// Emit results
			if (fileOutput) {
				resultedVertices.writeAsCsv(outputPath, "\n", ",");
				env.execute("Incremental SSSP Example");
			} else {
				resultedVertices.print();
			}
		} else {
			// print the vertices
			if (fileOutput) {
				graph.getVertices().writeAsCsv(outputPath, "\n", ",");
				env.execute("Incremental SSSP Example");
			} else {
				graph.getVertices().print();
			}
		}
	}
 
Example 13
Source File: TPCHQuery10.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		if (!params.has("customer") && !params.has("orders") && !params.has("lineitem") && !params.has("nation")) {
			System.err.println("  This program expects data from the TPC-H benchmark as input data.");
			System.err.println("  Due to legal restrictions, we can not ship generated data.");
			System.err.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
			System.err.println("  Usage: TPCHQuery10 --customer <path> --orders <path> --lineitem <path> --nation <path> [--output <path>]");
			return;
		}

		// get customer data set: (custkey, name, address, nationkey, acctbal)
		DataSet<Tuple5<Integer, String, String, Integer, Double>> customers =
			getCustomerDataSet(env, params.get("customer"));
		// get orders data set: (orderkey, custkey, orderdate)
		DataSet<Tuple3<Integer, Integer, String>> orders =
			getOrdersDataSet(env, params.get("orders"));
		// get lineitem data set: (orderkey, extendedprice, discount, returnflag)
		DataSet<Tuple4<Integer, Double, Double, String>> lineitems =
			getLineitemDataSet(env, params.get("lineitem"));
		// get nation data set: (nationkey, name)
		DataSet<Tuple2<Integer, String>> nations =
			getNationsDataSet(env, params.get("nation"));

		// orders filtered by year: (orderkey, custkey)
		DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
				// filter by year
				orders.filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990)
				// project fields out that are no longer required
				.project(0, 1);

		// lineitems filtered by flag: (orderkey, revenue)
		DataSet<Tuple2<Integer, Double>> lineitemsFilteredByFlag =
				// filter by flag
				lineitems.filter(lineitem -> lineitem.f3.equals("R"))
				// compute revenue and project out return flag
				// revenue per item = l_extendedprice * (1 - l_discount)
				.map(lineitem -> new Tuple2<>(lineitem.f0, lineitem.f1 * (1 - lineitem.f2)))
				.returns(Types.TUPLE(Types.INT, Types.DOUBLE)); // for lambda with generics

		// join orders with lineitems: (custkey, revenue)
		DataSet<Tuple2<Integer, Double>> revenueByCustomer =
				ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag)
									.where(0).equalTo(0)
									.projectFirst(1).projectSecond(1);

		revenueByCustomer = revenueByCustomer.groupBy(0).aggregate(Aggregations.SUM, 1);

		// join customer with nation (custkey, name, address, nationname, acctbal)
		DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers
						.joinWithTiny(nations)
						.where(3).equalTo(0)
						.projectFirst(0, 1, 2).projectSecond(1).projectFirst(4);

		// join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
		DataSet<Tuple6<Integer, String, String, String, Double, Double>> result =
				customerWithNation.join(revenueByCustomer)
				.where(0).equalTo(0)
				.projectFirst(0, 1, 2, 3, 4).projectSecond(1);

		// emit result
		if (params.has("output")) {
			result.writeAsCsv(params.get("output"), "\n", "|");
			// execute program
			env.execute("TPCH Query 10 Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			result.print();
		}

	}
 
Example 14
Source File: EuclideanGraphWeighing.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Vertex<Long, Point>> vertices = getVerticesDataSet(env);

		DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

		Graph<Long, Point, Double> graph = Graph.fromDataSet(vertices, edges, env);

		// the edge value will be the Euclidean distance between its src and trg vertex
		DataSet<Tuple3<Long, Long, Double>> edgesWithEuclideanWeight = graph.getTriplets()
				.map(new MapFunction<Triplet<Long, Point, Double>, Tuple3<Long, Long, Double>>() {

					@Override
					public Tuple3<Long, Long, Double> map(Triplet<Long, Point, Double> triplet)
							throws Exception {

						Vertex<Long, Point> srcVertex = triplet.getSrcVertex();
						Vertex<Long, Point> trgVertex = triplet.getTrgVertex();

						return new Tuple3<>(srcVertex.getId(), trgVertex.getId(),
							srcVertex.getValue().euclideanDistance(trgVertex.getValue()));
					}
				});

		Graph<Long, Point, Double> resultedGraph = graph.joinWithEdges(edgesWithEuclideanWeight,
				new EdgeJoinFunction<Double, Double>() {

					public Double edgeJoin(Double edgeValue, Double inputValue) {
						return inputValue;
					}
				});

		// retrieve the edges from the final result
		DataSet<Edge<Long, Double>> result = resultedGraph.getEdges();

		// emit result
		if (fileOutput) {
			result.writeAsCsv(outputPath, "\n", ",");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute("Euclidean Graph Weighing Example");
		} else {
			result.print();
		}

	}
 
Example 15
Source File: EnumTriangles.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// read input data
		DataSet<Edge> edges;
		if (params.has("edges")) {
			edges = env.readCsvFile(params.get("edges"))
					.fieldDelimiter(" ")
					.includeFields(true, true)
					.types(Integer.class, Integer.class)
					.map(new TupleEdgeConverter());
		} else {
			System.out.println("Executing EnumTriangles example with default edges data set.");
			System.out.println("Use --edges to specify file input.");
			edges = EnumTrianglesData.getDefaultEdgeDataSet(env);
		}

		// project edges by vertex id
		DataSet<Edge> edgesById = edges
				.map(new EdgeByIdProjector());

		DataSet<Triad> triangles = edgesById
				// build triads
				.groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
				// filter triads
				.join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

		// emit result
		if (params.has("output")) {
			triangles.writeAsCsv(params.get("output"), "\n", ",");
			// execute program
			env.execute("Basic Triangle Enumeration Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			triangles.print();
		}
	}
 
Example 16
Source File: TPCHQuery3Parquet.java    From parquet-flinktacular with Apache License 2.0
public static void main(String[] args) throws Exception {

		long startTime = System.currentTimeMillis();

		if (!parseParameters(args)) {
			return;
		}

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


		// get input data
		DataSet<Lineitem> lineitems = getLineitemDataSet(env).map(new MapLineitems());
		DataSet<Order> orders = getOrdersDataSet(env).map(new MapOrders());
		DataSet<Customer> customers = getCustomerDataSet(env).map(new MapCustomers());
		
		// Join customers with orders and package them into a ShippingPriorityItem
		DataSet<ShippingPriorityItem> customerWithOrders =
			customers.join(orders).where(0).equalTo(1)
				.with(
					new JoinFunction<Customer, Order, ShippingPriorityItem>() {
						@Override
						public ShippingPriorityItem join(Customer c, Order o) {
							return new ShippingPriorityItem(o.getOrderKey(), 0.0, o.getOrderdate(),
								o.getShippriority());
						}
					});

		// Join the last join result with Lineitems
		DataSet<ShippingPriorityItem> result =
			customerWithOrders.join(lineitems).where(0).equalTo(0)
				.with(
					new JoinFunction<ShippingPriorityItem, Lineitem, ShippingPriorityItem>() {
						@Override
						public ShippingPriorityItem join(ShippingPriorityItem i, Lineitem l) {
							i.setRevenue(l.getExtendedprice() * (1 - l.getDiscount()));
							return i;
						}
					})
					// Group by l_orderkey, o_orderdate and o_shippriority and compute revenue sum
				.groupBy(0, 2, 3)
				.aggregate(Aggregations.SUM, 1);

		// emit result
		result.writeAsCsv(outputPath, "\n", "|");

		// execute program
		env.execute("TPCH Query 3 - Parquet input");

		System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
	}
 
Example 17
Source File: IncrementalSSSP.java    From Flink-CEPplus with Apache License 2.0
public static void main(String [] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		Edge<Long, Double> edgeToBeRemoved = getEdgeToBeRemoved();

		Graph<Long, Double, Double> graph = IncrementalSSSP.getGraph(env);

		// Assumption: all minimum weight paths are kept
		Graph<Long, Double, Double> ssspGraph = IncrementalSSSP.getSSSPGraph(env);

		// remove the edge
		graph.removeEdge(edgeToBeRemoved);

		// configure the iteration
		ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

		if (isInSSSP(edgeToBeRemoved, ssspGraph.getEdges())) {

			parameters.setDirection(EdgeDirection.IN);
			parameters.setOptDegrees(true);

			// run the scatter-gather iteration to propagate info
			Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(new InvalidateMessenger(edgeToBeRemoved),
					new VertexDistanceUpdater(), maxIterations, parameters);

			DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

			// Emit results
			if (fileOutput) {
				resultedVertices.writeAsCsv(outputPath, "\n", ",");
				env.execute("Incremental SSSP Example");
			} else {
				resultedVertices.print();
			}
		} else {
			// print the vertices
			if (fileOutput) {
				graph.getVertices().writeAsCsv(outputPath, "\n", ",");
				env.execute("Incremental SSSP Example");
			} else {
				graph.getVertices().print();
			}
		}
	}
 
Example 18
Source File: EuclideanGraphWeighing.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Vertex<Long, Point>> vertices = getVerticesDataSet(env);

		DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

		Graph<Long, Point, Double> graph = Graph.fromDataSet(vertices, edges, env);

		// the edge value will be the Euclidean distance between its src and trg vertex
		DataSet<Tuple3<Long, Long, Double>> edgesWithEuclideanWeight = graph.getTriplets()
				.map(new MapFunction<Triplet<Long, Point, Double>, Tuple3<Long, Long, Double>>() {

					@Override
					public Tuple3<Long, Long, Double> map(Triplet<Long, Point, Double> triplet)
							throws Exception {

						Vertex<Long, Point> srcVertex = triplet.getSrcVertex();
						Vertex<Long, Point> trgVertex = triplet.getTrgVertex();

						return new Tuple3<>(srcVertex.getId(), trgVertex.getId(),
							srcVertex.getValue().euclideanDistance(trgVertex.getValue()));
					}
				});

		Graph<Long, Point, Double> resultedGraph = graph.joinWithEdges(edgesWithEuclideanWeight,
				new EdgeJoinFunction<Double, Double>() {

					public Double edgeJoin(Double edgeValue, Double inputValue) {
						return inputValue;
					}
				});

		// retrieve the edges from the final result
		DataSet<Edge<Long, Double>> result = resultedGraph.getEdges();

		// emit result
		if (fileOutput) {
			result.writeAsCsv(outputPath, "\n", ",");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute("Euclidean Graph Weighing Example");
		} else {
			result.print();
		}

	}