Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#writeAsCsv()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#writeAsCsv(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CsvOutputFormatITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a streaming word count over {@code WordCountData.TEXT}, writes the running
 * counts with {@code writeAsCsv}, and compares the written lines against the
 * expected tuples.
 */
@Test
public void testProgram() throws Exception {
	final String outputDir = getTempDirPath("result");
	final StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> lines = streamEnv.fromElements(WordCountData.TEXT);
	DataStream<Tuple2<String, Integer>> wordCounts = lines.flatMap(new Tokenizer()).keyBy(0).sum(1);
	wordCounts.writeAsCsv(outputDir);

	streamEnv.execute("WriteAsCsvTest");

	// The expected data is in tuple text form; drop the parentheses before comparing
	// it with the CSV output.
	String expectedCsv = WordCountData.STREAMING_COUNTS_AS_TUPLES.replaceAll("[\\\\(\\\\)]", "");
	compareResultsByLinesInMemory(expectedCsv, outputDir);
}
 
Example 2
Source File: CsvOutputFormatITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Word-count pipeline whose result is emitted through {@code writeAsCsv} and then
 * checked line by line against the expected counts.
 */
@Test
public void testProgram() throws Exception {
	final String csvTarget = getTempDirPath("result");
	final StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> input = execEnv.fromElements(WordCountData.TEXT);
	DataStream<Tuple2<String, Integer>> runningCounts = input
			.flatMap(new Tokenizer())
			.keyBy(0)
			.sum(1);
	runningCounts.writeAsCsv(csvTarget);

	execEnv.execute("WriteAsCsvTest");

	// Remove the parentheses from the expected tuple text so it lines up with the CSV rows.
	final String expectedRows = WordCountData.STREAMING_COUNTS_AS_TUPLES
			.replaceAll("[\\\\(\\\\)]", "");
	compareResultsByLinesInMemory(expectedRows, csvTarget);
}
 
Example 3
Source File: BipartitenessCheckExample.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: parses the arguments, aggregates the edge stream with a
 * {@code BipartitenessCheck}, and either writes the result as CSV or prints it.
 *
 * @param args command-line arguments handed to {@code parseParameters}
 */
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {

	// Nothing to run when the arguments could not be parsed.
	final boolean parsed = parseParameters(args);
	if (!parsed) {
		return;
	}

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final GraphStream<Long, NullValue, NullValue> edgeStream =
			new SimpleEdgeStream<>(getEdgesDataSet(env), env);
	final DataStream<Candidates> candidates =
			edgeStream.aggregate(new BipartitenessCheck<Long, NullValue>(500L));

	// Emit the results: standard output unless file output was requested.
	if (!fileOutput) {
		candidates.print();
	} else {
		candidates.writeAsCsv(outputPath);
	}

	env.execute("Bipartiteness Check");
}
 
Example 4
Source File: TestSlice.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Folds the edge values of each vertex over a one-second slice using the default
 * direction and checks the CSV output.
 */
@Test
public void testFoldNeighborsDefault() throws Exception {
	final String outputPath = getTempDirPath("result");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edges =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, Long>> folded = edges
			.slice(Time.of(1, TimeUnit.SECONDS))
			.foldNeighbors(new Tuple2<Long, Long>(0L, 0L), new SumEdgeValues());
	folded.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	// One "vertex,sum" row per vertex.
	final String expected =
			"1,25\n"
			+ "2,23\n"
			+ "3,69\n"
			+ "4,45\n"
			+ "5,51\n";
	compareResultsByLinesInMemory(expected, outputPath);
}
 
Example 5
Source File: TestSlice.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Folds the edge values of each vertex over a one-second slice with
 * {@code EdgeDirection.IN} and checks the CSV output.
 */
@Test
public void testFoldNeighborsIn() throws Exception {
	final String outputPath = getTempDirPath("result");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edges =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, Long>> folded = edges
			.slice(Time.of(1, TimeUnit.SECONDS), EdgeDirection.IN)
			.foldNeighbors(new Tuple2<Long, Long>(0L, 0L), new SumEdgeValues());
	folded.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	// One "vertex,sum" row per vertex.
	final String expected =
			"1,51\n"
			+ "2,12\n"
			+ "3,36\n"
			+ "4,34\n"
			+ "5,80\n";
	compareResultsByLinesInMemory(expected, outputPath);
}
 
Example 6
Source File: TestSlice.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Folds the edge values of each vertex over a one-second slice with
 * {@code EdgeDirection.ALL} and checks the CSV output.
 */
@Test
public void testFoldNeighborsAll() throws Exception {
	final String outputPath = getTempDirPath("result");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edges =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, Long>> folded = edges
			.slice(Time.of(1, TimeUnit.SECONDS), EdgeDirection.ALL)
			.foldNeighbors(new Tuple2<Long, Long>(0L, 0L), new SumEdgeValues());
	folded.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	// One "vertex,sum" row per vertex.
	final String expected =
			"1,76\n"
			+ "2,35\n"
			+ "3,105\n"
			+ "4,79\n"
			+ "5,131\n";
	compareResultsByLinesInMemory(expected, outputPath);
}
 
Example 7
Source File: TestSlice.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Reduces the edge values of each vertex over a one-second slice using the default
 * direction and checks the CSV output.
 */
@Test
public void testReduceOnNeighborsDefault() throws Exception {
	final String outputPath = getTempDirPath("result");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edges =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, Long>> reduced = edges
			.slice(Time.of(1, TimeUnit.SECONDS))
			.reduceOnEdges(new SumEdgeValuesReduce());
	reduced.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	// One "vertex,value" row per vertex.
	final String expected =
			"1,25\n"
			+ "2,23\n"
			+ "3,69\n"
			+ "4,45\n"
			+ "5,51\n";
	compareResultsByLinesInMemory(expected, outputPath);
}
 
Example 8
Source File: TestSlice.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Reduces the edge values of each vertex over a one-second slice with
 * {@code EdgeDirection.IN} and checks the CSV output.
 */
@Test
public void testReduceOnNeighborsIn() throws Exception {
	final String outputPath = getTempDirPath("result");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edges =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, Long>> reduced = edges
			.slice(Time.of(1, TimeUnit.SECONDS), EdgeDirection.IN)
			.reduceOnEdges(new SumEdgeValuesReduce());
	reduced.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	// One "vertex,value" row per vertex.
	final String expected =
			"1,51\n"
			+ "2,12\n"
			+ "3,36\n"
			+ "4,34\n"
			+ "5,80\n";
	compareResultsByLinesInMemory(expected, outputPath);
}
 
Example 9
Source File: TestSlice.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Reduces the edge values of each vertex over a one-second slice with
 * {@code EdgeDirection.ALL} and checks the CSV output.
 */
@Test
public void testReduceOnNeighborsAll() throws Exception {
	final String outputPath = getTempDirPath("result");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edges =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, Long>> reduced = edges
			.slice(Time.of(1, TimeUnit.SECONDS), EdgeDirection.ALL)
			.reduceOnEdges(new SumEdgeValuesReduce());
	reduced.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	// One "vertex,value" row per vertex.
	final String expected =
			"1,76\n"
			+ "2,35\n"
			+ "3,105\n"
			+ "4,79\n"
			+ "5,131\n";
	compareResultsByLinesInMemory(expected, outputPath);
}
 
Example 10
Source File: TestSlice.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code SumEdgeValuesApply} to each vertex's neighborhood over a one-second
 * slice using the default direction and checks the CSV output.
 */
@Test
public void testApplyOnNeighborsDefault() throws Exception {
	final String outputPath = getTempDirPath("result");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edges =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, String>> labelled = edges
			.slice(Time.of(1, TimeUnit.SECONDS))
			.applyOnNeighbors(new SumEdgeValuesApply());
	labelled.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	// One "vertex,label" row per vertex.
	final String expected =
			"1,small\n"
			+ "2,small\n"
			+ "3,big\n"
			+ "4,small\n"
			+ "5,big\n";
	compareResultsByLinesInMemory(expected, outputPath);
}
 
Example 11
Source File: TestSlice.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code SumEdgeValuesApply} to each vertex's neighborhood over a one-second
 * slice with {@code EdgeDirection.IN} and checks the CSV output.
 */
@Test
public void testApplyOnNeighborsIn() throws Exception {
	final String outputPath = getTempDirPath("result");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edges =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, String>> labelled = edges
			.slice(Time.of(1, TimeUnit.SECONDS), EdgeDirection.IN)
			.applyOnNeighbors(new SumEdgeValuesApply());
	labelled.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	// One "vertex,label" row per vertex.
	final String expected =
			"1,big\n"
			+ "2,small\n"
			+ "3,small\n"
			+ "4,small\n"
			+ "5,big\n";
	compareResultsByLinesInMemory(expected, outputPath);
}
 
Example 12
Source File: TestSlice.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code SumEdgeValuesApply} to each vertex's neighborhood over a one-second
 * slice with {@code EdgeDirection.ALL} and checks the CSV output.
 */
@Test
public void testApplyOnNeighborsAll() throws Exception {
	final String outputPath = getTempDirPath("result");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edges =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, String>> labelled = edges
			.slice(Time.of(1, TimeUnit.SECONDS), EdgeDirection.ALL)
			.applyOnNeighbors(new SumEdgeValuesApply());
	labelled.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	// One "vertex,label" row per vertex.
	final String expected =
			"1,big\n"
			+ "2,small\n"
			+ "3,big\n"
			+ "4,big\n"
			+ "5,big\n";
	compareResultsByLinesInMemory(expected, outputPath);
}
 
Example 13
Source File: CsvOutputFormatITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Counts words from {@code WordCountData.TEXT} in a streaming job, writes the counts
 * via {@code writeAsCsv}, and verifies the written lines.
 */
@Test
public void testProgram() throws Exception {
	final String targetPath = getTempDirPath("result");
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<String> source = environment.fromElements(WordCountData.TEXT);
	final DataStream<Tuple2<String, Integer>> tally =
			source.flatMap(new Tokenizer()).keyBy(0).sum(1);
	tally.writeAsCsv(targetPath);

	environment.execute("WriteAsCsvTest");

	// Expected text is in tuple form; remove the parentheses before the comparison.
	final String expectedLines =
			WordCountData.STREAMING_COUNTS_AS_TUPLES.replaceAll("[\\\\(\\\\)]", "");
	compareResultsByLinesInMemory(expectedLines, targetPath);
}
 
Example 14
Source File: HTMIntegrationTest.java    From flink-htm with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Feeds a repeating day-of-week sequence through {@code HTM.learn}, selects a
 * (day, most probable value, anomaly score) tuple per inference, and both writes the
 * stream as CSV and prints it.
 */
@Test
public void testSimple() throws Exception {
    final int cycles = 400;
    final int daysPerCycle = 7; // Days of Week

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    // cycles * daysPerCycle records, with the day index restarting at 0 in every cycle
    List<TestHarness.DayDemoRecord> dayRecords = IntStream.range(0, cycles)
            .flatMap(cycle -> IntStream.range(0, daysPerCycle))
            .mapToObj(dayIndex -> new TestHarness.DayDemoRecord(dayIndex))
            .collect(Collectors.toList());

    DataStream<TestHarness.DayDemoRecord> source = env.fromCollection(dayRecords);

    // Requests a reset whenever a record has dayOfWeek == 0.
    ResetFunction<TestHarness.DayDemoRecord> resetAtDayZero =
            new ResetFunction<TestHarness.DayDemoRecord>() {
                @Override
                public boolean reset(TestHarness.DayDemoRecord record) throws Exception {
                    return record.dayOfWeek == 0;
                }
            };

    // Maps each (record, inference) pair to (dayOfWeek, most probable value, anomaly score).
    InferenceSelectFunction<TestHarness.DayDemoRecord, Tuple3<Integer,Double,Double>> toTuple =
            new InferenceSelectFunction<TestHarness.DayDemoRecord, Tuple3<Integer,Double,Double>>() {
                @Override
                public Tuple3<Integer,Double,Double> select(Tuple2<TestHarness.DayDemoRecord,NetworkInference> pair) throws Exception {
                    return new Tuple3<>(
                            pair.f0.dayOfWeek,
                            (Double) pair.f1.getClassification("dayOfWeek").getMostProbableValue(1),
                            pair.f1.getAnomalyScore());
                }
            };

    DataStream<Tuple3<Integer,Double,Double>> inferences = HTM
            .learn(source, new TestHarness.DayDemoNetworkFactory())
            .resetOn(resetAtDayZero)
            .select(toTuple);

    inferences.writeAsCsv(resultPath, FileSystem.WriteMode.OVERWRITE);
    inferences.print();

    env.execute();
}
 
Example 15
Source File: BroadcastTriangleCount.java    From gelly-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Entry point: parses the arguments, broadcasts the edge stream through the
 * {@code TriangleSampler} and {@code TriangleSummer} operators, and either writes the
 * result as CSV or prints it.
 *
 * @param args command-line arguments handed to {@code parseParameters}
 */
public static void main(String[] args) throws Exception {

	// Nothing to run when the arguments could not be parsed.
	final boolean parsed = parseParameters(args);
	if (!parsed) {
		return;
	}

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<Edge<Long, NullValue>> edgeStream = getEdgesDataSet(env);

	// Total sample count divided by the environment's parallelism (integer division).
	final int samplesPerInstance = samples / env.getParallelism();

	// Count triangles
	final DataStream<Tuple2<Integer, Integer>> triangleCounts = edgeStream
			.broadcast()
			.flatMap(new TriangleSampler(samplesPerInstance, vertexCount))
			.flatMap(new TriangleSummer(samples, vertexCount))
			.setParallelism(1);

	// Emit the results: standard output unless file output was requested.
	if (!fileOutput) {
		triangleCounts.print();
	} else {
		triangleCounts.writeAsCsv(outputPath);
	}

	env.execute("Broadcast Triangle Count");
}
 
Example 16
Source File: IncidenceSamplingTriangleCount.java    From gelly-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Entry point: parses the arguments, runs the edge stream through the
 * {@code EdgeSampleMapper}, {@code TriangleSampleMapper} and {@code TriangleSummer}
 * operators, and either writes the result as CSV or prints it.
 *
 * @param args command-line arguments handed to {@code parseParameters}
 */
public static void main(String[] args) throws Exception {

	// Nothing to run when the arguments could not be parsed.
	final boolean parsed = parseParameters(args);
	if (!parsed) {
		return;
	}

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<Edge<Long, NullValue>> edgeStream = getEdgesDataSet(env);

	// Total sample count divided by the environment's parallelism (integer division).
	final int samplesPerInstance = samples / env.getParallelism();

	// Count triangles
	final DataStream<Tuple2<Integer, Integer>> triangleCounts = edgeStream
			.flatMap(new EdgeSampleMapper(samplesPerInstance, env.getParallelism()))
			.setParallelism(1)
			.keyBy(0)
			.flatMap(new TriangleSampleMapper(samplesPerInstance, vertexCount))
			.flatMap(new TriangleSummer(samples, vertexCount))
			.setParallelism(1);

	// Emit the results: standard output unless file output was requested.
	if (!fileOutput) {
		triangleCounts.print();
	} else {
		triangleCounts.writeAsCsv(outputPath);
	}

	env.execute("Incidence Sampling Triangle Count");
}