Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#readTextFile()

The following examples show how to use org.apache.flink.api.java.ExecutionEnvironment#readTextFile() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WordCountNestedPOJOITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Override
protected void testProgram() throws Exception {
	// Word count over nested-POJO records, grouping on the nested field path.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<String> lines = env.readTextFile(textPath);

	ReduceFunction<WC> sumCounts = new ReduceFunction<WC>() {
		private static final long serialVersionUID = 1L;

		public WC reduce(WC left, WC right) {
			// Keep the grouping key, add the two counts.
			return new WC(left.complex.someTest, left.count + right.count);
		}
	};

	DataSet<WC> counts = lines
			.flatMap(new Tokenizer())
			.groupBy("complex.someTest")
			.reduce(sumCounts);

	counts.writeAsText(resultPath);

	env.execute("WordCount with custom data types example");
}
 
Example 2
Source File: BatchWCJavaApp.java    From 163-bigdate-note with GNU General Public License v3.0 6 votes vote down vote up
public static void main(String[] args) throws Exception {
        // Batch word count: read lines, split on commas, count each token.
        String input = "file:///D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input";
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSource<String> text = env.readTextFile(input);

        FlatMapFunction<String, Tuple2<String, Integer>> splitter =
                new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                        // Emit (token, 1) for every non-empty comma-separated token.
                        for (String token : line.toLowerCase().split(",")) {
                            if (!token.isEmpty()) {
                                out.collect(new Tuple2<String, Integer>(token, 1));
                            }
                        }
                    }
                };

        text.flatMap(splitter).groupBy(0).sum(1).print();
    }
 
Example 3
Source File: Readonly.java    From flink-perf with Apache License 2.0 6 votes vote down vote up
public static void main(String[] args) throws Exception {

	// Benchmark job: read a text file, drop every record, write the (empty) result.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	System.err.println("Using input=" + args[0]);

	// get input data
	DataSet<String> lines = env.readTextFile(args[0]);

	// The filter rejects everything, so only the read path does real work.
	DataSet<String> filtered = lines.filter(new FilterFunction<String>() {
		@Override
		public boolean filter(String value) throws Exception {
			return false;
		}
	});
	filtered.writeAsText("file:///tmp/out", WriteMode.OVERWRITE);

	// execute program
	env.execute("Read only job");
}
 
Example 4
Source File: WordCountSimplePOJOITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
protected void testProgram() throws Exception {
	// Word count over simple POJOs, grouping on the public "word" field.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<String> lines = env.readTextFile(textPath);

	DataSet<WC> counts = lines
			.flatMap(new Tokenizer())
			.groupBy("word")
			.reduce(new ReduceFunction<WC>() {
				private static final long serialVersionUID = 1L;

				public WC reduce(WC left, WC right) {
					// Same word on both sides of the group; add the counts.
					return new WC(left.word, left.count + right.count);
				}
			});

	counts.writeAsText(resultPath);

	env.execute("WordCount with custom data types example");
}
 
Example 5
Source File: RatingsDistribution.java    From flink-examples with MIT License 6 votes vote down vote up
public static void main(String[] args) throws Exception {
        // Count how many ratings fall into each rating value and print the result.
        ParameterTool params = ParameterTool.fromArgs(args);
        // --input must point at the ratings.csv file.
        final String ratingsCsvPath = params.getRequired("input");

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        env.readTextFile(ratingsCsvPath)
            .flatMap(new ExtractRating())
            .groupBy(0)
            .sum(1)
            .print();
    }
 
Example 6
Source File: WordCountSubclassPOJOITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
protected void testProgram() throws Exception {
	// Word count where records are a POJO subclass; counts accumulate in
	// WC.secretCount during the reduce and are copied into WCBase.count
	// before writing the result.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> lines = env.readTextFile(textPath);

	DataSet<WCBase> counts = lines
			.flatMap(new Tokenizer())
			.groupBy("word")
			.reduce(new ReduceFunction<WCBase>() {
				private static final long serialVersionUID = 1L;

				public WCBase reduce(WCBase a, WCBase b) {
					// Both inputs are really WC instances; combine their hidden counts.
					return new WC(a.word, ((WC) a).secretCount + ((WC) b).secretCount);
				}
			})
			.map(new MapFunction<WCBase, WCBase>() {
				@Override
				public WCBase map(WCBase value) throws Exception {
					// Surface the accumulated secretCount as the public count.
					WC wc = (WC) value;
					wc.count = wc.secretCount;
					return wc;
				}
			});

	counts.writeAsText(resultPath);

	env.execute("WordCount with custom data types example");
}
 
Example 7
Source File: MusicProfiles.java    From flink with Apache License 2.0 5 votes vote down vote up
private static DataSet<String> getMismatchesData(ExecutionEnvironment env) {
	// File-backed input when configured, otherwise the bundled test data.
	return fileOutput
			? env.readTextFile(mismatchesInputPath)
			: MusicProfilesData.getMismatches(env);
}
 
Example 8
Source File: WordCountSubclassPOJOITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
protected void testProgram() throws Exception {
	// Word count on a subclass POJO: the reduce sums the subclass-only
	// secretCount field, and a final map copies it into the base count field.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> input = env.readTextFile(textPath);

	ReduceFunction<WCBase> combine = new ReduceFunction<WCBase>() {
		private static final long serialVersionUID = 1L;

		public WCBase reduce(WCBase value1, WCBase value2) {
			WC wc1 = (WC) value1;
			WC wc2 = (WC) value2;
			return new WC(value1.word, wc1.secretCount + wc2.secretCount);
		}
	};

	MapFunction<WCBase, WCBase> publish = new MapFunction<WCBase, WCBase>() {
		@Override
		public WCBase map(WCBase value) throws Exception {
			// Copy the hidden total into the publicly visible count.
			WC wc = (WC) value;
			wc.count = wc.secretCount;
			return wc;
		}
	};

	DataSet<WCBase> counts = input
			.flatMap(new Tokenizer())
			.groupBy("word")
			.reduce(combine)
			.map(publish);

	counts.writeAsText(resultPath);

	env.execute("WordCount with custom data types example");
}
 
Example 9
Source File: WordCountDemo.java    From flinkDemo with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws Exception {
    // Classic batch word count with optional --input / --output paths.
    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // Make the CLI parameters visible in the web UI.
    env.getConfig().setGlobalJobParameters(params);

    // Input: either the given file, or the bundled default text.
    final DataSet<String> text;
    if (params.has("input")) {
        text = env.readTextFile(params.get("input"));
    } else {
        System.out.println("Executing WordCount example with default input data set.");
        System.out.println("Use --input to specify file input.");
        text = WordCountData.getDefaultTextLineDataSet(env);
    }

    // (word, 1) pairs, grouped on the word, counts summed.
    DataSet<Tuple2<String, Integer>> counts = text
            .flatMap(new Tokenizer())
            .groupBy(0)
            .sum(1);

    if (params.has("output")) {
        counts.writeAsCsv(params.get("output"), "\n", " ");
        // A sink was added, so the job must be executed explicitly.
        env.execute("WordCount Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        counts.print();
    }

}
 
Example 10
Source File: HBaseWriteExample.java    From flink with Apache License 2.0 5 votes vote down vote up
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
	// Use the file at textPath when file input is enabled; fall back to test data.
	return fileOutput ? env.readTextFile(textPath) : getDefaultTextLineDataSet(env);
}
 
Example 11
Source File: HBaseWriteExample.java    From flink with Apache License 2.0 5 votes vote down vote up
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
	// Prefer the configured input file; otherwise use the bundled sample text.
	if (!fileOutput) {
		return getDefaultTextLineDataSet(env);
	}
	return env.readTextFile(textPath);
}
 
Example 12
Source File: WordCountHashAgg.java    From flink-perf with Apache License 2.0 5 votes vote down vote up
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
	// Returns the job input read from textPath. Unlike the other examples,
	// this job ships no default data set, so file input is mandatory.
	if (fileOutput) {
		// read the text file from given input path
		return env.readTextFile(textPath);
	} else {
		// Fixed typo in the error message ("No build in data").
		throw new RuntimeException("No built-in data; please provide an input file path");
	}
}
 
Example 13
Source File: MusicProfiles.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
private static DataSet<String> getMismatchesData(ExecutionEnvironment env) {
	// Read mismatches from the input file when file IO is enabled,
	// otherwise fall back to the bundled sample data.
	if (!fileOutput) {
		return MusicProfilesData.getMismatches(env);
	}
	return env.readTextFile(mismatchesInputPath);
}
 
Example 14
Source File: AccumulatorITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Override
protected void testProgram() throws Exception {
	// Word count whose UDFs register accumulators; the JobExecutionResult is
	// stored so the test can inspect the accumulator values afterwards.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	env.readTextFile(dataPath)
		.flatMap(new TokenizeLine())
		.groupBy(0)
		.reduceGroup(new CountWords())
		.writeAsCsv(resultPath, "\n", " ");

	this.result = env.execute();
}
 
Example 15
Source File: WordCountSubclassInterfacePOJOITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
protected void testProgram() throws Exception {
	// Word count over a POJO hierarchy whose count lives in an interface-typed
	// nested field (secretCount); totals are folded into the first record of
	// each group, then exposed via WCBase.count.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> lines = env.readTextFile(textPath);

	DataSet<WCBase> counts = lines
			.flatMap(new Tokenizer())
			.groupBy("word")
			.reduce(new ReduceFunction<WCBase>() {
				private static final long serialVersionUID = 1L;

				public WCBase reduce(WCBase value1, WCBase value2) {
					WC first = (WC) value1;
					WC second = (WC) value2;
					// Accumulate into the first element and reuse it as the result.
					first.secretCount.setCount(
							first.secretCount.getCount() + second.secretCount.getCount());
					return first;
				}
			})
			.map(new MapFunction<WCBase, WCBase>() {
				@Override
				public WCBase map(WCBase value) throws Exception {
					// Surface the nested total as the public count.
					WC wc = (WC) value;
					wc.count = wc.secretCount.getCount();
					return wc;
				}
			});

	counts.writeAsText(resultPath);

	env.execute("WordCount with custom data types example");
}
 
Example 16
Source File: Prepare.java    From flink-perf with Apache License 2.0 5 votes vote down vote up
public static void main(final String[] args) throws Exception {
	// Convert lineitem text read from args[0] into Avro records written to args[1].
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<AvroLineitem> records = env
			.readTextFile(args[0])
			.map(new AvroLineItemMapper());
	records.write(new AvroOutputFormat<AvroLineitem>(AvroLineitem.class), args[1]);

	env.execute("Lineitem Text 2 Avro converter");
}
 
Example 17
Source File: WordCountPojo.java    From flink-simple-tutorial with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws Exception {

        // POJO word count: optional --input / --output, results as Word objects.
        final ParameterTool params = ParameterTool.fromArgs(args);

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // make parameters available in the web interface
        env.getConfig().setGlobalJobParameters(params);

        // Input: either the given file, or the bundled default text.
        final DataSet<String> text;
        if (params.has("input")) {
            text = env.readTextFile(params.get("input"));
        } else {
            System.out.println("Executing WordCount example with default input data set.");
            System.out.println("Use --input to specify file input.");
            text = WordCountData.getDefaultTextLineDataSet(env);
        }

        // Word objects with frequency 1, grouped on the word field, frequencies summed.
        DataSet<Word> counts = text
                .flatMap(new Tokenizer())
                .groupBy("word")
                .reduce(new ReduceFunction<Word>() {
                    @Override
                    public Word reduce(Word a, Word b) throws Exception {
                        return new Word(a.word, a.frequency + b.frequency);
                    }
                });

        if (params.has("output")) {
            counts.writeAsText(params.get("output"), FileSystem.WriteMode.OVERWRITE);
            // A sink was added, so the job must be executed explicitly.
            env.execute("WordCount-Pojo Example");
        } else {
            System.out.println("Printing result to stdout. Use --output to specify output path.");
            counts.print();
        }

    }
 
Example 18
Source File: WordCount.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
	// Returns the word-count input: the file at textPath when file input is
	// enabled, otherwise a built-in excerpt of Hamlet's soliloquy (one data
	// set element per line) so the example runs without any arguments.
	if (fileOutput) {
		// read the text file from given input path
		return env.readTextFile(textPath);
	} else {
		// get default test text data
		return env.fromElements(
				"To be, or not to be,--that is the question:--",
				"Whether 'tis nobler in the mind to suffer",
				"The slings and arrows of outrageous fortune",
				"Or to take arms against a sea of troubles,",
				"And by opposing end them?--To die,--to sleep,--",
				"No more; and by a sleep to say we end",
				"The heartache, and the thousand natural shocks",
				"That flesh is heir to,--'tis a consummation",
				"Devoutly to be wish'd. To die,--to sleep;--",
				"To sleep! perchance to dream:--ay, there's the rub;",
				"For in that sleep of death what dreams may come,",
				"When we have shuffled off this mortal coil,",
				"Must give us pause: there's the respect",
				"That makes calamity of so long life;",
				"For who would bear the whips and scorns of time,",
				"The oppressor's wrong, the proud man's contumely,",
				"The pangs of despis'd love, the law's delay,",
				"The insolence of office, and the spurns",
				"That patient merit of the unworthy takes,",
				"When he himself might his quietus make",
				"With a bare bodkin? who would these fardels bear,",
				"To grunt and sweat under a weary life,",
				"But that the dread of something after death,--",
				"The undiscover'd country, from whose bourn",
				"No traveller returns,--puzzles the will,",
				"And makes us rather bear those ills we have",
				"Than fly to others that we know not of?",
				"Thus conscience does make cowards of us all;",
				"And thus the native hue of resolution",
				"Is sicklied o'er with the pale cast of thought;",
				"And enterprises of great pith and moment,",
				"With this regard, their currents turn awry,",
				"And lose the name of action.--Soft you now!",
				"The fair Ophelia!--Nymph, in thy orisons",
				"Be all my sins remember'd."
				);
	}
}
 
Example 19
Source File: GrepJob.java    From flink-perf with Apache License 2.0 4 votes vote down vote up
public static void main(final String[] args) throws Exception {
	// Grep benchmark: one filter per pattern over the same input, each filter
	// tracking match/record accumulators, writing matches to <out>_<pattern>.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	String in = args[0];
	String out = args[1];
	System.err.println("Using input=" + in);
	System.err.println("Using output=" + out);

	// Everything after the first two arguments is a regex pattern.
	String[] patterns = Arrays.copyOfRange(args, 2, args.length);
	System.err.println("Using patterns: " + Arrays.toString(patterns));

	// get input data (read from 'in', consistent with the log line above)
	DataSet<String> text = env.readTextFile(in);
	for (final String pattern : patterns) {
		DataSet<String> res = text.filter(new RichFilterFunction<String>() {
			private static final long serialVersionUID = 1L;

			Pattern p = Pattern.compile(pattern);
			LongCounter filterMatches = new LongCounter();
			LongCounter filterRecords = new LongCounter();

			@Override
			public void open(Configuration parameters) throws Exception {
				super.open(parameters);
				// Register per-pattern accumulators so the totals show up
				// in the job result.
				getRuntimeContext().addAccumulator("filterMatchCount-" + pattern, filterMatches);
				getRuntimeContext().addAccumulator("filterRecordCount-" + pattern, filterRecords);
			}

			@Override
			public boolean filter(String value) throws Exception {
				filterRecords.add(1L);
				if (value == null || value.length() == 0) {
					return false;
				}
				final Matcher m = p.matcher(value);
				if (m.find()) {
					filterMatches.add(1L);
					return true;
				}
				return false;
			}
		}).name("grep for " + pattern);
		res.writeAsText(out + "_" + pattern, FileSystem.WriteMode.OVERWRITE);
	}

	// execute program and dump the accumulator totals
	JobExecutionResult jobResult = env.execute("Flink Grep benchmark");
	System.err.println(AccumulatorHelper.getResultsFormated(jobResult.getAllAccumulatorResults()));
}
 
Example 20
Source File: WordCountWithoutCombine.java    From flink-perf with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws Exception {

	if (!parseParameters(args)) {
		return;
	}

	// set up the execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// get input data
	DataSet<String> text = env.readTextFile(textPath);

	DataSet<Tuple2<String, Integer>> counts =
			// split up the lines in pairs (2-tuples) containing: (word,1)
			text.flatMap(new Tokenizer())
			// Drop empty words. BUG FIX: the original tested
			// !value.f1.equals("") — f1 is the Integer count, which can
			// never equal a String, so the filter was a silent no-op.
			// The word field (f0) is the intended target.
			.filter(new FilterFunction<Tuple2<String, Integer>>() {
				@Override
				public boolean filter(Tuple2<String, Integer> value) throws Exception {
					return !value.f0.equals("");
				}
			})
			// group by the tuple field "0" and sum up tuple field "1"
			.groupBy(0)
			// manual group-reduce instead of sum(1) so no combiner runs
			.reduceGroup(new GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {
				@Override
				public void reduce(
						Iterable<Tuple2<String, Integer>> valuesIt,
						Collector<Tuple2<String, Integer>> out) throws Exception {
					Iterator<Tuple2<String, Integer>> values = valuesIt.iterator();
					int count = 0;
					// the iterator always yields at least one element per group
					Tuple2<String, Integer> val = null;
					while (values.hasNext()) {
						val = values.next();
						count += val.f1;
					}
					val.f1 = count;
					out.collect(val);
				}
			});

	counts.writeAsText(outputPath);

	// execute program
	env.execute("WordCountWithoutcombine");
}