org.apache.flink.api.java.io.TextOutputFormat Java Examples

The following examples show how to use org.apache.flink.api.java.io.TextOutputFormat. Each example is taken from an open-source project, which is noted together with the source file above the code.
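
Before the examples, a minimal usage sketch (the output path is a placeholder; imports such as org.apache.flink.core.fs.Path and org.apache.flink.core.fs.FileSystem are assumed):

// A minimal sketch: write a DataSet as text using TextOutputFormat directly.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> data = env.fromElements("a", "b", "c");

TextOutputFormat<String> format = new TextOutputFormat<>(new Path("/tmp/out")); // placeholder path
format.setWriteMode(FileSystem.WriteMode.OVERWRITE);
data.output(format); // data.writeAsText("/tmp/out", WriteMode.OVERWRITE) is the shorthand

env.execute(); // sinks are lazy; execute() performs the write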
Example #1
Source File: DataFlinkWriter.java    From toolbox with Apache License 2.0
public static <T extends DataInstance> void writeDataToARFFFolder(DataFlink<T> data, String path) throws Exception {
    DataSet<T> dataSet = data.getDataSet();

    // Write the ARFF header (relation name and attribute list) next to the data folder.
    DataFlinkWriter.writeHeader(dataSet.getExecutionEnvironment(), data, path, false);

    // Format every data instance as an ARFF line, overwriting any previous output.
    dataSet.writeAsFormattedText(path + "/data/", FileSystem.WriteMode.OVERWRITE,
            new TextOutputFormat.TextFormatter<T>() {
                @Override
                public String format(T value) {
                    return ARFFDataWriter.dataInstanceToARFFString(value);
                }
            });

    // Sinks are lazy; execute() triggers the actual write.
    dataSet.getExecutionEnvironment().execute();
}
 
Example #2
Source File: DataFlinkWriter.java    From toolbox with Apache License 2.0
public static <T extends DataInstance> void writeHeader(ExecutionEnvironment env, DataFlink<T> data, String path,
                                                        boolean includeRanges) {

    // Write the @relation line to its own file.
    DataSource<String> name = env.fromElements("@relation " + data.getName());
    name.writeAsText(path + "/name.txt", FileSystem.WriteMode.OVERWRITE);

    // Write one formatted line per attribute.
    DataSource<Attribute> attData = env.fromCollection(data.getAttributes().getFullListOfAttributes());

    attData.writeAsFormattedText(path + "/attributes.txt", FileSystem.WriteMode.OVERWRITE, new TextOutputFormat.TextFormatter<Attribute>() {
        @Override
        public String format(Attribute att) {
            return ARFFDataWriter.attributeToARFFStringWithIndex(att, includeRanges);
        }
    });
}
 
Example #3
Source File: HDFSTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testChangingFileNames() {
	org.apache.hadoop.fs.Path hdfsPath = new org.apache.hadoop.fs.Path(hdfsURI + "/hdfsTest");
	Path path = new Path(hdfsPath.toString());

	String type = "one";
	TextOutputFormat<String> outputFormat = new TextOutputFormat<>(path);

	outputFormat.setWriteMode(FileSystem.WriteMode.NO_OVERWRITE);
	outputFormat.setOutputDirectoryMode(FileOutputFormat.OutputDirectoryMode.ALWAYS);

	try {
		outputFormat.open(0, 2);
		outputFormat.writeRecord(type);
		outputFormat.close();

		outputFormat.open(1, 2);
		outputFormat.writeRecord(type);
		outputFormat.close();

		assertTrue("No result file present", hdfs.exists(hdfsPath));
		FileStatus[] files = hdfs.listStatus(hdfsPath);
		Assert.assertEquals(2, files.length);
		for (FileStatus file : files) {
			assertTrue("1".equals(file.getPath().getName()) || "2".equals(file.getPath().getName()));
		}

	} catch (IOException e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #4
Source File: HDFSTest.java    From flink with Apache License 2.0
@Test
public void testChangingFileNames() {
	org.apache.hadoop.fs.Path hdfsPath = new org.apache.hadoop.fs.Path(hdfsURI + "/hdfsTest");
	Path path = new Path(hdfsPath.toString());

	String type = "one";
	TextOutputFormat<String> outputFormat = new TextOutputFormat<>(path);

	outputFormat.setWriteMode(FileSystem.WriteMode.NO_OVERWRITE);
	outputFormat.setOutputDirectoryMode(FileOutputFormat.OutputDirectoryMode.ALWAYS);

	try {
		outputFormat.open(0, 2);
		outputFormat.writeRecord(type);
		outputFormat.close();

		outputFormat.open(1, 2);
		outputFormat.writeRecord(type);
		outputFormat.close();

		assertTrue("No result file present", hdfs.exists(hdfsPath));
		FileStatus[] files = hdfs.listStatus(hdfsPath);
		Assert.assertEquals(2, files.length);
		for (FileStatus file : files) {
			assertTrue("1".equals(file.getPath().getName()) || "2".equals(file.getPath().getName()));
		}

	} catch (IOException e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #5
Source File: FileSystemOutputFormatTest.java    From flink with Apache License 2.0
private OneInputStreamOperatorTestHarness<Row, Object> createSink(
		boolean override,
		boolean partition,
		boolean dynamicGrouped,
		LinkedHashMap<String, String> staticPartitions,
		AtomicReference<FileSystemOutputFormat<Row>> sinkRef) throws Exception {
	String[] columnNames = new String[]{"a", "b", "c"};
	String[] partitionColumns = partition ? new String[]{"c"} : new String[0];

	TableMetaStoreFactory msFactory = new FileSystemCommitterTest.TestMetaStoreFactory(
			new Path(outputFile.getPath()));
	FileSystemOutputFormat<Row> sink = new FileSystemOutputFormat.Builder<Row>()
			.setMetaStoreFactory(msFactory)
			.setTempPath(new Path(tmpFile.getPath()))
			.setOverwrite(override)
			.setPartitionColumns(partitionColumns)
			.setPartitionComputer(
					new RowPartitionComputer("default", columnNames, partitionColumns))
			.setFormatFactory(TextOutputFormat::new)
			.setDynamicGrouped(dynamicGrouped)
			.setStaticPartitions(staticPartitions)
			.build();

	sinkRef.set(sink);

	return new OneInputStreamOperatorTestHarness<>(
			new StreamSink<>(new OutputFormatSinkFunction<>(sink)),
			// test parallelism
			3, 3, 0);
}
 
Example #6
Source File: FunctionCompiler.java    From rheem with Apache License 2.0
public <T> TextOutputFormat.TextFormatter<T> compileOutput(TransformationDescriptor<T, String> formattingDescriptor) {
    // Adapt the descriptor's Java implementation into Flink's TextFormatter interface.
    Function<T, String> format = formattingDescriptor.getJavaImplementation();
    return new TextOutputFormat.TextFormatter<T>() {
        @Override
        public String format(T value) {
            return format.apply(value);
        }
    };
}
 
Example #7
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
/**
 * 
 * <pre>
 *             (SRC A)     
 *             /     \      
 *        (SINK A)    (SINK B)
 * </pre>
 */
@Test
public void testBranchingWithMultipleDataSinksSmall() {
	try {
		String outPath1 = "/tmp/out1";
		String outPath2 = "/tmp/out2";

		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Long> source1 = env.generateSequence(0,1);

		source1.writeAsText(outPath1);
		source1.writeAsText(outPath2);

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		
		// ---------- check the optimizer plan ----------
		
		// number of sinks
		Assert.assertEquals("Wrong number of data sinks.", 2, oPlan.getDataSinks().size());
		
		// sinks contain all sink paths
		Set<String> allSinks = new HashSet<String>();
		allSinks.add(outPath1);
		allSinks.add(outPath2);
		
		for (SinkPlanNode n : oPlan.getDataSinks()) {
			String path = ((TextOutputFormat<String>)n.getSinkNode().getOperator()
					.getFormatWrapper().getUserCodeObject()).getOutputFilePath().toString();
			Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
		}
		
		// ---------- compile plan to job graph to verify that no error is thrown ----------
		
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #8
Source File: FormattingMapper.java    From flink with Apache License 2.0
public FormattingMapper(TextOutputFormat.TextFormatter<T> formatter) {
	this.formatter = formatter;
}
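
FormattingMapper is the glue between a user-supplied TextFormatter and a plain text sink: it maps each element to its formatted String before a TextOutputFormat writes it. A simplified sketch of how DataSet.writeAsFormattedText combines the two (the actual Flink implementation additionally closure-cleans the formatter):

public DataSink<String> writeAsFormattedText(String filePath, TextOutputFormat.TextFormatter<T> formatter) {
    // Format each element, then write the resulting Strings as text.
    return map(new FormattingMapper<>(formatter)).writeAsText(filePath);
}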
 
Example #9
Source File: FormattingMapper.java    From Flink-CEPplus with Apache License 2.0
public FormattingMapper(TextOutputFormat.TextFormatter<T> formatter) {
	this.formatter = formatter;
}
 
Example #10
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * 
 * <pre>
 *             (SRC A)     
 *             /     \      
 *        (SINK A)    (SINK B)
 * </pre>
 */
@Test
public void testBranchingWithMultipleDataSinksSmall() {
	try {
		String outPath1 = "/tmp/out1";
		String outPath2 = "/tmp/out2";

		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Long> source1 = env.generateSequence(0,1);

		source1.writeAsText(outPath1);
		source1.writeAsText(outPath2);

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		
		// ---------- check the optimizer plan ----------
		
		// number of sinks
		Assert.assertEquals("Wrong number of data sinks.", 2, oPlan.getDataSinks().size());
		
		// sinks contain all sink paths
		Set<String> allSinks = new HashSet<String>();
		allSinks.add(outPath1);
		allSinks.add(outPath2);
		
		for (SinkPlanNode n : oPlan.getDataSinks()) {
			String path = ((TextOutputFormat<String>)n.getSinkNode().getOperator()
					.getFormatWrapper().getUserCodeObject()).getOutputFilePath().toString();
			Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
		}
		
		// ---------- compile plan to job graph to verify that no error is thrown ----------
		
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #11
Source File: DataStream.java    From flink with Apache License 2.0
/**
 * Writes a DataStream to the file specified by path in text format.
 *
 * <p>For every element of the DataStream the result of {@link Object#toString()} is written.
 *
 * @param path
 *            The path pointing to the location the text file is written to
 * @param writeMode
 *            Controls the behavior for existing files. Options are
 *            NO_OVERWRITE and OVERWRITE.
 *
 * @return The closed DataStream.
 */
@PublicEvolving
public DataStreamSink<T> writeAsText(String path, WriteMode writeMode) {
	TextOutputFormat<T> tof = new TextOutputFormat<>(new Path(path));
	tof.setWriteMode(writeMode);
	return writeUsingOutputFormat(tof);
}
 
Example #12
Source File: Calculator.java    From OSTMap with Apache License 2.0
/**
 * run area calculation process
 * @param path path to config file
 * @throws Exception
 */
public void run(String path) throws Exception {

    readConfig(path);

    FlinkEnvManager fem = new FlinkEnvManager(path, "areaJob",
            TableIdentifier.RAW_TWITTER_DATA.get(),
            "HighScore");

    DataSet<Tuple2<Key,Value>> rawTwitterDataRows = fem.getDataFromAccumulo();

    DataSet<Tuple2<String,String>> geoList = rawTwitterDataRows.flatMap(new GeoExtrationFlatMap());

    DataSet<Tuple2<String,String>> reducedGroup = geoList
                                                    .groupBy(0)
                                                    .reduceGroup(new CoordGroupReduce());

    DataSet<Tuple3<String,Double,Integer>> userRanking = reducedGroup.flatMap(new GeoCalcFlatMap())
            .sortPartition(1, Order.DESCENDING).setParallelism(1);

    DataSet<Tuple2<Text,Mutation>> topTen = userRanking
            .groupBy(2)
            .reduceGroup(new TopTenGroupReduce("ac"));

    topTen.output(fem.getHadoopOF());

    fem.getExecutionEnvironment().execute("AreaProcess");

    // Note: this output format is configured but never attached to a sink;
    // the writeAsText call below sets the same path and write mode itself.
    TextOutputFormat<String> tof = new TextOutputFormat<>(new Path("file:///tmp/areauserranking"));
    tof.setWriteMode(FileSystem.WriteMode.OVERWRITE);

    userRanking.writeAsText("file:///tmp/areauserranking", FileSystem.WriteMode.OVERWRITE).setParallelism(1);

    fem.getExecutionEnvironment().execute("AreaCalculationProcess");

}
 
Example #13
Source File: PathCalculator.java    From OSTMap with Apache License 2.0
/**
 * run path calculation process
 * @param path path to config file
 * @throws Exception
 */
public void run(String path) throws Exception {

    readConfig(path);

    FlinkEnvManager fem = new FlinkEnvManager(path, "pathJob",
            TableIdentifier.RAW_TWITTER_DATA.get(),
            "HighScore");


    DataSet<Tuple2<Key,Value>> rawTwitterDataRows = fem.getDataFromAccumulo();

    DataSet<Tuple2<String,String>> geoList = rawTwitterDataRows.flatMap(new PathGeoExtrationFlatMap());

    DataSet<Tuple2<String,String>> reducedGroup = geoList
                                                    .groupBy(0)
                                                    .reduceGroup(new PathCoordGroupReduce());

    DataSet<Tuple3<String,Double,Integer>> userRanking = reducedGroup.flatMap(new PathGeoCalcFlatMap())
            .sortPartition(1, Order.DESCENDING).setParallelism(1);

    DataSet<Tuple2<Text,Mutation>> topTen = userRanking
                                                    .groupBy(2)
                                                    .reduceGroup(new TopTenGroupReduce("td"));

    topTen.output(fem.getHadoopOF());

    fem.getExecutionEnvironment().execute("PathProcess");

    // Note: this output format is configured but never attached to a sink;
    // the writeAsText call below sets the same path and write mode itself.
    TextOutputFormat<String> tof = new TextOutputFormat<>(new Path("file:///tmp/pathuserranking"));
    tof.setWriteMode(FileSystem.WriteMode.OVERWRITE);

    userRanking.writeAsText("file:///tmp/pathuserranking", FileSystem.WriteMode.OVERWRITE).setParallelism(1);

    fem.getExecutionEnvironment().execute("PathCalculationProcess");

}
 
Example #14
Source File: DataStream.java    From Flink-CEPplus with Apache License 2.0
/**
 * Writes a DataStream to the file specified by path in text format.
 *
 * <p>For every element of the DataStream the result of {@link Object#toString()} is written.
 *
 * @param path
 *            The path pointing to the location the text file is written to
 * @param writeMode
 *            Controls the behavior for existing files. Options are
 *            NO_OVERWRITE and OVERWRITE.
 *
 * @return The closed DataStream.
 */
@PublicEvolving
public DataStreamSink<T> writeAsText(String path, WriteMode writeMode) {
	TextOutputFormat<T> tof = new TextOutputFormat<>(new Path(path));
	tof.setWriteMode(writeMode);
	return writeUsingOutputFormat(tof);
}
 
Example #15
Source File: DataStream.java    From flink with Apache License 2.0
/**
 * Writes a DataStream to the file specified by path in text format.
 *
 * <p>For every element of the DataStream the result of {@link Object#toString()} is written.
 *
 * @param path
 *            The path pointing to the location the text file is written to
 * @param writeMode
 *            Controls the behavior for existing files. Options are
 *            NO_OVERWRITE and OVERWRITE.
 *
 * @return The closed DataStream.
 *
 * @deprecated Please use the {@link org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink} explicitly using the
 * {@link #addSink(SinkFunction)} method.
 */
@Deprecated
@PublicEvolving
public DataStreamSink<T> writeAsText(String path, WriteMode writeMode) {
	TextOutputFormat<T> tof = new TextOutputFormat<>(new Path(path));
	tof.setWriteMode(writeMode);
	return writeUsingOutputFormat(tof);
}
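
The deprecation note above recommends StreamingFileSink. A minimal sketch of that replacement, assuming a DataStream<String> named stream (the output directory is a placeholder):

StreamingFileSink<String> sink = StreamingFileSink
        .forRowFormat(new Path("/tmp/output"), new SimpleStringEncoder<String>("UTF-8"))
        .build();
stream.addSink(sink);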
 
Example #16
Source File: DataStream.java    From flink with Apache License 2.0
/**
 * Writes a DataStream to the file specified by path in text format.
 *
 * <p>For every element of the DataStream the result of {@link Object#toString()} is written.
 *
 * @param path
 *            The path pointing to the location the text file is written to.
 *
 * @return The closed DataStream.
 */
@PublicEvolving
public DataStreamSink<T> writeAsText(String path) {
	return writeUsingOutputFormat(new TextOutputFormat<T>(new Path(path)));
}
 
Example #17
Source File: DataSet.java    From flink with Apache License 2.0
/**
 * Writes a DataSet as text file(s) to the specified location.
 *
 * <p>For each element of the DataSet the result of {@link Object#toString()} is written.
 *
 * @param filePath The path pointing to the location the text file is written to.
 * @param writeMode Control the behavior for existing files. Options are NO_OVERWRITE and OVERWRITE.
 * @return The DataSink that writes the DataSet.
 *
 * @see TextOutputFormat
 * @see DataSet#writeAsText(String) Output files and directories
 */
public DataSink<T> writeAsText(String filePath, WriteMode writeMode) {
	TextOutputFormat<T> tof = new TextOutputFormat<>(new Path(filePath));
	tof.setWriteMode(writeMode);
	return output(tof);
}
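
A short usage sketch for this overload (the path is a placeholder):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.fromElements(1L, 2L, 3L)
        .writeAsText("file:///tmp/numbers", FileSystem.WriteMode.OVERWRITE)
        .setParallelism(1); // parallelism 1 yields a single file instead of a directory
env.execute();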
 
Example #18
Source File: DataSet.java    From flink with Apache License 2.0
/**
 * Writes a DataSet as text file(s) to the specified location.
 *
 * <p>For each element of the DataSet the result of {@link Object#toString()} is written.<br/>
 * <br/>
 * <span class="strong">Output files and directories</span><br/>
 * What output writeAsText() produces depends on the circumstances:
 * <ul>
 *   <li>
 * A directory is created and multiple files are written underneath. (Default behavior)<br/>
 * This sink creates a directory called "path1", and files "1", "2" ... are written underneath, depending on <a href="https://flink.apache.org/faq.html#what-is-the-parallelism-how-do-i-set-it">parallelism</a>
 * <pre>{@code .
 * └── path1/
 *     ├── 1
 *     ├── 2
 *     └── ...}</pre>
 * Code Example
 * <pre>{@code dataset.writeAsText("file:///path1");}</pre>
 *   </li>
 *   <li>
 * A single file called "path1" is created when parallelism is set to 1
 * <pre>{@code .
 * └── path1 }</pre>
 * Code Example
 * <pre>{@code // Parallelism is set for only this particular operation
 *dataset.writeAsText("file:///path1").setParallelism(1);
 *
 * // This has the same effect, but note that every operator's parallelism is set to one
 *env.setParallelism(1);
 *...
 *dataset.writeAsText("file:///path1"); }</pre>
 *   </li>
 *   <li>
 * A directory is always created when <a href="https://ci.apache.org/projects/flink/flink-docs-master/setup/config.html#file-systems">fs.output.always-create-directory</a>
 * is set to true in flink-conf.yaml file, even when parallelism is set to 1.
 * <pre>{@code .
 * └── path1/
 *     └── 1 }</pre>
 * Code Example
 * <pre>{@code // fs.output.always-create-directory = true
 *dataset.writeAsText("file:///path1").setParallelism(1); }</pre>
 *   </li>
 * </ul>
 *
 * @param filePath The path pointing to the location the text file or files under the directory are written to.
 * @return The DataSink that writes the DataSet.
 *
 * @see TextOutputFormat
 */
public DataSink<T> writeAsText(String filePath) {
	return output(new TextOutputFormat<T>(new Path(filePath)));
}
 
Example #19
Source File: DataStream.java    From flink with Apache License 2.0
/**
 * Writes a DataStream to the file specified by path in text format.
 *
 * <p>For every element of the DataStream the result of {@link Object#toString()} is written.
 *
 * @param path
 *            The path pointing to the location the text file is written to.
 *
 * @return The closed DataStream.
 *
 * @deprecated Please use the {@link org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink} explicitly using the
 * {@link #addSink(SinkFunction)} method.
 */
@Deprecated
@PublicEvolving
public DataStreamSink<T> writeAsText(String path) {
	return writeUsingOutputFormat(new TextOutputFormat<T>(new Path(path)));
}
 
Example #20
Source File: DataStream.java    From Flink-CEPplus with Apache License 2.0
/**
 * Writes a DataStream to the file specified by path in text format.
 *
 * <p>For every element of the DataStream the result of {@link Object#toString()} is written.
 *
 * @param path
 *            The path pointing to the location the text file is written to.
 *
 * @return The closed DataStream.
 */
@PublicEvolving
public DataStreamSink<T> writeAsText(String path) {
	return writeUsingOutputFormat(new TextOutputFormat<T>(new Path(path)));
}
 
Example #21
Source File: DataSet.java    From Flink-CEPplus with Apache License 2.0
/**
 * Writes a DataSet as text file(s) to the specified location.
 *
 * <p>For each element of the DataSet the result of {@link Object#toString()} is written.
 *
 * @param filePath The path pointing to the location the text file is written to.
 * @param writeMode Control the behavior for existing files. Options are NO_OVERWRITE and OVERWRITE.
 * @return The DataSink that writes the DataSet.
 *
 * @see TextOutputFormat
 * @see DataSet#writeAsText(String) Output files and directories
 */
public DataSink<T> writeAsText(String filePath, WriteMode writeMode) {
	TextOutputFormat<T> tof = new TextOutputFormat<>(new Path(filePath));
	tof.setWriteMode(writeMode);
	return output(tof);
}
 
Example #22
Source File: DataSet.java    From Flink-CEPplus with Apache License 2.0
/**
 * Writes a DataSet as text file(s) to the specified location.
 *
 * <p>For each element of the DataSet the result of {@link Object#toString()} is written.<br/>
 * <br/>
 * <span class="strong">Output files and directories</span><br/>
 * What output writeAsText() produces depends on the circumstances:
 * <ul>
 *   <li>
 * A directory is created and multiple files are written underneath. (Default behavior)<br/>
 * This sink creates a directory called "path1", and files "1", "2" ... are written underneath, depending on <a href="https://flink.apache.org/faq.html#what-is-the-parallelism-how-do-i-set-it">parallelism</a>
 * <pre>{@code .
 * └── path1/
 *     ├── 1
 *     ├── 2
 *     └── ...}</pre>
 * Code Example
 * <pre>{@code dataset.writeAsText("file:///path1");}</pre>
 *   </li>
 *   <li>
 * A single file called "path1" is created when parallelism is set to 1
 * <pre>{@code .
 * └── path1 }</pre>
 * Code Example
 * <pre>{@code // Parallelism is set for only this particular operation
 *dataset.writeAsText("file:///path1").setParallelism(1);
 *
 * // This has the same effect, but note that every operator's parallelism is set to one
 *env.setParallelism(1);
 *...
 *dataset.writeAsText("file:///path1"); }</pre>
 *   </li>
 *   <li>
 * A directory is always created when <a href="https://ci.apache.org/projects/flink/flink-docs-master/setup/config.html#file-systems">fs.output.always-create-directory</a>
 * is set to true in flink-conf.yaml file, even when parallelism is set to 1.
 * <pre>{@code .
 * └── path1/
 *     └── 1 }</pre>
 * Code Example
 * <pre>{@code // fs.output.always-create-directory = true
 *dataset.writeAsText("file:///path1").setParallelism(1); }</pre>
 *   </li>
 * </ul>
 *
 * @param filePath The path pointing to the location the text file or files under the directory are written to.
 * @return The DataSink that writes the DataSet.
 *
 * @see TextOutputFormat
 */
public DataSink<T> writeAsText(String filePath) {
	return output(new TextOutputFormat<T>(new Path(filePath)));
}