Java Code Examples for org.apache.flink.core.fs.FileSystem#WriteMode

The following examples show how to use org.apache.flink.core.fs.FileSystem#WriteMode. They are drawn from open-source projects; the source file and originating project are noted above each example.
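Before the project examples, here is a minimal, self-contained sketch of the basic pattern: passing a FileSystem.WriteMode to a DataSet sink. The output path and the tiny input data set are illustrative placeholders, not taken from any of the projects below.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.core.fs.FileSystem;

public class WriteModeSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> lines = env.fromElements("a", "b", "c");

        // OVERWRITE replaces existing output at the target path;
        // NO_OVERWRITE (the default) makes the job fail if the path already exists.
        lines.writeAsText("/tmp/write-mode-demo", FileSystem.WriteMode.OVERWRITE)
            .setParallelism(1);

        env.execute("WriteMode demo");
    }
}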
Example 1
Source File: CsvSinkBatchOp.java    From Alink with Apache License 2.0
@Override
public CsvSinkBatchOp sinkFrom(BatchOperator in) {
    final String filePath = getFilePath();
    final String fieldDelim = getFieldDelimiter();
    final int numFiles = getNumFiles();
    final TypeInformation[] types = in.getColTypes();
    final Character quoteChar = getQuoteChar();

    FileSystem.WriteMode mode = FileSystem.WriteMode.NO_OVERWRITE;
    if (getOverwriteSink()) {
        mode = FileSystem.WriteMode.OVERWRITE;
    }

    DataSet<String> textLines = ((DataSet<Row>) in.getDataSet())
        .map(new CsvUtil.FormatCsvFunc(types, fieldDelim, quoteChar))
        .map(new MapFunction<Row, String>() {
            @Override
            public String map(Row value) throws Exception {
                return (String) value.getField(0);
            }
        });

    textLines.writeAsText(filePath, mode).name("csv_sink").setParallelism(numFiles);
    return this;
}
 
Example 2
Source File: CsvSinkStreamOp.java    From Alink with Apache License 2.0
@Override
public CsvSinkStreamOp sinkFrom(StreamOperator in) {
    this.schema = in.getSchema();

    final String filePath = getFilePath();
    final String fieldDelim = getFieldDelimiter();
    final String rowDelimiter = getRowDelimiter();
    final int numFiles = getNumFiles();
    final TypeInformation[] types = in.getColTypes();
    final Character quoteChar = getQuoteChar();

    FileSystem.WriteMode writeMode;
    if (getOverwriteSink()) {
        writeMode = FileSystem.WriteMode.OVERWRITE;
    } else {
        writeMode = FileSystem.WriteMode.NO_OVERWRITE;
    }

    DataStream<Row> output = ((DataStream<Row>) in.getDataStream())
        .map(new CsvUtil.FormatCsvFunc(types, fieldDelim, quoteChar))
        .setParallelism(numFiles);

    CsvTableSink cts = new CsvTableSink(filePath, rowDelimiter, numFiles, writeMode);
    cts.emitDataStream(output);
    return this;
}
 
Example 3
Source File: CsvTableSink.java    From flink with Apache License 2.0
/**
 * A simple {@link TableSink} to emit data as CSV files.
 *
 * @param path       The output path to write the Table to.
 * @param fieldDelim The field delimiter
 * @param numFiles   The number of files to write to
 * @param writeMode  The write mode to specify whether existing files are overwritten or not.
 */
public CsvTableSink(
	String path,
	String fieldDelim,
	int numFiles,
	FileSystem.WriteMode writeMode) {
	this.path = path;
	this.fieldDelim = fieldDelim;
	this.numFiles = numFiles;
	this.writeMode = writeMode;
}
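
As a quick usage sketch for this constructor (the path and delimiter below are placeholder values, not taken from the example):

// All argument values here are illustrative placeholders.
CsvTableSink sink = new CsvTableSink(
    "/tmp/output-csv",                  // output path
    "|",                                // field delimiter
    1,                                  // write a single output file
    FileSystem.WriteMode.OVERWRITE);    // replace existing output if present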
 
Example 4
Source File: RheemFileOutputFormat.java    From rheem with Apache License 2.0
/**
 * Initialization of the distributed file system if it is used.
 *
 * @param parallelism The task parallelism.
 */
@Override
public void initializeGlobal(int parallelism) throws IOException {
    try {
        final Path path = getOutputFilePath();
        final FileSystem fs = path.getFileSystem();

        // only distributed file systems can be initialized at start-up time.
        if (fs.isDistributedFS()) {

            final FileSystem.WriteMode writeMode = getWriteMode();
            final FileOutputFormat.OutputDirectoryMode outDirMode = getOutputDirectoryMode();

            if (parallelism == 1 && outDirMode == FileOutputFormat.OutputDirectoryMode.PARONLY) {
                // output is not written in parallel and should be written to a single file.
                // prepare distributed output path
                if (!fs.initOutPathDistFS(path, writeMode, false)) {
                    // output preparation failed! Cancel task.
                    throw new IOException("Output path could not be initialized.");
                }

            } else {
                // output should be written to a directory

                // only distributed file systems can be initialized at start-up time.
                if (!fs.initOutPathDistFS(path, writeMode, true)) {
                    throw new IOException("Output directory could not be created.");
                }
            }
        }
    } catch (Exception e) {
        throw new RheemException(e);
    }
}
 
Example 5
Source File: CsvTableSink.java    From flink with Apache License 2.0
/**
 * A simple {@link TableSink} to emit data as CSV files.
 *
 * @param path       The output path to write the Table to.
 * @param fieldDelim The field delimiter
 * @param numFiles   The number of files to write to
 * @param writeMode  The write mode to specify whether existing files are overwritten or not.
 * @param fieldNames The field names of the table to emit.
 * @param fieldTypes The field types of the table to emit.
 */
public CsvTableSink(
		String path,
		String fieldDelim,
		int numFiles,
		FileSystem.WriteMode writeMode,
		String[] fieldNames,
		DataType[] fieldTypes) {
	this.path = path;
	this.fieldDelim = fieldDelim;
	this.numFiles = numFiles;
	this.writeMode = writeMode;
	this.fieldNames = fieldNames;
	this.fieldTypes = fieldTypes;
}
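
A corresponding construction sketch for this schema-aware variant; every argument value is a placeholder, and DataTypes refers to the standard org.apache.flink.table.api.DataTypes factory:

// Placeholder schema and values for illustration only.
CsvTableSink sink = new CsvTableSink(
    "/tmp/output-csv",                          // output path
    ",",                                        // field delimiter
    1,                                          // single output file
    FileSystem.WriteMode.NO_OVERWRITE,          // fail if output already exists
    new String[] {"name", "score"},             // field names
    new DataType[] {DataTypes.STRING(), DataTypes.INT()});  // field types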
 
Example 6
Source File: RheemFileOutputFormat.java    From rheem with Apache License 2.0
public void setWriteMode(FileSystem.WriteMode mode) {
    if (mode == null) {
        throw new NullPointerException();
    }
    this.writeMode = mode;
}
 
Example 7
Source File: RheemFileOutputFormat.java    From rheem with Apache License 2.0
public FileSystem.WriteMode getWriteMode() {
    return this.writeMode;
}
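
The setter/getter pair above is straightforward; a minimal usage sketch, assuming format is an already-constructed RheemFileOutputFormat instance:

// 'format' is assumed to be created elsewhere; setWriteMode rejects null.
format.setWriteMode(FileSystem.WriteMode.OVERWRITE);
FileSystem.WriteMode current = format.getWriteMode();   // returns OVERWRITE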
 
Example 8
Source File: CsvTableSinkFactoryBase.java    From flink with Apache License 2.0
protected CsvTableSink createTableSink(
		Boolean isStreaming,
		Map<String, String> properties) {

	DescriptorProperties params = new DescriptorProperties();
	params.putProperties(properties);

	// validate
	new FileSystemValidator().validate(params);
	new OldCsvValidator().validate(params);
	new SchemaValidator(isStreaming, false, false).validate(params);

	// build
	TableSchema tableSchema = TableSchemaUtils.getPhysicalSchema(params.getTableSchema(SCHEMA));

	// if a format schema is defined, it is used regardless of whether derive-schema is set
	final boolean hasSchema = params.hasPrefix(FORMAT_FIELDS);
	if (hasSchema) {
		TableSchema formatSchema = params.getTableSchema(FORMAT_FIELDS);
		if (!getFieldLogicalTypes(formatSchema).equals(getFieldLogicalTypes(tableSchema))) {
			throw new TableException(String.format(
					"Encodings that differ from the schema are not supported yet for" +
							" CsvTableSink, format schema is '%s', but table schema is '%s'.",
					formatSchema,
					tableSchema));
		}
	}

	String path = params.getString(CONNECTOR_PATH);
	String fieldDelimiter = params.getOptionalString(FORMAT_FIELD_DELIMITER).orElse(",");
	Optional<String> writeModeParm = params.getOptionalString(FORMAT_WRITE_MODE);
	FileSystem.WriteMode writeMode =
			(writeModeParm.isPresent()) ? FileSystem.WriteMode.valueOf(writeModeParm.get()) : null;
	int numFiles = params.getOptionalInt(FORMAT_NUM_FILES).orElse(-1);

	// bridge to java.sql.Timestamp/Time/Date
	DataType[] dataTypes = Arrays.stream(tableSchema.getFieldDataTypes())
		.map(dt -> {
			switch (dt.getLogicalType().getTypeRoot()) {
				case TIMESTAMP_WITHOUT_TIME_ZONE:
					return dt.bridgedTo(Timestamp.class);
				case TIME_WITHOUT_TIME_ZONE:
					return dt.bridgedTo(Time.class);
				case DATE:
					return dt.bridgedTo(Date.class);
				default:
					return dt;
			}
		})
		.toArray(DataType[]::new);

	return new CsvTableSink(
		path,
		fieldDelimiter,
		numFiles,
		writeMode,
		tableSchema.getFieldNames(),
		dataTypes);
}
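
One detail worth noting in this factory: the write mode is resolved with FileSystem.WriteMode.valueOf(...), so the configured value has to match an enum constant name exactly; otherwise valueOf throws IllegalArgumentException. A minimal sketch (the string literal is illustrative):

// Must be exactly "OVERWRITE" or "NO_OVERWRITE"; any other value throws IllegalArgumentException.
FileSystem.WriteMode mode = FileSystem.WriteMode.valueOf("OVERWRITE");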