Java Code Examples for org.apache.flink.api.common.io.OutputFormat

The following examples show how to use org.apache.flink.api.common.io.OutputFormat. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: flink   Author: apache   File: DataStream.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes this DataStream as CSV to the file denoted by {@code path}.
 *
 * <p>For every field of an element of the DataStream the result of {@link Object#toString()}
 * is written. The stream must consist of tuples; any other element type is rejected with an
 * {@link IllegalArgumentException} before a sink is created.
 *
 * @param path
 *            the path pointing to the location the text file is written to
 * @param writeMode
 *            Controls the behavior for existing files. Options are
 *            NO_OVERWRITE and OVERWRITE. When {@code null}, the write mode of the
 *            freshly created output format is left untouched.
 * @param rowDelimiter
 *            the delimiter for two rows
 * @param fieldDelimiter
 *            the delimiter for two fields
 *
 * @return the sink produced by {@code writeUsingOutputFormat} for the CSV output format
 *
 * @deprecated Please use the {@link org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink} explicitly using the
 * {@link #addSink(SinkFunction)} method.
 */
@SuppressWarnings("unchecked")
@Deprecated
@PublicEvolving
public <X extends Tuple> DataStreamSink<T> writeAsCsv(
		String path,
		WriteMode writeMode,
		String rowDelimiter,
		String fieldDelimiter) {
	final boolean isTupleStream = getType().isTupleType();
	Preconditions.checkArgument(
		isTupleStream,
		"The writeAsCsv() method can only be used on data streams of tuples.");

	final Path targetFile = new Path(path);
	final CsvOutputFormat<X> csvFormat = new CsvOutputFormat<>(targetFile, rowDelimiter, fieldDelimiter);

	// only override the write mode when the caller supplied one
	if (writeMode != null) {
		csvFormat.setWriteMode(writeMode);
	}

	// unchecked: the tuple check above is the only guard that T is a tuple type
	return writeUsingOutputFormat((OutputFormat<T>) csvFormat);
}
 
Example #2
Source Project: Flink-CEPplus   Author: ljygz   File: DataStream.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes this DataStream as CSV to the file denoted by {@code path}.
 *
 * <p>For every field of an element of the DataStream the result of {@link Object#toString()}
 * is written. The stream must consist of tuples; any other element type is rejected with an
 * {@link IllegalArgumentException} before a sink is created.
 *
 * @param path
 *            the path pointing to the location the text file is written to
 * @param writeMode
 *            Controls the behavior for existing files. Options are
 *            NO_OVERWRITE and OVERWRITE. When {@code null}, the write mode of the
 *            freshly created output format is left untouched.
 * @param rowDelimiter
 *            the delimiter for two rows
 * @param fieldDelimiter
 *            the delimiter for two fields
 *
 * @return the sink produced by {@code writeUsingOutputFormat} for the CSV output format
 */
@SuppressWarnings("unchecked")
@PublicEvolving
public <X extends Tuple> DataStreamSink<T> writeAsCsv(
		String path,
		WriteMode writeMode,
		String rowDelimiter,
		String fieldDelimiter) {
	final boolean isTupleStream = getType().isTupleType();
	Preconditions.checkArgument(
		isTupleStream,
		"The writeAsCsv() method can only be used on data streams of tuples.");

	final Path targetFile = new Path(path);
	final CsvOutputFormat<X> csvFormat = new CsvOutputFormat<>(targetFile, rowDelimiter, fieldDelimiter);

	// only override the write mode when the caller supplied one
	if (writeMode != null) {
		csvFormat.setWriteMode(writeMode);
	}

	// unchecked: the tuple check above is the only guard that T is a tuple type
	return writeUsingOutputFormat((OutputFormat<T>) csvFormat);
}
 
Example #3
Source Project: flink   Author: flink-tpc-ds   File: CassandraConnectorITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Pushes every row of {@code rowCollection} through a {@link CassandraRowOutputFormat} and
 * checks that the select query returns as many rows as were written.
 */
@Test
public void testCassandraBatchRowFormat() throws Exception {
	OutputFormat<Row> rowFormat = new CassandraRowOutputFormat(injectTableName(INSERT_DATA_QUERY), builder);
	try {
		rowFormat.configure(new Configuration());
		// single task: index 0 out of 1
		rowFormat.open(0, 1);
		for (Row row : rowCollection) {
			rowFormat.writeRecord(row);
		}
	} finally {
		// close the format even if configure/open/write threw
		rowFormat.close();
	}

	List<com.datastax.driver.core.Row> storedRows =
		session.execute(injectTableName(SELECT_DATA_QUERY)).all();
	Assert.assertEquals(rowCollection.size(), storedRows.size());
}
 
Example #4
Source Project: flink   Author: apache   File: JobGraphGenerator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the job vertex for a data sink plan node.
 *
 * <p>The vertex runs {@link DataSinkTask}; the sink's output format wrapper and its parameters
 * are written into the vertex's task configuration under a freshly created operator id.
 *
 * @param node the sink plan node to translate
 * @return the configured vertex
 * @throws CompilerException declared by the signature; not thrown directly by this method's own statements
 */
private JobVertex createDataSinkVertex(SinkPlanNode node) throws CompilerException {
	final InputOutputFormatVertex sinkVertex = new InputOutputFormatVertex(node.getNodeName());
	final TaskConfig taskConfig = new TaskConfig(sinkVertex.getConfiguration());

	final OperatorID operatorID = new OperatorID();

	sinkVertex.setResources(node.getMinResources(), node.getPreferredResources());
	sinkVertex.setInvokableClass(DataSinkTask.class);
	sinkVertex.setFormatDescription(
		operatorID,
		getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper()));

	// set user code: register the format and its parameters, then persist both into the task config
	final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
	final InputOutputFormatContainer formatContainer = new InputOutputFormatContainer(contextClassLoader);
	final UserCodeWrapper<? extends OutputFormat<?>> formatWrapper =
		(UserCodeWrapper<? extends OutputFormat<?>>) node.getProgramOperator().getUserCodeWrapper();
	formatContainer
		.addOutputFormat(operatorID, formatWrapper)
		.addParameters(operatorID, node.getProgramOperator().getParameters())
		.write(taskConfig);

	return sinkVertex;
}
 
Example #5
Source Project: flink   Author: flink-tpc-ds   File: InputOutputFormatContainerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a container holding a single output format and one parameter through a
 * {@link TaskConfig} and verifies what a second container reads back.
 */
@Test
public void testOnlyOutputFormat() {
	// --- write side ---
	InputOutputFormatContainer writtenContainer =
		new InputOutputFormatContainer(Thread.currentThread().getContextClassLoader());

	OperatorID operatorID = new OperatorID();
	writtenContainer.addOutputFormat(operatorID, new DiscardingOutputFormat<>());

	Configuration writtenParameters = new Configuration();
	writtenParameters.setString("parameter1", "bcd234");
	writtenContainer.addParameters(operatorID, writtenParameters);

	TaskConfig taskConfig = new TaskConfig(new Configuration());
	writtenContainer.write(taskConfig);

	// --- read side ---
	InputOutputFormatContainer restoredContainer =
		new InputOutputFormatContainer(taskConfig, getClass().getClassLoader());

	// exactly one output format and no input format must come back
	Map<OperatorID, UserCodeWrapper<? extends OutputFormat<?>>> restoredOutputFormats =
		restoredContainer.getOutputFormats();
	assertEquals(1, restoredOutputFormats.size());
	assertEquals(0, restoredContainer.getInputFormats().size());

	assertTrue(restoredOutputFormats.get(operatorID).getUserCodeObject() instanceof DiscardingOutputFormat);

	// the single parameter must survive with its value
	Configuration restoredParameters = restoredContainer.getParameters(operatorID);
	assertEquals(1, restoredParameters.keySet().size());
	assertEquals("bcd234", restoredParameters.getString("parameter1", null));
}
 
Example #6
Source Project: flink   Author: apache   File: InputOutputFormatContainerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a container holding a single output format and one parameter through a
 * {@link TaskConfig} and verifies what a second container reads back.
 */
@Test
public void testOnlyOutputFormat() {
	// --- write side ---
	InputOutputFormatContainer writtenContainer =
		new InputOutputFormatContainer(Thread.currentThread().getContextClassLoader());

	OperatorID operatorID = new OperatorID();
	writtenContainer.addOutputFormat(operatorID, new DiscardingOutputFormat<>());

	Configuration writtenParameters = new Configuration();
	writtenParameters.setString("parameter1", "bcd234");
	writtenContainer.addParameters(operatorID, writtenParameters);

	TaskConfig taskConfig = new TaskConfig(new Configuration());
	writtenContainer.write(taskConfig);

	// --- read side ---
	InputOutputFormatContainer restoredContainer =
		new InputOutputFormatContainer(taskConfig, getClass().getClassLoader());

	// exactly one output format and no input format must come back
	Map<OperatorID, UserCodeWrapper<? extends OutputFormat<?>>> restoredOutputFormats =
		restoredContainer.getOutputFormats();
	assertEquals(1, restoredOutputFormats.size());
	assertEquals(0, restoredContainer.getInputFormats().size());

	assertTrue(restoredOutputFormats.get(operatorID).getUserCodeObject() instanceof DiscardingOutputFormat);

	// the single parameter must survive with its value
	Configuration restoredParameters = restoredContainer.getParameters(operatorID);
	assertEquals(1, restoredParameters.keySet().size());
	assertEquals("bcd234", restoredParameters.getString("parameter1", null));
}
 
Example #7
Source Project: flink   Author: flink-tpc-ds   File: JobGraphGenerator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the job vertex for a data sink plan node.
 *
 * <p>The vertex runs {@link DataSinkTask}; the sink's output format wrapper and its parameters
 * are written into the vertex's task configuration under a freshly created operator id.
 *
 * @param node the sink plan node to translate
 * @return the configured vertex
 * @throws CompilerException declared by the signature; not thrown directly by this method's own statements
 */
private JobVertex createDataSinkVertex(SinkPlanNode node) throws CompilerException {
	final InputOutputFormatVertex sinkVertex = new InputOutputFormatVertex(node.getNodeName());
	final TaskConfig taskConfig = new TaskConfig(sinkVertex.getConfiguration());

	final OperatorID operatorID = new OperatorID();

	sinkVertex.setResources(node.getMinResources(), node.getPreferredResources());
	sinkVertex.setInvokableClass(DataSinkTask.class);
	sinkVertex.setFormatDescription(
		operatorID,
		getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper()));

	// set user code: register the format and its parameters, then persist both into the task config
	final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
	final InputOutputFormatContainer formatContainer = new InputOutputFormatContainer(contextClassLoader);
	final UserCodeWrapper<? extends OutputFormat<?>> formatWrapper =
		(UserCodeWrapper<? extends OutputFormat<?>>) node.getProgramOperator().getUserCodeWrapper();
	formatContainer
		.addOutputFormat(operatorID, formatWrapper)
		.addParameters(operatorID, node.getProgramOperator().getParameters())
		.write(taskConfig);

	return sinkVertex;
}
 
Example #8
Source Project: Alink   Author: alibaba   File: HiveDB.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Attaches a Hive output to the given batch table.
 *
 * <p>The sink is created through a {@link HiveTableFactory}, configured with the static
 * partition spec taken from {@code HiveSinkParams.PARTITION}, and always set to overwrite.
 * Any failure along the way is rethrown as a {@link RuntimeException} carrying the cause.
 *
 * @param tableName name of the target table inside this database
 * @param in        the table whose rows are written
 * @param parameter sink parameters; the partition entry is read here
 * @param sessionId not used by this method
 */
@Override
public void sinkBatch(String tableName, Table in, Params parameter, Long sessionId) {
    try {
        checkTableExistenceBeforeSink(tableName, in, parameter);

        // resolve the target table and build the Hive sink for it
        HiveTableFactory tableFactory = new HiveTableFactory(getCatalog().getHiveConf());
        ObjectPath targetPath = ObjectPath.fromString(dbName + "." + tableName);
        CatalogTable targetTable = getCatalogTable(tableName);
        HiveTableSink hiveSink = (HiveTableSink) tableFactory.createTableSink(targetPath, targetTable);

        hiveSink.setStaticPartition(getStaticPartitionSpec(parameter.get(HiveSinkParams.PARTITION)));
        hiveSink.setOverwrite(true);

        // hand the sink's output format to the batch data set
        OutputFormat<Row> hiveFormat = hiveSink.getOutputFormat();
        BatchOperator.fromTable(in)
            .getDataSet()
            .output(hiveFormat)
            .name("hive_sink_" + tableName);
    } catch (Exception e) {
        throw new RuntimeException("Fail to sink batch table:", e);
    }
}
 
Example #9
Source Project: flink   Author: apache   File: CassandraConnectorITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Pushes every row of {@code rowCollection} through a {@link CassandraRowOutputFormat} and
 * checks that the select query returns as many rows as were written.
 */
@Test
public void testCassandraBatchRowFormat() throws Exception {
	OutputFormat<Row> rowFormat = new CassandraRowOutputFormat(injectTableName(INSERT_DATA_QUERY), builder);
	try {
		rowFormat.configure(new Configuration());
		// single task: index 0 out of 1
		rowFormat.open(0, 1);
		for (Row row : rowCollection) {
			rowFormat.writeRecord(row);
		}
	} finally {
		// close the format even if configure/open/write threw
		rowFormat.close();
	}

	List<com.datastax.driver.core.Row> storedRows =
		session.execute(injectTableName(SELECT_DATA_QUERY)).all();
	Assert.assertEquals(rowCollection.size(), storedRows.size());
}
 
Example #10
Source Project: Flink-CEPplus   Author: ljygz   File: DataSink.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a data sink over the given data set.
 *
 * <p>Arguments are validated in the order format, type, data; the first {@code null} found
 * determines the exception message.
 *
 * @param data   the data set that feeds the sink
 * @param format the output format used by the sink
 * @param type   type information of the elements
 * @throws IllegalArgumentException if any argument is {@code null}
 */
public DataSink(DataSet<T> data, OutputFormat<T> format, TypeInformation<T> type) {
	if (format == null) {
		throw new IllegalArgumentException("The output format must not be null.");
	}
	this.format = format;

	if (type == null) {
		throw new IllegalArgumentException("The input type information must not be null.");
	}
	this.type = type;

	if (data == null) {
		throw new IllegalArgumentException("The data set must not be null.");
	}
	this.data = data;
}
 
Example #11
Source Project: Flink-CEPplus   Author: ljygz   File: DataSet.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Shared implementation behind the CSV writers: creates a {@link CsvOutputFormat} for the
 * given file and delimiters and registers it through {@code output}.
 *
 * @param filePath       target file
 * @param rowDelimiter   delimiter between rows
 * @param fieldDelimiter delimiter between fields
 * @param wm             write mode to apply, or {@code null} to keep the format's own setting
 * @return the sink returned by {@code output}
 * @throws IllegalArgumentException if this data set is not of a tuple type
 */
@SuppressWarnings("unchecked")
private <X extends Tuple> DataSink<T> internalWriteAsCsv(Path filePath, String rowDelimiter, String fieldDelimiter, WriteMode wm) {
	final boolean isTupleSet = getType().isTupleType();
	Preconditions.checkArgument(isTupleSet, "The writeAsCsv() method can only be used on data sets of tuples.");

	final CsvOutputFormat<X> csvFormat = new CsvOutputFormat<>(filePath, rowDelimiter, fieldDelimiter);
	if (wm != null) {
		csvFormat.setWriteMode(wm);
	}

	// unchecked: the tuple check above is the only guard that T is a tuple type
	return output((OutputFormat<T>) csvFormat);
}
 
Example #12
Source Project: Flink-CEPplus   Author: ljygz   File: DataSet.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Emits a DataSet using an {@link OutputFormat}. Calling this method creates a new
 * {@link DataSink} for this DataSet and registers it with the execution context.
 * Programs may have multiple data sinks, and a DataSet may also have multiple consumers
 * (data sinks or transformations) at the same time.
 *
 * <p>If the format implements {@link InputTypeConfigurable}, it is told this DataSet's type
 * and the context's config before the sink is created.
 *
 * @param outputFormat The OutputFormat to process the DataSet; must not be {@code null}.
 * @return The DataSink that processes the DataSet.
 *
 * @see OutputFormat
 * @see DataSink
 */
public DataSink<T> output(OutputFormat<T> outputFormat) {
	Preconditions.checkNotNull(outputFormat);

	// type-aware formats receive the element type up front
	if (outputFormat instanceof InputTypeConfigurable) {
		final InputTypeConfigurable typeAwareFormat = (InputTypeConfigurable) outputFormat;
		typeAwareFormat.setInputType(getType(), this.context.getConfig());
	}

	final DataSink<T> registeredSink = new DataSink<>(this, outputFormat, getType());
	this.context.registerDataSink(registeredSink);
	return registeredSink;
}
 
Example #13
Source Project: Flink-CEPplus   Author: ljygz   File: DataSinkTask.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads the OutputFormat from the task configuration and runs its {@code configure()} step.
 *
 * <p>The format is instantiated from the stub wrapper stored in the task config, using the
 * user-code class loader. {@code configure()} is then invoked with the user-code class
 * loader installed as the thread's context class loader; the previous loader is restored
 * afterwards in all cases.
 *
 * @throws RuntimeException
 *         if the loaded object is not an OutputFormat, or if the user's
 *         {@code configure()} method fails with any Throwable.
 */
private void initOutputFormat() {
	final ClassLoader userCodeClassLoader = getUserCodeClassLoader();
	// obtain task configuration (including stub parameters)
	this.config = new TaskConfig(getTaskConfiguration());

	try {
		this.format = config
			.<OutputFormat<IT>>getStubWrapper(userCodeClassLoader)
			.getUserCodeObject(OutputFormat.class, userCodeClassLoader);

		// check if the class is a subclass, if the check is required
		final Class<?> formatClass = this.format.getClass();
		if (!OutputFormat.class.isAssignableFrom(formatClass)) {
			throw new RuntimeException("The class '" + formatClass.getName() + "' is not a subclass of '" +
					OutputFormat.class.getName() + "' as is required.");
		}
	} catch (ClassCastException e) {
		throw new RuntimeException("The stub class is not a proper subclass of " + OutputFormat.class.getName(), e);
	}

	// configure the stub under the user-code class loader; failures are reported as user-code errors
	final Thread currentThread = Thread.currentThread();
	final ClassLoader previousClassLoader = currentThread.getContextClassLoader();
	try {
		currentThread.setContextClassLoader(userCodeClassLoader);
		this.format.configure(this.config.getStubParameters());
	} catch (Throwable t) {
		throw new RuntimeException("The user defined 'configure()' method in the Output Format caused an error: "
			+ t.getMessage(), t);
	} finally {
		currentThread.setContextClassLoader(previousClassLoader);
	}
}
 
Example #14
Source Project: Flink-CEPplus   Author: ljygz   File: OutputFormatSinkFunctionTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@link OutputFormatSinkFunction#setRuntimeContext} treats rich and plain output formats.
 */
@Test
public void setRuntimeContext() throws Exception {
	final RuntimeContext runtimeContext = Mockito.mock(RuntimeContext.class);

	// rich format: the runtime context has to be forwarded exactly once
	final RichOutputFormat<?> richFormat = Mockito.mock(RichOutputFormat.class);
	new OutputFormatSinkFunction<>(richFormat).setRuntimeContext(runtimeContext);
	Mockito.verify(richFormat, Mockito.times(1))
		.setRuntimeContext(Mockito.eq(runtimeContext));

	// plain format: the call simply has to go through without an error
	final OutputFormat<?> plainFormat = Mockito.mock(OutputFormat.class);
	new OutputFormatSinkFunction<>(plainFormat).setRuntimeContext(runtimeContext);
}
 
Example #15
Source Project: flink   Author: apache   File: OutputFormatSinkFunctionTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@link OutputFormatSinkFunction#setRuntimeContext} treats rich and plain output formats.
 */
@Test
public void setRuntimeContext() throws Exception {
	final RuntimeContext runtimeContext = Mockito.mock(RuntimeContext.class);

	// rich format: the runtime context has to be forwarded exactly once
	final RichOutputFormat<?> richFormat = Mockito.mock(RichOutputFormat.class);
	new OutputFormatSinkFunction<>(richFormat).setRuntimeContext(runtimeContext);
	Mockito.verify(richFormat, Mockito.times(1))
		.setRuntimeContext(Mockito.eq(runtimeContext));

	// plain format: the call simply has to go through without an error
	final OutputFormat<?> plainFormat = Mockito.mock(OutputFormat.class);
	new OutputFormatSinkFunction<>(plainFormat).setRuntimeContext(runtimeContext);
}
 
Example #16
Source Project: flink   Author: flink-tpc-ds   File: DataSet.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Emits a DataSet using an {@link OutputFormat}. Calling this method creates a new
 * {@link DataSink} for this DataSet and registers it with the execution context.
 * Programs may have multiple data sinks, and a DataSet may also have multiple consumers
 * (data sinks or transformations) at the same time.
 *
 * <p>If the format implements {@link InputTypeConfigurable}, it is told this DataSet's type
 * and the context's config before the sink is created.
 *
 * @param outputFormat The OutputFormat to process the DataSet; must not be {@code null}.
 * @return The DataSink that processes the DataSet.
 *
 * @see OutputFormat
 * @see DataSink
 */
public DataSink<T> output(OutputFormat<T> outputFormat) {
	Preconditions.checkNotNull(outputFormat);

	// type-aware formats receive the element type up front
	if (outputFormat instanceof InputTypeConfigurable) {
		final InputTypeConfigurable typeAwareFormat = (InputTypeConfigurable) outputFormat;
		typeAwareFormat.setInputType(getType(), this.context.getConfig());
	}

	final DataSink<T> registeredSink = new DataSink<>(this, outputFormat, getType());
	this.context.registerDataSink(registeredSink);
	return registeredSink;
}
 
Example #17
Source Project: flink   Author: flink-tpc-ds   File: InputOutputFormatContainer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the single registered output format together with its operator id.
 *
 * <p>The format object is materialized from its wrapper with the user-code class loader.
 *
 * @param <IT> record type the caller expects the format to accept (unchecked)
 * @return pair of operator id and output format
 * @throws IllegalStateException if the number of registered output formats is not exactly one
 */
@SuppressWarnings("unchecked")
public <IT> Pair<OperatorID, OutputFormat<IT>> getUniqueOutputFormat() {
	final Map<OperatorID, UserCodeWrapper<? extends OutputFormat<?>>> registeredFormats = formats.getOutputFormats();
	Preconditions.checkState(registeredFormats.size() == 1);

	final Map.Entry<OperatorID, UserCodeWrapper<? extends OutputFormat<?>>> onlyEntry =
		registeredFormats.entrySet().iterator().next();

	final OperatorID operatorId = onlyEntry.getKey();
	final OutputFormat<IT> outputFormat =
		(OutputFormat<IT>) onlyEntry.getValue().getUserCodeObject(OutputFormat.class, userCodeClassLoader);

	return new ImmutablePair<>(operatorId, outputFormat);
}
 
Example #18
Source Project: flink   Author: flink-tpc-ds   File: InputOutputFormatContainer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers the output format wrapper for an operator.
 *
 * @param operatorId id of the operator the format belongs to; must not be {@code null}
 * @param wrapper    wrapper around the output format; must not be {@code null}
 * @throws IllegalStateException if an output format is already registered for {@code operatorId}
 */
public void addOutputFormat(OperatorID operatorId, UserCodeWrapper<? extends OutputFormat<?>> wrapper) {
	checkNotNull(operatorId);

	// an operator may carry at most one output format
	final boolean alreadyRegistered = outputFormats.containsKey(operatorId);
	if (alreadyRegistered) {
		throw new IllegalStateException("The output format has been set for the operator: " + operatorId);
	}

	// the wrapper is validated only after the duplicate check, as before
	checkNotNull(wrapper);
	outputFormats.put(operatorId, wrapper);
}
 
Example #19
Source Project: flink   Author: apache   File: InputOutputFormatVertex.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Configures every output format of this vertex and, for formats implementing
 * {@link FinalizeOnMaster}, calls {@code finalizeGlobal} with this vertex's parallelism.
 *
 * <p>User code runs with {@code loader} installed as the thread's context class loader;
 * the previous loader is restored afterwards in all cases.
 *
 * @param loader the class loader to use for user code
 * @throws Exception if configuring an output format fails (the original Throwable is the
 *                   cause), or if {@code finalizeGlobal} itself throws
 */
@Override
public void finalizeOnMaster(ClassLoader loader) throws Exception {
	final InputOutputFormatContainer formatContainer = initInputOutputformatContainer(loader);

	final Thread currentThread = Thread.currentThread();
	final ClassLoader previousClassLoader = currentThread.getContextClassLoader();
	try {
		// set user classloader before calling user code
		currentThread.setContextClassLoader(loader);

		// configure output formats and invoke finalizeGlobal()
		final Map<OperatorID, UserCodeWrapper<? extends OutputFormat<?>>> registeredFormats =
			formatContainer.getOutputFormats();
		for (Map.Entry<OperatorID, UserCodeWrapper<? extends OutputFormat<?>>> formatEntry : registeredFormats.entrySet()) {
			final OutputFormat<?> configuredFormat;

			try {
				configuredFormat = formatEntry.getValue().getUserCodeObject();
				configuredFormat.configure(formatContainer.getParameters(formatEntry.getKey()));
			} catch (Throwable t) {
				throw new Exception("Configuring the output format (" + getFormatDescription(formatEntry.getKey()) + ") failed: "
					+ t.getMessage(), t);
			}

			if (configuredFormat instanceof FinalizeOnMaster) {
				final FinalizeOnMaster finalizer = (FinalizeOnMaster) configuredFormat;
				finalizer.finalizeGlobal(getParallelism());
			}
		}
	} finally {
		// restore original classloader
		currentThread.setContextClassLoader(previousClassLoader);
	}
}
 
Example #20
Source Project: flink   Author: flink-tpc-ds   File: DataSinkTask.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads the OutputFormat from the task configuration and runs its {@code configure()} step.
 *
 * <p>The format and its operator id are obtained from an {@link InputOutputFormatContainer}
 * built over the task config, which must hold exactly one output format.
 * {@code configure()} is then invoked with the user-code class loader installed as the
 * thread's context class loader; the previous loader is restored afterwards in all cases.
 *
 * @throws RuntimeException
 *         if the loaded object is not an OutputFormat, or if the user's
 *         {@code configure()} method fails with any Throwable.
 */
private void initOutputFormat() {
	final ClassLoader userCodeClassLoader = getUserCodeClassLoader();
	// obtain task configuration (including stub parameters)
	this.config = new TaskConfig(getTaskConfiguration());

	final Pair<OperatorID, OutputFormat<IT>> idAndFormat;
	final InputOutputFormatContainer formatContainer = new InputOutputFormatContainer(config, userCodeClassLoader);
	try {
		idAndFormat = formatContainer.getUniqueOutputFormat();
		this.format = idAndFormat.getValue();

		// check if the class is a subclass, if the check is required
		final Class<?> formatClass = this.format.getClass();
		if (!OutputFormat.class.isAssignableFrom(formatClass)) {
			throw new RuntimeException("The class '" + formatClass.getName() + "' is not a subclass of '" +
					OutputFormat.class.getName() + "' as is required.");
		}
	} catch (ClassCastException e) {
		throw new RuntimeException("The stub class is not a proper subclass of " + OutputFormat.class.getName(), e);
	}

	// configure the stub under the user-code class loader; failures are reported as user-code errors
	final Thread currentThread = Thread.currentThread();
	final ClassLoader previousClassLoader = currentThread.getContextClassLoader();
	try {
		currentThread.setContextClassLoader(userCodeClassLoader);
		this.format.configure(formatContainer.getParameters(idAndFormat.getKey()));
	} catch (Throwable t) {
		throw new RuntimeException("The user defined 'configure()' method in the Output Format caused an error: "
			+ t.getMessage(), t);
	} finally {
		currentThread.setContextClassLoader(previousClassLoader);
	}
}
 
Example #21
Source Project: flink   Author: flink-tpc-ds   File: OutputFormatSinkFunctionTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@link OutputFormatSinkFunction#setRuntimeContext} treats rich and plain output formats.
 */
@Test
public void setRuntimeContext() throws Exception {
	final RuntimeContext runtimeContext = Mockito.mock(RuntimeContext.class);

	// rich format: the runtime context has to be forwarded exactly once
	final RichOutputFormat<?> richFormat = Mockito.mock(RichOutputFormat.class);
	new OutputFormatSinkFunction<>(richFormat).setRuntimeContext(runtimeContext);
	Mockito.verify(richFormat, Mockito.times(1))
		.setRuntimeContext(Mockito.eq(runtimeContext));

	// plain format: the call simply has to go through without an error
	final OutputFormat<?> plainFormat = Mockito.mock(OutputFormat.class);
	new OutputFormatSinkFunction<>(plainFormat).setRuntimeContext(runtimeContext);
}
 
Example #22
Source Project: flink   Author: apache   File: InputOutputFormatContainer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the single registered output format together with its operator id.
 *
 * <p>The format object is materialized from its wrapper with the user-code class loader.
 *
 * @param <IT> record type the caller expects the format to accept (unchecked)
 * @return pair of operator id and output format
 * @throws IllegalStateException if the number of registered output formats is not exactly one
 */
@SuppressWarnings("unchecked")
public <IT> Pair<OperatorID, OutputFormat<IT>> getUniqueOutputFormat() {
	final Map<OperatorID, UserCodeWrapper<? extends OutputFormat<?>>> registeredFormats = formats.getOutputFormats();
	Preconditions.checkState(registeredFormats.size() == 1);

	final Map.Entry<OperatorID, UserCodeWrapper<? extends OutputFormat<?>>> onlyEntry =
		registeredFormats.entrySet().iterator().next();

	final OperatorID operatorId = onlyEntry.getKey();
	final OutputFormat<IT> outputFormat =
		(OutputFormat<IT>) onlyEntry.getValue().getUserCodeObject(OutputFormat.class, userCodeClassLoader);

	return new ImmutablePair<>(operatorId, outputFormat);
}
 
Example #23
Source Project: flink   Author: apache   File: DataSinkTask.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads the OutputFormat from the task configuration and runs its {@code configure()} step.
 *
 * <p>The format and its operator id are obtained from an {@link InputOutputFormatContainer}
 * built over the task config, which must hold exactly one output format.
 * {@code configure()} is then invoked with the user-code class loader installed as the
 * thread's context class loader; the previous loader is restored afterwards in all cases.
 *
 * @throws RuntimeException
 *         if the loaded object is not an OutputFormat, or if the user's
 *         {@code configure()} method fails with any Throwable.
 */
private void initOutputFormat() {
	final ClassLoader userCodeClassLoader = getUserCodeClassLoader();
	// obtain task configuration (including stub parameters)
	this.config = new TaskConfig(getTaskConfiguration());

	final Pair<OperatorID, OutputFormat<IT>> idAndFormat;
	final InputOutputFormatContainer formatContainer = new InputOutputFormatContainer(config, userCodeClassLoader);
	try {
		idAndFormat = formatContainer.getUniqueOutputFormat();
		this.format = idAndFormat.getValue();

		// check if the class is a subclass, if the check is required
		final Class<?> formatClass = this.format.getClass();
		if (!OutputFormat.class.isAssignableFrom(formatClass)) {
			throw new RuntimeException("The class '" + formatClass.getName() + "' is not a subclass of '" +
					OutputFormat.class.getName() + "' as is required.");
		}
	} catch (ClassCastException e) {
		throw new RuntimeException("The stub class is not a proper subclass of " + OutputFormat.class.getName(), e);
	}

	// configure the stub under the user-code class loader; failures are reported as user-code errors
	final Thread currentThread = Thread.currentThread();
	final ClassLoader previousClassLoader = currentThread.getContextClassLoader();
	try {
		currentThread.setContextClassLoader(userCodeClassLoader);
		this.format.configure(formatContainer.getParameters(idAndFormat.getKey()));
	} catch (Throwable t) {
		throw new RuntimeException("The user defined 'configure()' method in the Output Format caused an error: "
			+ t.getMessage(), t);
	} finally {
		currentThread.setContextClassLoader(previousClassLoader);
	}
}
 
Example #24
Source Project: flink   Author: apache   File: PartitionWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an output format for {@code path} via the factory, configures it with {@code conf}
 * and opens it as task 0 of 1.
 *
 * @param path location the new format writes to
 * @return the configured and opened format
 * @throws IOException declared by the signature; presumably raised when opening the format fails
 */
OutputFormat<T> createNewOutputFormat(Path path) throws IOException {
	final OutputFormat<T> openedFormat = factory.createOutputFormat(path);
	openedFormat.configure(conf);
	// Here we just think of it as a single file format, so there can only be a single task.
	openedFormat.open(0, 1);
	return openedFormat;
}
 
Example #25
Source Project: flink   Author: apache   File: DataSink.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a data sink over the given data set.
 *
 * <p>Arguments are validated in the order format, type, data; the first {@code null} found
 * determines the exception message.
 *
 * @param data   the data set that feeds the sink
 * @param format the output format used by the sink
 * @param type   type information of the elements
 * @throws IllegalArgumentException if any argument is {@code null}
 */
public DataSink(DataSet<T> data, OutputFormat<T> format, TypeInformation<T> type) {
	if (format == null) {
		throw new IllegalArgumentException("The output format must not be null.");
	}
	this.format = format;

	if (type == null) {
		throw new IllegalArgumentException("The input type information must not be null.");
	}
	this.type = type;

	if (data == null) {
		throw new IllegalArgumentException("The data set must not be null.");
	}
	this.data = data;
}
 
Example #26
Source Project: flink   Author: apache   File: DataSet.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Shared implementation behind the CSV writers: creates a {@link CsvOutputFormat} for the
 * given file and delimiters and registers it through {@code output}.
 *
 * @param filePath       target file
 * @param rowDelimiter   delimiter between rows
 * @param fieldDelimiter delimiter between fields
 * @param wm             write mode to apply, or {@code null} to keep the format's own setting
 * @return the sink returned by {@code output}
 * @throws IllegalArgumentException if this data set is not of a tuple type
 */
@SuppressWarnings("unchecked")
private <X extends Tuple> DataSink<T> internalWriteAsCsv(Path filePath, String rowDelimiter, String fieldDelimiter, WriteMode wm) {
	final boolean isTupleSet = getType().isTupleType();
	Preconditions.checkArgument(isTupleSet, "The writeAsCsv() method can only be used on data sets of tuples.");

	final CsvOutputFormat<X> csvFormat = new CsvOutputFormat<>(filePath, rowDelimiter, fieldDelimiter);
	if (wm != null) {
		csvFormat.setWriteMode(wm);
	}

	// unchecked: the tuple check above is the only guard that T is a tuple type
	return output((OutputFormat<T>) csvFormat);
}
 
Example #27
Source Project: alibaba-flink-connectors   Author: alibaba   File: OutputFormatSinkFunction.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a sink function backed by the given output format.
 *
 * <p>The reference is stored as-is; no null check is performed here.
 *
 * @param outputFormat the output format this sink function keeps
 */
public OutputFormatSinkFunction(OutputFormat<RECORD> outputFormat) {
	this.outputFormat = outputFormat;
}
 
Example #28
Source Project: alibaba-flink-connectors   Author: alibaba   File: OutputFormatSinkFunction.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the output format held by this sink function.
 *
 * @return the stored output format (the same reference, not a copy)
 */
public OutputFormat<RECORD> getOutputFormat() {
	return outputFormat;
}
 
Example #29
Source Project: alibaba-flink-connectors   Author: alibaba   File: TupleOutputFormatSinkFunction.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a sink function backed by an output format over {@code Tuple2<Boolean, RECORD>} elements.
 *
 * <p>The reference is stored as-is; no null check is performed here.
 *
 * @param outputFormat the output format this sink function keeps
 */
public TupleOutputFormatSinkFunction(OutputFormat<Tuple2<Boolean, RECORD>> outputFormat) {
	this.outputFormat = outputFormat;
}
 
Example #30
Source Project: flink   Author: apache   File: StreamingJobGraphGeneratorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a job with one input-format source and two output-format sinks and verifies that
 * the job graph has a single {@link InputOutputFormatVertex} whose task config carries all
 * three formats under the matching operator ids.
 */
@Test
public void testInputOutputFormat() {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// one source reading through an input format ...
	final DataStream<Long> longSource = env.addSource(
		new InputFormatSourceFunction<>(
			new TypeSerializerInputFormat<>(TypeInformation.of(Long.class)),
			TypeInformation.of(Long.class)),
		TypeInformation.of(Long.class)).name("source");

	// ... feeding two sinks that write through output formats
	longSource.writeUsingOutputFormat(new DiscardingOutputFormat<>()).name("sink1");
	longSource.writeUsingOutputFormat(new DiscardingOutputFormat<>()).name("sink2");

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
	assertEquals(1, jobGraph.getNumberOfVertices());

	final JobVertex onlyVertex = jobGraph.getVertices().iterator().next();
	assertTrue(onlyVertex instanceof InputOutputFormatVertex);

	// read the formats back from the vertex configuration
	final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
	final InputOutputFormatContainer formatContainer = new InputOutputFormatContainer(
		new TaskConfig(onlyVertex.getConfiguration()), contextClassLoader);
	final Map<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> inputFormats = formatContainer.getInputFormats();
	final Map<OperatorID, UserCodeWrapper<? extends OutputFormat<?>>> outputFormats = formatContainer.getOutputFormats();
	assertEquals(1, inputFormats.size());
	assertEquals(2, outputFormats.size());

	// map operator names to ids: chain head first, then every chained operator
	final Map<String, OperatorID> operatorIdsByName = new HashMap<>();
	final StreamConfig headConfig = new StreamConfig(onlyVertex.getConfiguration());
	operatorIdsByName.put(headConfig.getOperatorName(), headConfig.getOperatorID());

	final Map<Integer, StreamConfig> chainedConfigs =
		headConfig.getTransitiveChainedTaskConfigs(contextClassLoader);
	chainedConfigs.values().forEach(
		chained -> operatorIdsByName.put(chained.getOperatorName(), chained.getOperatorID()));

	// each named operator must resolve to the format type it was built with
	final InputFormat<?, ?> sourceFormat =
		inputFormats.get(operatorIdsByName.get("Source: source")).getUserCodeObject();
	assertTrue(sourceFormat instanceof TypeSerializerInputFormat);

	final OutputFormat<?> firstSinkFormat =
		outputFormats.get(operatorIdsByName.get("Sink: sink1")).getUserCodeObject();
	assertTrue(firstSinkFormat instanceof DiscardingOutputFormat);

	final OutputFormat<?> secondSinkFormat =
		outputFormats.get(operatorIdsByName.get("Sink: sink2")).getUserCodeObject();
	assertTrue(secondSinkFormat instanceof DiscardingOutputFormat);
}