org.apache.flink.table.sinks.CsvTableSink Java Examples

The following examples show how to use org.apache.flink.table.sinks.CsvTableSink. Each example is taken from an open-source project; the source file and license are noted above each snippet.
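Before the project examples, here is a minimal sketch of typical usage, assuming the legacy Table API (roughly Flink 1.9-1.11); the output path, table name, and schema below are placeholders rather than values from any example, and tEnv stands for an already-created TableEnvironment:

    // Hypothetical placeholders: path and field names are illustrative only.
    CsvTableSink sink = new CsvTableSink(
            "/tmp/output.csv",                // output path
            "|",                              // field delimiter
            1,                                // number of output files
            FileSystem.WriteMode.OVERWRITE);  // replace existing output

    // Registration needs a schema; this overload configures the sink with
    // explicit field names and types on the way in.
    tEnv.registerTableSink(
            "csvSink",
            new String[]{"word", "frequency"},
            new TypeInformation[]{Types.STRING, Types.LONG},
            sink);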
Example #1
Source File: WordCountTable.java    From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tEnv = BatchTableEnvironment.create(env);

    DataSet<WC> input = env.fromElements(
            new WC("Hello", 1),
            new WC("flink", 1),
            new WC("Hello", 1));

    Table table = tEnv.fromDataSet(input);

    Table filtered = table
            .groupBy("word")
            .select("word, frequency.sum as frequency")
            .filter("frequency = 2");

    DataSet<WC> result = tEnv.toDataSet(filtered, WC.class);

    // The original example registers a CsvTableSink but never writes to it,
    // and the output path is left empty; result.print() below is what
    // actually triggers execution.
    String path = "";
    CsvTableSink tableSink = new CsvTableSink(path, ",");
    tEnv.registerTableSink("csvSink", tableSink);
    result.print();
}
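Note that the sink here is dead code: it is registered but never written to, and most Table API versions reject a CsvTableSink registered without field names and types. To actually emit the filtered table through the sink, something like the following would be needed (a sketch, assuming the pre-1.11 insertInto API and a non-empty output path):

    filtered.insertInto("csvSink");
    env.execute("WordCountTable");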
 
Example #2
Source File: BatchSQLTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool params = ParameterTool.fromArgs(args);
	String outputPath = params.getRequired("outputPath");
	String sqlStatement = params.getRequired("sqlStatement");

	TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.newInstance()
		.useBlinkPlanner()
		.inBatchMode()
		.build());

	tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
	tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));
	tEnv.registerTableSink("sinkTable",
		new CsvTableSink(outputPath)
			.configure(new String[]{"f0", "f1"}, new TypeInformation[]{Types.INT, Types.SQL_TIMESTAMP}));

	tEnv.sqlUpdate(sqlStatement);
	tEnv.execute("TestSqlJob");
}
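Two details are worth noting here: configure(...) does not mutate the CsvTableSink in place but returns a configured copy, which is why it can be chained straight into registerTableSink; and the sqlUpdate(...) plus execute(...) pair is the pre-1.11 submission pattern that Example #4 below replaces with executeSql(...).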
 
Example #3
Source File: CsvSinkStreamOp.java    From Alink with Apache License 2.0
@Override
public CsvSinkStreamOp sinkFrom(StreamOperator in) {
    this.schema = in.getSchema();

    final String filePath = getFilePath();
    final String fieldDelim = getFieldDelimiter();
    final String rowDelimiter = getRowDelimiter();
    final int numFiles = getNumFiles();
    final TypeInformation[] types = in.getColTypes();
    final Character quoteChar = getQuoteChar();

    FileSystem.WriteMode writeMode;
    if (getOverwriteSink()) {
        writeMode = FileSystem.WriteMode.OVERWRITE;
    } else {
        writeMode = FileSystem.WriteMode.NO_OVERWRITE;
    }

    // Pre-format each Row into a CSV line before it reaches the sink; the
    // map parallelism also determines how many output files are produced.
    DataStream<Row> output = ((DataStream<Row>) in.getDataStream())
        .map(new CsvUtil.FormatCsvFunc(types, fieldDelim, quoteChar))
        .setParallelism(numFiles);

    CsvTableSink cts = new CsvTableSink(filePath, rowDelimiter, numFiles, writeMode);
    cts.emitDataStream(output);
    return this;
}
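The design choice worth noting: sinkFrom returns this so operators can be chained, and the overwrite behavior is explicit rather than implied; OVERWRITE replaces an existing sink path, while NO_OVERWRITE fails fast if output already exists, which is the safer choice for repeated batch runs.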
 
Example #4
Source File: BatchSQLTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool params = ParameterTool.fromArgs(args);
	String outputPath = params.getRequired("outputPath");
	String sqlStatement = params.getRequired("sqlStatement");

	TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.newInstance()
		.useBlinkPlanner()
		.inBatchMode()
		.build());

	((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0));
	((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5));
	((TableEnvironmentInternal) tEnv).registerTableSinkInternal("sinkTable",
		new CsvTableSink(outputPath)
			.configure(new String[]{"f0", "f1"}, new TypeInformation[]{Types.INT, Types.SQL_TIMESTAMP}));

	TableResult result = tEnv.executeSql(sqlStatement);
	// wait for the job to finish
	result.getJobClient().get().getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get();
}
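This is the same test program as Example #2 after a Flink upgrade: registerTableSource and registerTableSink are no longer available on the public TableEnvironment, so registration goes through the internal TableEnvironmentInternal methods, and executeSql(...) both submits the job and returns a TableResult whose JobClient is then used to block until the job finishes.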
 
Example #5
Source File: UnitTestSuiteFlink.java    From df_data_service with Apache License 2.0
public static void testFlinkAvroSQL() {
    System.out.println("TestCase_Test Avro SQL");
    String resultFile = "/home/vagrant/test.txt";

    String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
            .setParallelism(1);
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "consumer_test");
    properties.setProperty("schema.subject", "test-value");
    properties.setProperty("schema.registry", "localhost:8081");
    properties.setProperty("static.avro.schema", "empty_schema");

    try {
        Kafka09AvroTableSource kafkaAvroTableSource =  new Kafka09AvroTableSource("test", properties);
        tableEnv.registerTableSource("Orders", kafkaAvroTableSource);

        //Table result = tableEnv.sql("SELECT STREAM name, symbol, exchange FROM Orders");
        Table result = tableEnv.sql("SELECT name, symbol, exchangecode FROM Orders");

        Files.deleteIfExists(Paths.get(resultFile));

        // create a TableSink
        TableSink sink = new CsvTableSink(resultFile, "|");
        // write the result Table to the TableSink
        result.writeToSink(sink);
        env.execute("Flink AVRO SQL KAFKA Test");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
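writeToSink(...) is the oldest of the submission styles in these examples; on later Flink versions the equivalent is to register the sink and use insertInto or executeInsert, as Examples #2, #4, and #8 show. Kafka09AvroTableSource and DFInitService are classes from the df_data_service project itself, not from Flink.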
 
Example #6
Source File: CsvTableSinkFactoryTest.java    From flink with Apache License 2.0
@Test
public void testAppendTableSinkFactory() {
	DescriptorProperties descriptor = createDescriptor(testingSchema);
	descriptor.putString("update-mode", "append");
	TableSink sink = createTableSink(descriptor);

	assertTrue(sink instanceof CsvTableSink);
	assertEquals(testingSchema.toRowDataType(), sink.getConsumedDataType());
}
 
Example #7
Source File: CsvTableSinkFactoryTest.java    From flink with Apache License 2.0
@Test
public void testBatchTableSinkFactory() {
	DescriptorProperties descriptor = createDescriptor(testingSchema);
	TableSink sink = createTableSink(descriptor);

	assertTrue(sink instanceof CsvTableSink);
	assertEquals(testingSchema.toRowDataType(), sink.getConsumedDataType());
}
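Both tests exercise the table factory discovery path rather than constructing the sink directly: createDescriptor(testingSchema), defined elsewhere in this test class, builds DescriptorProperties that presumably carry connector.type = 'filesystem' and format.type = 'csv'. The factory resolves them to a CsvTableSink whose consumed data type matches the declared schema; the append update-mode variant exercises the streaming factory, and the variant without it the batch factory.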
 
Example #8
Source File: TpcdsTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool params = ParameterTool.fromArgs(args);
	String sourceTablePath = params.getRequired("sourceTablePath");
	String queryPath = params.getRequired("queryPath");
	String sinkTablePath = params.getRequired("sinkTablePath");
	Boolean useTableStats = params.getBoolean("useTableStats");
	TableEnvironment tableEnvironment = prepareTableEnv(sourceTablePath, useTableStats);

	//execute TPC-DS queries
	for (String queryId : TPCDS_QUERIES) {
		System.out.println("[INFO]Run TPC-DS query " + queryId + " ...");
		String queryName = QUERY_PREFIX + queryId + QUERY_SUFFIX;
		String queryFilePath = queryPath + FILE_SEPARATOR + queryName;
		String queryString = loadFile2String(queryFilePath);
		Table resultTable = tableEnvironment.sqlQuery(queryString);

		//register sink table
		String sinkTableName = QUERY_PREFIX + queryId + "_sinkTable";
		((TableEnvironmentInternal) tableEnvironment).registerTableSinkInternal(sinkTableName,
				new CsvTableSink(
					sinkTablePath + FILE_SEPARATOR + queryId + RESULT_SUFFIX,
					COL_DELIMITER,
					1,
					FileSystem.WriteMode.OVERWRITE,
					resultTable.getSchema().getFieldNames(),
					resultTable.getSchema().getFieldDataTypes()
				));
		TableResult tableResult = resultTable.executeInsert(sinkTableName);
		// wait for the job to finish
		tableResult.getJobClient().get()
				.getJobExecutionResult(Thread.currentThread().getContextClassLoader())
				.get();
		System.out.println("[INFO]Run TPC-DS query " + queryId + " success.");
	}
}
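Unlike the earlier examples, this one uses the full CsvTableSink constructor, passing the field names and DataTypes straight from the result table's schema instead of calling configure(...) afterwards, and it writes with OVERWRITE so each TPC-DS query can be re-run against the same sink path.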
 
Example #9
Source File: CodeGenFlinkTable.java    From df_data_service with Apache License 2.0
public static void main(String args[]) {

		String transform = "flatMap(new FlinkUDF.LineSplitter()).groupBy(0).sum(1).print();\n";

		String transform2 = "select(\"name\");\n";

		String header = "package dynamic;\n" +
				"import org.apache.flink.api.table.Table;\n" +
				"import com.datafibers.util.*;\n";

		String javaCode = header +
				"public class FlinkScript implements DynamicRunner {\n" +
				"@Override \n" +
				"    public void runTransform(DataSet<String> ds) {\n" +
						"try {" +
						"ds."+ transform +
						"} catch (Exception e) {" +
						"};" +
				"}}";

		String javaCode2 = header +
				"public class FlinkScript implements DynamicRunner {\n" +
				"@Override \n" +
				"    public Table transTableObj(Table tbl) {\n" +
					"try {" +
					"return tbl."+ transform2 +
					"} catch (Exception e) {" +
					"};" +
					"return null;}}";

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
		CsvTableSource csvTableSource = new CsvTableSource(
				"/Users/will/Downloads/file.csv",
				new String[] { "name", "id", "score", "comments" },
				new TypeInformation<?>[] {
						Types.STRING(),
						Types.STRING(),
						Types.STRING(),
						Types.STRING()
				}); // lenient

		tableEnv.registerTableSource("mycsv", csvTableSource);
		TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|");
		Table ingest = tableEnv.scan("mycsv");

		try {
			String className = "dynamic.FlinkScript";
			Class<?> aClass = CompilerUtils.CACHED_COMPILER.loadFromJava(className, javaCode2);
			DynamicRunner runner = (DynamicRunner) aClass.newInstance();
			//runner.runTransform(ds);
			Table result = runner.transTableObj(ingest);
			// write the result Table to the TableSink
			result.writeToSink(sink);
			env.execute();

		} catch (Exception e) {
			e.printStackTrace();
		}
	}
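The CSV source and sink are incidental here; the point of the example is runtime code generation: the transformation is assembled as Java source text and compiled on the fly via CompilerUtils.CACHED_COMPILER (which appears to be the OpenHFT Java runtime compiler), so the Table pipeline between CsvTableSource and CsvTableSink is chosen at runtime. Note that javaCode (wrapping transform) is built but never compiled; only javaCode2 is used.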
 
Example #10
Source File: UnitTestSuiteFlink.java    From df_data_service with Apache License 2.0
public static void testFlinkSQL() {

        LOG.info("Only Unit Testing Function is enabled");
        String resultFile = "/home/vagrant/test.txt";

        try {

            String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
            StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
                    .setParallelism(1);
            String kafkaTopic = "finance";
            String kafkaTopic_stage = "df_trans_stage_finance";
            String kafkaTopic_out = "df_trans_out_finance";

            StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
            Properties properties = new Properties();
            properties.setProperty("bootstrap.servers", "localhost:9092");
            properties.setProperty("group.id", "consumer3");

            // Internally convert the JSON string to JSON - Begin
            DataStream<String> stream = env
                    .addSource(new FlinkKafkaConsumer09<>(kafkaTopic, new SimpleStringSchema(), properties));

            stream.map(new MapFunction<String, String>() {
                @Override
                public String map(String jsonString) throws Exception {
                    return jsonString.replaceAll("\\\\", "").replace("\"{", "{").replace("}\"","}");
                }
            }).addSink(new FlinkKafkaProducer09<String>("localhost:9092", kafkaTopic_stage, new SimpleStringSchema()));
            // Internally convert the JSON string to JSON - End

            String[] fieldNames =  new String[] {"name"};
            Class<?>[] fieldTypes = new Class<?>[] {String.class};

            Kafka09AvroTableSource kafkaTableSource = new Kafka09AvroTableSource(
                    kafkaTopic_stage,
                    properties,
                    fieldNames,
                    fieldTypes);

            //kafkaTableSource.setFailOnMissingField(true);

            tableEnv.registerTableSource("Orders", kafkaTableSource);

            //Table result = tableEnv.sql("SELECT STREAM name FROM Orders");
            Table result = tableEnv.sql("SELECT name FROM Orders");

            Files.deleteIfExists(Paths.get(resultFile));

            // create a TableSink
            TableSink sink = new CsvTableSink(resultFile, "|");
            // write the result Table to the TableSink
            result.writeToSink(sink);

            env.execute("FlinkConsumer");

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
 
Example #11
Source File: UnitTestSuiteFlink.java    From df_data_service with Apache License 2.0
public static void testFlinkAvroSQLWithStaticSchema() {
    System.out.println("TestCase_Test Avro SQL with static Schema");

    final String STATIC_USER_SCHEMA = "{"
            + "\"type\":\"record\","
            + "\"name\":\"myrecord\","
            + "\"fields\":["
            + "  { \"name\":\"symbol\", \"type\":\"string\" },"
            + "  { \"name\":\"name\", \"type\":\"string\" },"
            + "  { \"name\":\"exchangecode\", \"type\":\"string\" }"
            + "]}";
    String resultFile = "/home/vagrant/test.txt";

    String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
            .setParallelism(1);
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "consumer_test");
    properties.setProperty("schema.subject", "test-value");
    properties.setProperty("schema.registry", "localhost:8081");
    properties.setProperty("static.avro.schema", STATIC_USER_SCHEMA);

    try {
        Kafka09AvroTableSource kafkaAvroTableSource =  new Kafka09AvroTableSource("test", properties);
        tableEnv.registerTableSource("Orders", kafkaAvroTableSource);

        //Table result = tableEnv.sql("SELECT STREAM name, symbol, exchange FROM Orders");
        Table result = tableEnv.sql("SELECT name, symbol, exchangecode FROM Orders");

        Files.deleteIfExists(Paths.get(resultFile));

        // create a TableSink
        TableSink sink = new CsvTableSink(resultFile, "|");
        // write the result Table to the TableSink
        result.writeToSink(sink);
        env.execute("Flink AVRO SQL KAFKA Test");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example #12
Source File: WordCountStream.java    From df_data_service with Apache License 2.0
public static void main(String args[]) {

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

		// Create a DataStream from a list of elements
		//DataStream<Integer> ds = env.fromElements(1, 2, 3, 4, 5);

		CsvTableSource csvTableSource = new CsvTableSource(
				"/Users/will/Downloads/file.csv",
				new String[] { "name", "id", "score", "comments" },
				new TypeInformation<?>[] {
						Types.STRING(),
						Types.STRING(),
						Types.STRING(),
						Types.STRING()
				}); // lenient

		tableEnv.registerTableSource("mycsv", csvTableSource);



		TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|");


		//tableEnv.registerDataStream("tbl", ds, "a");
		//Table ingest = tableEnv.fromDataStream(ds, "name");
		Table in = tableEnv.scan("mycsv");
		//Table in = tableEnv.ingest("tbl");
		//Table in = tableEnv.fromDataStream(ds, "a");

		Table result = in.select("name");
		result.writeToSink(sink);
		try {
			env.execute();
		} catch (Exception e) {
			// surface failures instead of swallowing them silently
			e.printStackTrace();
		}

		System.out.print("DONE");
	}