org.apache.flink.table.sources.CsvTableSource Java Examples

The following examples show how to use org.apache.flink.table.sources.CsvTableSource. They are taken from open-source projects; the source file and license are noted above each example.
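In all of the examples below, a CsvTableSource is created either through its builder or directly through a constructor, and then registered with a table environment. As a quick orientation, a minimal sketch of the builder pattern, assuming a two-column file at a hypothetical path:

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.table.sources.CsvTableSource;

CsvTableSource source = CsvTableSource.builder()
        .path("/tmp/people.csv")     // hypothetical input file
        .field("name", Types.STRING) // column name and type, in file order
        .field("age", Types.INT)
        .fieldDelimiter(",")         // the defaults, shown explicitly for clarity
        .lineDelimiter("\n")
        .build();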
Example #1
Source File: SQLExampleWordCount.java    From flink-learning with Apache License 2.0
A streaming word count: a one-column CSV file is registered as a table with the Blink planner, aggregated with a GROUP BY query, and printed as a retract stream.
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String path = SQLExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();

    CsvTableSource csvTableSource = CsvTableSource.builder()
            .field("word", Types.STRING)
            .path(path)
            .build();
    blinkStreamTableEnv.registerTableSource("zhisheng", csvTableSource);
    Table wordWithCount = blinkStreamTableEnv.sqlQuery("SELECT count(word), word FROM zhisheng GROUP BY word");
    blinkStreamTableEnv.toRetractStream(wordWithCount, Row.class).print();

    blinkStreamTableEnv.execute("Blink Stream SQL Job");
}
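Because the query aggregates with GROUP BY, toRetractStream emits Tuple2<Boolean, Row> pairs: the flag is true for an accumulate message and false for the retraction of a previously emitted row. A small sketch, reusing the names from the example above, of keeping only the accumulate messages:

blinkStreamTableEnv.toRetractStream(wordWithCount, Row.class)
        .filter(tuple -> tuple.f0) // drop retraction messages
        .print();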
 
Example #2
Source File: CustomTableSinkMain.java    From flink-learning with Apache License 2.0
The same word-count pipeline, but the result is written to a user-defined RetractStreamTableSink instead of being printed (a sketch of such a sink follows the example).
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String path = SQLExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();

    CsvTableSource csvTableSource = CsvTableSource.builder()
            .field("word", Types.STRING)
            .path(path)
            .build();
    blinkStreamTableEnv.registerTableSource("zhisheng", csvTableSource);

    RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(new String[]{"c", "word"}, new TypeInformation[]{Types.LONG, Types.STRING});
    // or:
//    RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(new String[]{"c", "word"}, new DataType[]{DataTypes.BIGINT(), DataTypes.STRING()});
    blinkStreamTableEnv.registerTableSink("sinkTable", retractStreamTableSink);

    Table wordWithCount = blinkStreamTableEnv.sqlQuery("SELECT count(word) AS c, word FROM zhisheng GROUP BY word");

    wordWithCount.insertInto("sinkTable");
    blinkStreamTableEnv.execute("Blink Custom Table Sink");
}
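MyRetractStreamTableSink itself is not part of this listing. A minimal sketch of what such a sink might look like against the Flink 1.9/1.10-era interfaces; the class body below is an assumption (a simple print-style sink), and depending on the exact Flink version the deprecated emitDataStream method may also have to be implemented:

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.table.sinks.RetractStreamTableSink;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.types.Row;

public class MyRetractStreamTableSink implements RetractStreamTableSink<Row> {

    private final String[] fieldNames;
    private final TypeInformation<?>[] fieldTypes;

    public MyRetractStreamTableSink(String[] fieldNames, TypeInformation<?>[] fieldTypes) {
        this.fieldNames = fieldNames;
        this.fieldTypes = fieldTypes;
    }

    @Override
    public TypeInformation<Row> getRecordType() {
        return new RowTypeInfo(fieldTypes, fieldNames);
    }

    @Override
    public String[] getFieldNames() {
        return fieldNames;
    }

    @Override
    public TypeInformation<?>[] getFieldTypes() {
        return fieldTypes;
    }

    @Override
    public TableSink<Tuple2<Boolean, Row>> configure(String[] fieldNames, TypeInformation<?>[] fieldTypes) {
        return new MyRetractStreamTableSink(fieldNames, fieldTypes);
    }

    @Override
    public DataStreamSink<?> consumeDataStream(DataStream<Tuple2<Boolean, Row>> dataStream) {
        // The flag marks accumulate (true) vs. retract (false) messages.
        return dataStream.print();
    }
}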
 
Example #3
Source File: CsvConnectorDescriptor.java    From alchemy with Apache License 2.0
Builds a CsvTableSource from a schema description by mapping each field to its TypeInformation and calling the full constructor.
private <T> T buildCsvFlinkSource(List<Field> schema, FormatDescriptor format) {
    String[] columnNames = new String[schema.size()];
    TypeInformation[] columnTypes = new TypeInformation[schema.size()];
    for (int i = 0; i < schema.size(); i++) {
        columnNames[i] = schema.get(i).getName();
        TypeInformation typeInformation = TypeUtils.readTypeInfo(schema.get(i).getType());
        if (typeInformation == null) {
            throw new UnsupportedOperationException("Unsupported type:" + schema.get(i).getType());
        }
        columnTypes[i] = typeInformation;
    }
    return (T)new CsvTableSource(path, columnNames, columnTypes, fieldDelim, rowDelim, quoteCharacter,
        ignoreFirstLine, ignoreComments, lenient);
}
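For reference, the nine constructor arguments used above are, in order: file path, field names, field types, field delimiter, row delimiter, quote character, whether to skip the first line, a comment-line prefix, and a lenient flag. A sketch with hypothetical values:

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.table.sources.CsvTableSource;

CsvTableSource source = new CsvTableSource(
        "/tmp/people.csv",                              // path (hypothetical)
        new String[] { "name", "age" },
        new TypeInformation<?>[] { Types.STRING, Types.INT },
        ",",    // field delimiter
        "\n",   // row delimiter
        '"',    // quote character
        true,   // ignore the first (header) line
        "#",    // lines starting with this prefix are skipped
        false); // lenient: false means malformed rows cause a failure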
 
Example #4
Source File: SourceDescriptorTest.java    From alchemy with Apache License 2.0
A unit test that binds a YAML descriptor to a CsvConnectorDescriptor and verifies that a CsvTableSource can be built from it.
@Test
public void buildCsvSource() throws Exception {
    File file = ResourceUtils.getFile("classpath:yaml/csv-source.yaml");
    SourceDescriptor sourceDescriptor = BindPropertiesUtil.bindProperties(file, SourceDescriptor.class);
    CsvConnectorDescriptor connectorDescriptor = BindPropertiesUtil.bindProperties(sourceDescriptor.getConnector(), CsvConnectorDescriptor.class);
    assertThat(connectorDescriptor.getPath()).isNotNull();
    assertThat(connectorDescriptor.getFieldDelim()).isNotNull();
    assertThat(connectorDescriptor.getRowDelim()).isNotNull();
    assertThat(connectorDescriptor.getIgnoreComments()).isNotNull();
    assertThat(connectorDescriptor.isIgnoreFirstLine()).isTrue();
    assertThat(sourceDescriptor.getSchema()).isNotNull();
    CsvTableSource tableSource = connectorDescriptor.buildSource(sourceDescriptor.getSchema(), null);
    assertThat(tableSource).isNotNull();
}
 
Example #5
Source File: CsvTableSinkFactoryTest.java    From flink with Apache License 2.0
Verifies that the table factory produces a CsvTableSource for a descriptor with update-mode set to append.
@Test
public void testAppendTableSourceFactory() {
	DescriptorProperties descriptor = createDescriptor(testingSchema);
	descriptor.putString("update-mode", "append");
	TableSource source = createTableSource(descriptor);

	assertTrue(source instanceof CsvTableSource);
	assertEquals(testingSchema.toRowDataType(), source.getProducedDataType());
}
 
Example #6
Source File: CsvTableSinkFactoryTest.java    From flink with Apache License 2.0
The batch counterpart of the previous test: no update-mode property is set, and the factory still yields a CsvTableSource.
@Test
public void testBatchTableSourceFactory() {
	DescriptorProperties descriptor = createDescriptor(testingSchema);
	TableSource source = createTableSource(descriptor);

	assertTrue(source instanceof CsvTableSource);
	assertEquals(testingSchema.toRowDataType(), source.getProducedDataType());
}
 
Example #7
Source File: CodeGenFlinkTable.java    From df_data_service with Apache License 2.0
Compiles a Java source string at runtime, applies the generated transformation to a table scanned from a CsvTableSource, and writes the result to a CsvTableSink.
public static void main(String[] args) {

    // Transformation bodies that are spliced into the generated Java source below.
    String transform = "flatMap(new FlinkUDF.LineSplitter()).groupBy(0).sum(1).print();\n";
    String transform2 = "select(\"name\");\n";

    String header = "package dynamic;\n" +
            "import org.apache.flink.api.table.Table;\n" +
            "import com.datafibers.util.*;\n";

    // Generated class that applies the DataSet transformation.
    String javaCode = header +
            "public class FlinkScript implements DynamicRunner {\n" +
            "@Override \n" +
            "    public void runTransform(DataSet<String> ds) {\n" +
            "try {" +
            "ds." + transform +
            "} catch (Exception e) {" +
            "};" +
            "}}";

    // Generated class that applies the Table transformation.
    String javaCode2 = header +
            "public class FlinkScript implements DynamicRunner {\n" +
            "@Override \n" +
            "    public Table transTableObj(Table tbl) {\n" +
            "try {" +
            "return tbl." + transform2 +
            "} catch (Exception e) {" +
            "};" +
            "return null;}}";

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    CsvTableSource csvTableSource = new CsvTableSource(
            "/Users/will/Downloads/file.csv",
            new String[] { "name", "id", "score", "comments" },
            new TypeInformation<?>[] {
                    Types.STRING(),
                    Types.STRING(),
                    Types.STRING(),
                    Types.STRING()
            }); // default delimiters, non-lenient parsing

    tableEnv.registerTableSource("mycsv", csvTableSource);
    TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|");
    Table ingest = tableEnv.scan("mycsv");

    try {
        String className = "dynamic.FlinkScript";
        Class<?> aClass = CompilerUtils.CACHED_COMPILER.loadFromJava(className, javaCode2);
        DynamicRunner runner = (DynamicRunner) aClass.newInstance();
        // runner.runTransform(ds);
        Table result = runner.transTableObj(ingest);
        // write the result Table to the TableSink
        result.writeToSink(sink);
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example #8
Source File: WordCountStream.java    From df_data_service with Apache License 2.0
Reads a CSV file through the legacy three-argument constructor, projects a single column, and writes it to a CsvTableSink.
public static void main(String[] args) {

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

    // A table could also be created from a DataStream, e.g.:
    // DataStream<Integer> ds = env.fromElements(1, 2, 3, 4, 5);
    // Table in = tableEnv.fromDataStream(ds, "a");

    CsvTableSource csvTableSource = new CsvTableSource(
            "/Users/will/Downloads/file.csv",
            new String[] { "name", "id", "score", "comments" },
            new TypeInformation<?>[] {
                    Types.STRING(),
                    Types.STRING(),
                    Types.STRING(),
                    Types.STRING()
            }); // default delimiters, non-lenient parsing

    tableEnv.registerTableSource("mycsv", csvTableSource);
    TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|");

    Table in = tableEnv.scan("mycsv");
    Table result = in.select("name");
    result.writeToSink(sink);
    try {
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
    }

    System.out.print("DONE");
}
 
Example #9
Source File: TpcdsTestProgram.java    From flink with Apache License 2.0
Registers all TPC-DS tables as CsvTableSource-backed catalog tables and configures the Blink batch optimizer for the benchmark queries.
/**
 * Prepare a TableEnvironment for the TPC-DS queries.
 *
 * @param sourceTablePath path to the directory containing the generated TPC-DS data files
 * @param useTableStats whether to register table and column statistics for the optimizer
 * @return a TableEnvironment with the Blink batch planner configured and all TPC-DS tables registered
 */
private static TableEnvironment prepareTableEnv(String sourceTablePath, Boolean useTableStats) {
	//init Table Env
	EnvironmentSettings environmentSettings = EnvironmentSettings
			.newInstance()
			.useBlinkPlanner()
			.inBatchMode()
			.build();
	TableEnvironment tEnv = TableEnvironment.create(environmentSettings);

	//config Optimizer parameters
	tEnv.getConfig().getConfiguration()
			.setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 4);
	tEnv.getConfig().getConfiguration()
			.setLong(OptimizerConfigOptions.TABLE_OPTIMIZER_BROADCAST_JOIN_THRESHOLD, 10 * 1024 * 1024);
	tEnv.getConfig().getConfiguration()
			.setBoolean(OptimizerConfigOptions.TABLE_OPTIMIZER_JOIN_REORDER_ENABLED, true);

	//register TPC-DS tables
	TPCDS_TABLES.forEach(table -> {
		TpcdsSchema schema = TpcdsSchemaProvider.getTableSchema(table);
		CsvTableSource.Builder builder = CsvTableSource.builder();
		builder.path(sourceTablePath + FILE_SEPARATOR + table + DATA_SUFFIX);
		for (int i = 0; i < schema.getFieldNames().size(); i++) {
			builder.field(
					schema.getFieldNames().get(i),
					TypeConversions.fromDataTypeToLegacyInfo(schema.getFieldTypes().get(i)));
		}
		builder.fieldDelimiter(COL_DELIMITER);
		builder.emptyColumnAsNull();
		builder.lineDelimiter("\n");
		CsvTableSource tableSource = builder.build();
		ConnectorCatalogTable catalogTable = ConnectorCatalogTable.source(tableSource, true);
		tEnv.getCatalog(tEnv.getCurrentCatalog()).ifPresent(catalog -> {
			try {
				catalog.createTable(new ObjectPath(tEnv.getCurrentDatabase(), table), catalogTable, false);
			} catch (Exception e) {
				throw new RuntimeException(e);
			}
		});
	});
	// register statistics info
	if (useTableStats) {
		TpcdsStatsProvider.registerTpcdsStats(tEnv);
	}
	return tEnv;
}
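Once prepareTableEnv returns, queries can be issued directly against the environment. A trivial sketch, not part of the original program, against the standard TPC-DS store_sales table:

TableEnvironment tEnv = prepareTableEnv(sourceTablePath, true); // as defined above
Table result = tEnv.sqlQuery("SELECT COUNT(*) FROM store_sales");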