Java Code Examples for org.apache.flink.table.api.java.StreamTableEnvironment#registerTableSource()

The following examples show how to use org.apache.flink.table.api.java.StreamTableEnvironment#registerTableSource(). Each example is taken from an open-source project; the source file and license are noted above each snippet.
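Before the project examples, here is a minimal sketch of the pattern they all share: create a StreamTableEnvironment, register a TableSource under a table name, then reference that name in SQL or Table API queries. The file path and table names below are made up for illustration, and the built-in CsvTableSource stands in for the project-specific sources used later; note that registerTableSource() is deprecated in newer Flink releases in favor of DDL-based connectors.

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.sources.CsvTableSource;
import org.apache.flink.types.Row;

public class RegisterTableSourceSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

        // Any TableSource implementation can be registered; CsvTableSource is a simple built-in one.
        CsvTableSource source = CsvTableSource.builder()
                .path("/tmp/words.csv")   // hypothetical input file
                .field("word", Types.STRING)
                .build();

        // Register the source under a table name ...
        tEnv.registerTableSource("words", source);

        // ... and query it by that name. GROUP BY produces updating results,
        // so the result is converted to a retract stream.
        Table counts = tEnv.sqlQuery("SELECT word, COUNT(word) AS c FROM words GROUP BY word");
        tEnv.toRetractStream(counts, Row.class).print();

        env.execute("registerTableSource sketch");
    }
}
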
Example 1
Source File: CustomKafkaSourceMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    blinkStreamTableEnv.registerTableSource("kafkaDataStream", new MyKafkaTableSource(ExecutionEnvUtil.PARAMETER_TOOL));

    RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(new String[]{"_count", "word"}, new DataType[]{DataTypes.BIGINT(), DataTypes.STRING()});
    blinkStreamTableEnv.registerTableSink("sinkTable", retractStreamTableSink);

    Table wordCount = blinkStreamTableEnv.sqlQuery("SELECT count(word) AS _count,word FROM kafkaDataStream GROUP BY word");

    wordCount.insertInto("sinkTable");

    blinkStreamTableEnv.execute("Blink Custom Kafka Table Source");
}
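
Example 1 registers a custom MyKafkaTableSource, whose implementation is not shown on this page. As a rough sketch of what such a class can look like on the legacy TableSource API (this is hypothetical, not the flink-learning project's actual implementation; a real Kafka source would wrap a FlinkKafkaConsumer rather than the in-memory collection used here):

import java.util.Arrays;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.sources.StreamTableSource;
import org.apache.flink.types.Row;

public class WordTableSource implements StreamTableSource<Row> {

    @Override
    public DataStream<Row> getDataStream(StreamExecutionEnvironment execEnv) {
        // A real Kafka-backed source would return execEnv.addSource(new FlinkKafkaConsumer<>(...));
        // a fixed collection stands in for the stream here.
        return execEnv.fromCollection(
                Arrays.asList(Row.of("hello"), Row.of("world")),
                getReturnType());
    }

    @Override
    public TypeInformation<Row> getReturnType() {
        return new RowTypeInfo(new TypeInformation<?>[]{Types.STRING}, new String[]{"word"});
    }

    @Override
    public TableSchema getTableSchema() {
        return TableSchema.builder().field("word", DataTypes.STRING()).build();
    }
}

It would then be registered exactly like the source above, e.g. registerTableSource("kafkaDataStream", new WordTableSource()).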
 
Example 2
Source File: MaxTravellersPerDestination.java    From pravega-samples with Apache License 2.0
@Override
public void handleRequest() {

    TableSchema tableSchema = TripRecord.getTableSchema();

    FlinkPravegaJsonTableSource source = FlinkPravegaJsonTableSource.builder()
            .forStream(Stream.of(getScope(), getStream()).getScopedName())
            .withPravegaConfig(getPravegaConfig())
            .failOnMissingField(true)
            .withRowtimeAttribute("dropOffTime", new ExistingField("dropOffTime"), new BoundedOutOfOrderTimestamps(30000L))
            .withSchema(tableSchema)
            .withReaderGroupScope(getScope())
            .build();

    StreamExecutionEnvironment env = getStreamExecutionEnvironment();

    // create a TableEnvironment
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
    tEnv.registerTableSource("TaxiRide", source);

    String fields = "passengerCount, dropOffTime, destLocationZone";

    Table noOfTravelersPerDest = tEnv
            .scan("TaxiRide")
            .select(fields)
            .window(Tumble.over("1.hour").on("dropOffTime").as("w"))
            .groupBy("destLocationZone, w")
            .select("destLocationZone, w.start AS start, w.end AS end, count(passengerCount) AS cnt");

    tEnv.toAppendStream(noOfTravelersPerDest, Row.class).print();

    try {
        env.execute("Max-Travellers-Per-Destination");
    } catch (Exception e) {
        log.error("Application Failed", e);
    }
}
 
Example 3
Source File: UnitTestSuiteFlink.java    From df_data_service with Apache License 2.0
public static void testFlinkAvroSQLJson() {
    System.out.println("TestCase_Test Avro SQL to Json Sink");
    final String STATIC_USER_SCHEMA = "{"
            + "\"type\":\"record\","
            + "\"name\":\"myrecord\","
            + "\"fields\":["
            + "  { \"name\":\"symbol\", \"type\":\"string\" },"
            + "  { \"name\":\"name\", \"type\":\"string\" },"
            + "  { \"name\":\"exchangecode\", \"type\":\"string\" }"
            + "]}";

    String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
    DFRemoteStreamEnvironment env = new DFRemoteStreamEnvironment("localhost", 6123, jarPath)
            .setParallelism(1);
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "consumer_test");
    properties.setProperty("schema.subject", "test-value");
    properties.setProperty("schema.registry", "localhost:8081");
    properties.setProperty("useAvro", "avro");
    properties.setProperty("static.avro.schema",
            SchemaRegistryClient.getSchemaFromRegistry("http://localhost:8081", "test-value", "latest").toString());

    try {
        HashMap<String, String> hm = new HashMap<>();
        Kafka09AvroTableSource kafkaAvroTableSource =  new Kafka09AvroTableSource("test", properties);
        tableEnv.registerTableSource("Orders", kafkaAvroTableSource);

        Table result = tableEnv.sql("SELECT name, symbol, exchangecode FROM Orders");
        //Kafka09JsonTableSink json_sink = new Kafka09JsonTableSink ("test_json", properties, new FlinkFixedPartitioner());
        Kafka09AvroTableSink json_sink = new Kafka09AvroTableSink ("test_json", properties, new FlinkFixedPartitioner());

        // write the result Table to the TableSink
        result.writeToSink(json_sink);
        env.executeWithDFObj("Flink AVRO SQL KAFKA Test", new DFJobPOPJ().setJobConfig(hm) );
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 4
Source File: UnitTestSuiteFlink.java    From df_data_service with Apache License 2.0
public static void testFlinkAvroSQL() {
    System.out.println("TestCase_Test Avro SQL");
    String resultFile = "/home/vagrant/test.txt";

    String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
            .setParallelism(1);
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "consumer_test");
    properties.setProperty("schema.subject", "test-value");
    properties.setProperty("schema.registry", "localhost:8081");
    properties.setProperty("static.avro.schema", "empty_schema");

    try {
        Kafka09AvroTableSource kafkaAvroTableSource =  new Kafka09AvroTableSource("test", properties);
        tableEnv.registerTableSource("Orders", kafkaAvroTableSource);

        //Table result = tableEnv.sql("SELECT STREAM name, symbol, exchange FROM Orders");
        Table result = tableEnv.sql("SELECT name, symbol, exchangecode FROM Orders");

        Files.deleteIfExists(Paths.get(resultFile));

        // create a TableSink
        TableSink sink = new CsvTableSink(resultFile, "|");
        // write the result Table to the TableSink
        result.writeToSink(sink);
        env.execute("Flink AVRO SQL KAFKA Test");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 5
Source File: FlinkAvroSQLClient.java    From df_data_service with Apache License 2.0
public static void tcFlinkAvroSQL(String KafkaServerHostPort, String SchemaRegistryHostPort,
                                  String srcTopic, String targetTopic,
                                  String consumerGroupId, String sinkKeys, String sqlState) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

    Properties properties = new Properties();
    properties.setProperty(ConstantApp.PK_KAFKA_HOST_PORT.replace("_", "."), KafkaServerHostPort);
    properties.setProperty(ConstantApp.PK_KAFKA_CONSUMER_GROURP, consumerGroupId);
    properties.setProperty(ConstantApp.PK_KAFKA_SCHEMA_REGISTRY_HOST_PORT.replace("_", "."), SchemaRegistryHostPort);
    properties.setProperty(ConstantApp.PK_FLINK_TABLE_SINK_KEYS, sinkKeys);

    String[] srcTopicList = srcTopic.split(",");
    for (int i = 0; i < srcTopicList.length; i++) {
        properties.setProperty(ConstantApp.PK_SCHEMA_SUB_INPUT, srcTopicList[i]);
        properties.setProperty(ConstantApp.PK_SCHEMA_ID_INPUT, SchemaRegistryClient.getLatestSchemaIDFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_INPUT) + "");
        properties.setProperty(ConstantApp.PK_SCHEMA_STR_INPUT, SchemaRegistryClient.getLatestSchemaFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_INPUT).toString());
        tableEnv.registerTableSource(srcTopicList[i], new Kafka010AvroTableSource(srcTopicList[i], properties));
    }

    try {
        Table result = tableEnv.sql(sqlState);
        SchemaRegistryClient.addSchemaFromTableResult(SchemaRegistryHostPort, targetTopic, result);
        // For old producer, we need to create topic-value subject as well
        SchemaRegistryClient.addSchemaFromTableResult(SchemaRegistryHostPort, targetTopic + "-value", result);

        // delivered properties for sink
        properties.setProperty(ConstantApp.PK_SCHEMA_SUB_OUTPUT, targetTopic);
        properties.setProperty(ConstantApp.PK_SCHEMA_ID_OUTPUT, SchemaRegistryClient.getLatestSchemaIDFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_OUTPUT) + "");
        properties.setProperty(ConstantApp.PK_SCHEMA_STR_OUTPUT, SchemaRegistryClient.getLatestSchemaFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_OUTPUT).toString());

        Kafka09AvroTableSink avro_sink =
                new Kafka09AvroTableSink(targetTopic, properties, new FlinkFixedPartitioner());
        result.writeToSink(avro_sink);
        env.execute("DF_FlinkSQL_Client_" + srcTopic + "-" + targetTopic);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 6
Source File: PopularTaxiVendor.java    From pravega-samples with Apache License 2.0
@Override
public void handleRequest() {

    TableSchema tableSchema = TripRecord.getTableSchema();

    FlinkPravegaJsonTableSource source = FlinkPravegaJsonTableSource.builder()
            .forStream(Stream.of(getScope(), getStream()).getScopedName())
            .withPravegaConfig(getPravegaConfig())
            .failOnMissingField(true)
            .withRowtimeAttribute("pickupTime", new ExistingField("pickupTime"), new BoundedOutOfOrderTimestamps(30000L))
            .withSchema(tableSchema)
            .withReaderGroupScope(getScope())
            .build();

    StreamExecutionEnvironment env = getStreamExecutionEnvironment();

    // create a TableEnvironment
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
    tEnv.registerTableSource("TaxiRide", source);

    String fields = "vendorId, pickupTime, startLocationId, destLocationId, startLocationBorough, startLocationZone, destLocationBorough, destLocationZone";

    Table popularRides = tEnv
            .scan("TaxiRide")
            .select(fields)
            .window(Slide.over("15.minutes").every("5.minutes").on("pickupTime").as("w"))
            .groupBy("vendorId, w")
            .select("vendorId, w.start AS start, w.end AS end, count(vendorId) AS cnt");

    tEnv.toAppendStream(popularRides, Row.class).print();

    try {
        env.execute("Popular-Taxi-Vendor");
    } catch (Exception e) {
        log.error("Application Failed", e);
    }
}
 
Example 7
Source File: CustomTableSinkMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        blinkStreamEnv.setParallelism(1);
        EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();
        StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

        String path = SQLExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();

        CsvTableSource csvTableSource = CsvTableSource.builder()
                .field("word", Types.STRING)
                .path(path)
                .build();
        blinkStreamTableEnv.registerTableSource("zhisheng", csvTableSource);

        RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(new String[]{"c", "word"}, new TypeInformation[]{Types.LONG, Types.STRING});
        // Alternatively, with Blink DataTypes:
//        RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(new String[]{"c", "word"}, new DataType[]{DataTypes.BIGINT(), DataTypes.STRING()});
        blinkStreamTableEnv.registerTableSink("sinkTable", retractStreamTableSink);

        Table wordWithCount = blinkStreamTableEnv.sqlQuery("SELECT count(word) AS c, word FROM zhisheng GROUP BY word");

        wordWithCount.insertInto("sinkTable");
        blinkStreamTableEnv.execute("Blink Custom Table Sink");
    }
 
Example 8
Source File: HBaseConnectorITCase.java    From flink with Apache License 2.0
@Test
public void testHBaseLookupTableSource() throws Exception {
	if (OLD_PLANNER.equals(planner)) {
		// lookup table source is only supported in blink planner, skip for old planner
		return;
	}
	StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment streamTableEnv = StreamTableEnvironment.create(streamEnv, streamSettings);
	StreamITCase.clear();

	// prepare a source table
	String srcTableName = "src";
	DataStream<Row> ds = streamEnv.fromCollection(testData2).returns(testTypeInfo2);
	Table in = streamTableEnv.fromDataStream(ds, "a, b, c, proc.proctime");
	streamTableEnv.registerTable(srcTableName, in);

	Map<String, String> tableProperties = hbaseTableProperties();
	TableSource source = TableFactoryService
		.find(HBaseTableFactory.class, tableProperties)
		.createTableSource(tableProperties);
	streamTableEnv.registerTableSource("hbaseLookup", source);
	// perform a temporal table join query
	String query = "SELECT a,family1.col1, family3.col3 FROM src " +
		"JOIN hbaseLookup FOR SYSTEM_TIME AS OF src.proc as h ON src.a = h.rk";
	Table result = streamTableEnv.sqlQuery(query);

	DataStream<Row> resultSet = streamTableEnv.toAppendStream(result, Row.class);
	resultSet.addSink(new StreamITCase.StringSink<>());

	streamEnv.execute();

	List<String> expected = new ArrayList<>();
	expected.add("1,10,Welt-1");
	expected.add("2,20,Welt-2");
	expected.add("3,30,Welt-3");
	expected.add("3,30,Welt-3");

	StreamITCase.compareWithList(expected);
}
 
Example 9
Source File: UnitTestSuiteFlink.java    From df_data_service with Apache License 2.0
public static void testFlinkAvroScriptWithStaticSchema() {
    System.out.println("TestCase_Test Avro Table API Script with static Schema");

    final String STATIC_USER_SCHEMA = "{"
            + "\"type\":\"record\","
            + "\"name\":\"myrecord\","
            + "\"fields\":["
            + "  { \"name\":\"symbol\", \"type\":\"string\" },"
            + "  { \"name\":\"name\", \"type\":\"string\" },"
            + "  { \"name\":\"exchangecode\", \"type\":\"string\" }"
            + "]}";

    String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
            .setParallelism(1);
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "consumer_test");
    properties.setProperty("schema.subject", "test-value");
    properties.setProperty("schema.registry", "localhost:8081");
    properties.setProperty("static.avro.schema", STATIC_USER_SCHEMA);

    try {
        Kafka09AvroTableSource kafkaAvroTableSource =  new Kafka09AvroTableSource("test", properties);
        tableEnv.registerTableSource("Orders", kafkaAvroTableSource);

        Table ingest = tableEnv.scan("Orders");

        String className = "dynamic.FlinkScript";

        String header = "package dynamic;\n" +
                "import org.apache.flink.table.api.Table;\n" +
                "import com.datafibers.util.*;\n";

        String transScript = "select(\"name\")";

        String javaCode = header +
                "public class FlinkScript implements DynamicRunner {\n" +
                "@Override \n" +
                "    public Table transTableObj(Table tbl) {\n" +
                "try {" +
                "return tbl."+ transScript + ";" +
                "} catch (Exception e) {" +
                "};" +
                "return null;}}";

        // Dynamic code generation
        Class aClass = CompilerUtils.CACHED_COMPILER.loadFromJava(className, javaCode);
        DynamicRunner runner = (DynamicRunner) aClass.newInstance();
        Table result = runner.transTableObj(ingest);

        Kafka09AvroTableSink sink =
                new Kafka09AvroTableSink ("test_json", properties, new FlinkFixedPartitioner());
        // write the result Table to the TableSink
        result.writeToSink(sink);
        env.execute("Flink AVRO SQL KAFKA Test");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 10
Source File: StreamSQLTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		String outputPath = params.getRequired("outputPath");
		String planner = params.get("planner", "old");

		final EnvironmentSettings.Builder builder = EnvironmentSettings.newInstance();
		builder.inStreamingMode();

		if (planner.equals("old")) {
			builder.useOldPlanner();
		} else if (planner.equals("blink")) {
			builder.useBlinkPlanner();
		}

		final EnvironmentSettings settings = builder.build();

		final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
			3,
			Time.of(10, TimeUnit.SECONDS)
		));
		sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		sEnv.enableCheckpointing(4000);
		sEnv.getConfig().setAutoWatermarkInterval(1000);

		final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings);

		tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
		tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

		int overWindowSizeSeconds = 1;
		int tumbleWindowSizeSeconds = 10;

		String overQuery = String.format(
			"SELECT " +
			"  key, " +
			"  rowtime, " +
			"  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
			"FROM table1",
			overWindowSizeSeconds);

		String tumbleQuery = String.format(
			"SELECT " +
			"  key, " +
			"  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
			"  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
			"FROM (%s) " +
			"WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
			"GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
			tumbleWindowSizeSeconds,
			tumbleWindowSizeSeconds,
			overQuery,
			tumbleWindowSizeSeconds);

		String joinQuery = String.format(
			"SELECT " +
			"  t1.key, " +
			"  t2.rowtime AS rowtime, " +
			"  t2.correct," +
			"  t2.wStart " +
			"FROM table2 t1, (%s) t2 " +
			"WHERE " +
			"  t1.key = t2.key AND " +
			"  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
			tumbleQuery,
			tumbleWindowSizeSeconds);

		String finalAgg = String.format(
			"SELECT " +
			"  SUM(correct) AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
			"FROM (%s) " +
			"GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
			joinQuery);

		// get Table for SQL query
		Table result = tEnv.sqlQuery(finalAgg);
		// convert Table into append-only DataStream
		DataStream<Row> resultStream =
			tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

		final StreamingFileSink<Row> sink = StreamingFileSink
			.forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
				PrintStream out = new PrintStream(stream);
				out.println(element.toString());
			})
			.withBucketAssigner(new KeyBucketAssigner())
			.withRollingPolicy(OnCheckpointRollingPolicy.build())
			.build();

		resultStream
			// inject a KillMapper that forwards all records but terminates the first execution attempt
			.map(new KillMapper()).setParallelism(1)
			// add sink function
			.addSink(sink).setParallelism(1);

		sEnv.execute();
	}
 
Example 11
Source File: StreamSQLTestProgram.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		String outputPath = params.getRequired("outputPath");

		StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
			3,
			Time.of(10, TimeUnit.SECONDS)
		));
		sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		sEnv.enableCheckpointing(4000);
		sEnv.getConfig().setAutoWatermarkInterval(1000);

		StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv);

		tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
		tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

		int overWindowSizeSeconds = 1;
		int tumbleWindowSizeSeconds = 10;

		String overQuery = String.format(
			"SELECT " +
			"  key, " +
			"  rowtime, " +
			"  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
			"FROM table1",
			overWindowSizeSeconds);

		String tumbleQuery = String.format(
			"SELECT " +
			"  key, " +
			"  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
			"  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
			"FROM (%s) " +
			"WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
			"GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
			tumbleWindowSizeSeconds,
			tumbleWindowSizeSeconds,
			overQuery,
			tumbleWindowSizeSeconds);

		String joinQuery = String.format(
			"SELECT " +
			"  t1.key, " +
			"  t2.rowtime AS rowtime, " +
			"  t2.correct," +
			"  t2.wStart " +
			"FROM table2 t1, (%s) t2 " +
			"WHERE " +
			"  t1.key = t2.key AND " +
			"  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
			tumbleQuery,
			tumbleWindowSizeSeconds);

		String finalAgg = String.format(
			"SELECT " +
			"  SUM(correct) AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
			"FROM (%s) " +
			"GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
			joinQuery);

		// get Table for SQL query
		Table result = tEnv.sqlQuery(finalAgg);
		// convert Table into append-only DataStream
		DataStream<Row> resultStream =
			tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

		final StreamingFileSink<Row> sink = StreamingFileSink
			.forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
				PrintStream out = new PrintStream(stream);
				out.println(element.toString());
			})
			.withBucketAssigner(new KeyBucketAssigner())
			.withRollingPolicy(OnCheckpointRollingPolicy.build())
			.build();

		resultStream
			// inject a KillMapper that forwards all records but terminates the first execution attempt
			.map(new KillMapper()).setParallelism(1)
			// add sink function
			.addSink(sink).setParallelism(1);

		sEnv.execute();
	}
 
Example 12
Source File: PopularDestinationQuery.java    From pravega-samples with Apache License 2.0
@Override
public void handleRequest() {

    TableSchema tableSchema = TripRecord.getTableSchema();

    FlinkPravegaJsonTableSource source = FlinkPravegaJsonTableSource.builder()
            .forStream(Stream.of(getScope(), getStream()).getScopedName())
            .withPravegaConfig(getPravegaConfig())
            .failOnMissingField(true)
            .withRowtimeAttribute("pickupTime",
                    new ExistingField("pickupTime"),
                    new BoundedOutOfOrderTimestamps(30000L))
            .withSchema(tableSchema)
            .withReaderGroupScope(getScope())
            .build();


    StreamExecutionEnvironment env = getStreamExecutionEnvironment();

    // create a TableEnvironment
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
    tEnv.registerTableSource("TaxiRide", source);

    String query =
            "SELECT " +
                    "destLocationId, wstart, wend, cnt " +
                    "FROM " +
                    "(SELECT " +
                    "destLocationId, " +
                    "HOP_START(pickupTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wstart, " +
                    "HOP_END(pickupTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wend, " +
                    "COUNT(destLocationId) AS cnt " +
                    "FROM " +
                    "(SELECT " +
                    "pickupTime, " +
                    "destLocationId " +
                    "FROM TaxiRide) " +
                    "GROUP BY destLocationId, HOP(pickupTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE)) " +
                    "WHERE cnt > " + getLimit();

    Table results = tEnv.sqlQuery(query);

    tEnv.toAppendStream(results, Row.class).print();

    try {
        env.execute("Popular-Destination");
    } catch (Exception e) {
        log.error("Application Failed", e);
    }
}
 
Example 13
Source File: FlinkAvroTableAPIClient.java    From df_data_service with Apache License 2.0
public static void tcFlinkAvroTableAPI(String KafkaServerHostPort, String SchemaRegistryHostPort,
                                  String srcTopic, String targetTopic,
                                  String consumerGroupId, String sinkKeys, String transScript) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

    Properties properties = new Properties();
    properties.setProperty(ConstantApp.PK_KAFKA_HOST_PORT.replace("_", "."), KafkaServerHostPort);
    properties.setProperty(ConstantApp.PK_KAFKA_CONSUMER_GROURP, consumerGroupId);
    properties.setProperty(ConstantApp.PK_KAFKA_SCHEMA_REGISTRY_HOST_PORT.replace("_", "."), SchemaRegistryHostPort);
    properties.setProperty(ConstantApp.PK_FLINK_TABLE_SINK_KEYS, sinkKeys);

    String[] srcTopicList = srcTopic.split(",");
    for (int i = 0; i < srcTopicList.length; i++) {
        properties.setProperty(ConstantApp.PK_SCHEMA_SUB_INPUT, srcTopicList[i]);
        properties.setProperty(ConstantApp.PK_SCHEMA_ID_INPUT, SchemaRegistryClient.getLatestSchemaIDFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_INPUT) + "");
        properties.setProperty(ConstantApp.PK_SCHEMA_STR_INPUT, SchemaRegistryClient.getLatestSchemaFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_INPUT).toString());
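        // NOTE: the source is registered under the full srcTopic string rather than
        // srcTopicList[i], so this loop effectively assumes a single source topic;
        // a second topic would collide on the same table name.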
        tableEnv.registerTableSource(srcTopic, new Kafka010AvroTableSource(srcTopicList[i], properties));
    }

    try {
        Table result;
        Table ingest = tableEnv.scan(srcTopic);
        String className = "dynamic.FlinkScript";
        String header = "package dynamic;\n" +
                "import org.apache.flink.table.api.Table;\n" +
                "import com.datafibers.util.*;\n";
        String javaCode = header +
                "public class FlinkScript implements DynamicRunner {\n" +
                "@Override \n" +
                "    public Table transTableObj(Table tbl) {\n" +
                "try {" +
                "return tbl." + transScript + ";\n" +
                "} catch (Exception e) {" +
                "};" +
                "return null;}}";
        // Dynamic code generation
        Class aClass = CompilerUtils.CACHED_COMPILER.loadFromJava(className, javaCode);
        DynamicRunner runner = (DynamicRunner) aClass.newInstance();
        result = runner.transTableObj(ingest);

        SchemaRegistryClient.addSchemaFromTableResult(SchemaRegistryHostPort, targetTopic, result);
        // delivered properties for sink
        properties.setProperty(ConstantApp.PK_SCHEMA_SUB_OUTPUT, targetTopic);
        properties.setProperty(ConstantApp.PK_SCHEMA_ID_OUTPUT, SchemaRegistryClient.getLatestSchemaIDFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_OUTPUT) + "");
        properties.setProperty(ConstantApp.PK_SCHEMA_STR_OUTPUT, SchemaRegistryClient.getLatestSchemaFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_OUTPUT).toString());

        Kafka09AvroTableSink avro_sink =
                new Kafka09AvroTableSink(targetTopic, properties, new FlinkFixedPartitioner());
        result.writeToSink(avro_sink);
        env.execute("DF_FlinkTableAPI_Client_" + srcTopic + "-" + targetTopic);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 14
Source File: TCFlinkAvroSQL.java    From df_data_service with Apache License 2.0
public static void tcFlinkAvroSQL(String SchemaRegistryHostPort, String srcTopic, String targetTopic, String sqlState) {
    System.out.println("tcFlinkAvroSQL");
    String resultFile = "testResult";

    String jarPath = "C:/Users/dadu/Coding/df_data_service/target/df-data-service-1.1-SNAPSHOT-fat.jar";
    //String jarPath = "/Users/will/Documents/Coding/GitHub/df_data_service/target/df-data-service-1.1-SNAPSHOT-fat.jar";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
            .setParallelism(1);
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

    Properties properties = new Properties();
    properties.setProperty(ConstantApp.PK_KAFKA_HOST_PORT.replace("_", "."), "localhost:9092");
    properties.setProperty(ConstantApp.PK_KAFKA_CONSUMER_GROURP, "consumer_test");
    //properties.setProperty(ConstantApp.PK_SCHEMA_SUB_OUTPUT, "test");
    properties.setProperty(ConstantApp.PK_KAFKA_SCHEMA_REGISTRY_HOST_PORT.replace("_", "."), SchemaRegistryHostPort);
    properties.setProperty(ConstantApp.PK_FLINK_TABLE_SINK_KEYS, "symbol");

    String[] srcTopicList = srcTopic.split(",");
    for (int i = 0; i < srcTopicList.length; i++) {
        properties.setProperty(ConstantApp.PK_SCHEMA_SUB_INPUT, srcTopicList[i]);
        properties.setProperty(ConstantApp.PK_SCHEMA_ID_INPUT, SchemaRegistryClient.getLatestSchemaIDFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_INPUT) + "");
        properties.setProperty(ConstantApp.PK_SCHEMA_STR_INPUT, SchemaRegistryClient.getLatestSchemaFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_INPUT).toString());
        tableEnv.registerTableSource(srcTopicList[i], new Kafka010AvroTableSource(srcTopicList[i], properties));
    }

    try {
        Table result = tableEnv.sql(sqlState);
        result.printSchema();
        System.out.println("generated avro schema is = " + SchemaRegistryClient.tableAPIToAvroSchema(result, targetTopic));
        SchemaRegistryClient.addSchemaFromTableResult(SchemaRegistryHostPort, targetTopic, result);

        // delivered properties
        properties.setProperty(ConstantApp.PK_SCHEMA_SUB_OUTPUT, targetTopic);
        properties.setProperty(ConstantApp.PK_SCHEMA_ID_OUTPUT, SchemaRegistryClient.getLatestSchemaIDFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_OUTPUT) + "");
        properties.setProperty(ConstantApp.PK_SCHEMA_STR_OUTPUT, SchemaRegistryClient.getLatestSchemaFromProperty(properties, ConstantApp.PK_SCHEMA_SUB_OUTPUT).toString());

        System.out.println(Paths.get(resultFile).toAbsolutePath());
        Kafka09AvroTableSink avro_sink =
                new Kafka09AvroTableSink(targetTopic, properties, new FlinkFixedPartitioner());
        result.writeToSink(avro_sink);
        //result.writeToSink(new CsvTableSink(resultFile, "|", 1, FileSystem.WriteMode.OVERWRITE));
        env.execute("tcFlinkAvroSQL");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 15
Source File: RidesPerHour.java    From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {

		// read parameters
		ParameterTool params = ParameterTool.fromArgs(args);
		String input = params.getRequired("input");

		final int maxEventDelay = 60;       	// events are out of order by max 60 seconds
		final int servingSpeedFactor = 1800; 	// events of 30 minutes are served in 1 second

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		// create a TableEnvironment
		StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

		// register TaxiRideTableSource as table "TaxiRides"
		tEnv.registerTableSource(
				"TaxiRides",
				new TaxiRideTableSource(
						input,
						maxEventDelay,
						servingSpeedFactor));

		// register user-defined functions
		tEnv.registerFunction("isInNYC", new GeoUtils.IsInNYC());
		tEnv.registerFunction("toCellId", new GeoUtils.ToCellId());
		tEnv.registerFunction("toCoords", new GeoUtils.ToCoords());

		Table results = tEnv.sqlQuery(
				//"SELECT TUMBLE_START(eventTime, INTERVAL '1' HOUR), isStart, count(isStart) FROM TaxiRides GROUP BY isStart, TUMBLE(eventTime, INTERVAL '1' HOUR)"
				//"SELECT avg(endTime - startTime), passengerCnt FROM TaxiRides GROUP BY passengerCnt"
				"SELECT CAST (toCellId(endLon, endLat) AS VARCHAR), eventTime," +
				"COUNT(*) OVER (" +
					"PARTITION BY toCellId(endLon, endLat) ORDER BY eventTime RANGE BETWEEN INTERVAL '10' MINUTE PRECEDING AND CURRENT ROW" +
				") " +
				"FROM( SELECT * FROM TaxiRides WHERE not isStart AND toCellId(endLon, endLat) = 50801 )"
		);

		// convert Table into a retract stream and print it
		// (a retract stream also covers the commented-out GROUP BY queries above,
		// which produce updating results)
		tEnv.toRetractStream(results, Row.class).print();

		// execute query
		env.execute();
	}
 
Example 16
Source File: PopularPlacesTableApi.java    From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {

		// read parameters
		ParameterTool params = ParameterTool.fromArgs(args);
		String input = params.getRequired("input");

		final int maxEventDelay = 60;       // events are out of order by max 60 seconds
		final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		// create a TableEnvironment
		StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

		// register TaxiRideTableSource as table "TaxiRides"
		tEnv.registerTableSource(
				"TaxiRides",
				new TaxiRideTableSource(
						input,
						maxEventDelay,
						servingSpeedFactor));

		// register user-defined functions
		tEnv.registerFunction("isInNYC", new GeoUtils.IsInNYC());
		tEnv.registerFunction("toCellId", new GeoUtils.ToCellId());
		tEnv.registerFunction("toCoords", new GeoUtils.ToCoords());

		Table popPlaces = tEnv
				// scan TaxiRides table
				.scan("TaxiRides")
				// filter for valid rides
				.filter("isInNYC(startLon, startLat) && isInNYC(endLon, endLat)")
				// select fields and compute grid cell of departure or arrival coordinates
				.select("eventTime, " +
						"isStart, " +
						"(isStart = true).?(toCellId(startLon, startLat), toCellId(endLon, endLat)) AS cell")
				// specify sliding window over 15 minutes with slide of 5 minutes
				.window(Slide.over("15.minutes").every("5.minutes").on("eventTime").as("w"))
				// group by cell, isStart, and window
				.groupBy("cell, isStart, w")
				// count departures and arrivals per cell (location) and window (time)
				.select("cell, isStart, w.start AS start, w.end AS end, count(isStart) AS popCnt")
				// filter for popular places
				.filter("popCnt > 20")
				// convert cell back to coordinates
				.select("toCoords(cell) AS location, start, end, isStart, popCnt");

		// convert Table into an append stream and print it
		tEnv.toAppendStream(popPlaces, Row.class).print();

		// execute query
		env.execute();
	}
 
Example 17
Source File: UnitTestSuiteFlink.java    From df_data_service with Apache License 2.0
public static void testFlinkAvroSQLWithStaticSchema() {
    System.out.println("TestCase_Test Avro SQL with static Schema");

    final String STATIC_USER_SCHEMA = "{"
            + "\"type\":\"record\","
            + "\"name\":\"myrecord\","
            + "\"fields\":["
            + "  { \"name\":\"symbol\", \"type\":\"string\" },"
            + "  { \"name\":\"name\", \"type\":\"string\" },"
            + "  { \"name\":\"exchangecode\", \"type\":\"string\" }"
            + "]}";
    String resultFile = "/home/vagrant/test.txt";

    String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
            .setParallelism(1);
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "consumer_test");
    properties.setProperty("schema.subject", "test-value");
    properties.setProperty("schema.registry", "localhost:8081");
    properties.setProperty("static.avro.schema", STATIC_USER_SCHEMA);

    try {
        Kafka09AvroTableSource kafkaAvroTableSource =  new Kafka09AvroTableSource("test", properties);
        tableEnv.registerTableSource("Orders", kafkaAvroTableSource);

        //Table result = tableEnv.sql("SELECT STREAM name, symbol, exchange FROM Orders");
        Table result = tableEnv.sql("SELECT name, symbol, exchangecode FROM Orders");

        Files.deleteIfExists(Paths.get(resultFile));

        // create a TableSink
        TableSink sink = new CsvTableSink(resultFile, "|");
        // write the result Table to the TableSink
        result.writeToSink(sink);
        env.execute("Flink AVRO SQL KAFKA Test");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 18
Source File: PopularPlacesSql.java    From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {

		// read parameters
		ParameterTool params = ParameterTool.fromArgs(args);
		String input = params.getRequired("input");

		final int maxEventDelay = 60;       // events are out of order by max 60 seconds
		final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		// create a TableEnvironment
		StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

		// register TaxiRideTableSource as table "TaxiRides"
		tEnv.registerTableSource(
				"TaxiRides",
				new TaxiRideTableSource(
						input,
						maxEventDelay,
						servingSpeedFactor));

		// register user-defined functions
		tEnv.registerFunction("isInNYC", new GeoUtils.IsInNYC());
		tEnv.registerFunction("toCellId", new GeoUtils.ToCellId());
		tEnv.registerFunction("toCoords", new GeoUtils.ToCoords());

		Table results = tEnv.sqlQuery(
			"SELECT " +
				"toCoords(cell), wstart, wend, isStart, popCnt " +
			"FROM " +
				"(SELECT " +
					"cell, " +
					"isStart, " +
					"HOP_START(eventTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wstart, " +
					"HOP_END(eventTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wend, " +
					"COUNT(isStart) AS popCnt " +
				"FROM " +
					"(SELECT " +
						"eventTime, " +
						"isStart, " +
						"CASE WHEN isStart THEN toCellId(startLon, startLat) ELSE toCellId(endLon, endLat) END AS cell " +
					"FROM TaxiRides " +
					"WHERE isInNYC(startLon, startLat) AND isInNYC(endLon, endLat)) " +
				"GROUP BY cell, isStart, HOP(eventTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE)) " +
			"WHERE popCnt > 20"
			);

		// convert Table into an append stream and print it
		// (if instead we needed a retraction stream we would use tEnv.toRetractStream)
		tEnv.toAppendStream(results, Row.class).print();

		// execute query
		env.execute();
	}