Java Code Examples for org.apache.flink.table.api.java.StreamTableEnvironment#sqlQuery()

The following examples show how to use org.apache.flink.table.api.java.StreamTableEnvironment#sqlQuery(). Each example is taken from an open-source project; the source file and license are noted above it.
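Most of the examples follow the same basic pattern: create a StreamTableEnvironment, register a DataStream as a table, run sqlQuery(), and convert the resulting Table back into a DataStream. Below is a minimal, self-contained sketch of that pattern using the same legacy Table API as the examples; the table name "People" and its fields are made up for illustration and do not come from any of the projects listed here.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

public class SqlQuerySketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // register a stream as a table with named fields
        DataStream<Tuple2<Long, String>> ds = env.fromElements(Tuple2.of(1L, "a"), Tuple2.of(2L, "b"));
        tableEnv.registerDataStream("People", ds, "id, name");

        // run a SQL query against the registered table
        Table result = tableEnv.sqlQuery("SELECT id, name FROM People WHERE id > 1");

        // convert the result back to an append-only stream and print it
        tableEnv.toAppendStream(result, Row.class).print();
        env.execute();
    }
}
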
Example 1
Source File: SqlTest.java    From flink-tutorials with Apache License 2.0
@Test
public void test() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment tableEnv = createTableEnv(env);

	DataStream<Tuple2<Long, String>> ds = env.fromElements(
			Tuple2.of(1L, "a"),
			Tuple2.of(2L, "b"),
			Tuple2.of(3L, "c")
	);

	Table table = tableEnv.fromDataStream(ds, "id, name");

	// embedding the Table object in the SQL string implicitly registers it under an automatically generated name
	Table result = tableEnv.sqlQuery("SELECT * from " + table);

	tableEnv.toAppendStream(result, ds.getType()).print();
	env.execute("test");
}
 
Example 2
Source File: JavaSqlITCase.java    From flink with Apache License 2.0
@Test
public void testSelect() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
	StreamITCase.clear();

	DataStream<Tuple3<Integer, Long, String>> ds = JavaStreamTestData.getSmall3TupleDataSet(env);
	Table in = tableEnv.fromDataStream(ds, "a,b,c");
	tableEnv.registerTable("MyTable", in);

	String sqlQuery = "SELECT * FROM MyTable";
	Table result = tableEnv.sqlQuery(sqlQuery);

	DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
	resultSet.addSink(new StreamITCase.StringSink<Row>());
	env.execute();

	List<String> expected = new ArrayList<>();
	expected.add("1,1,Hi");
	expected.add("2,2,Hello");
	expected.add("3,2,Hello world");

	StreamITCase.compareWithList(expected);
}
 
Example 3
Source File: JavaSqlITCase.java    From flink with Apache License 2.0
@Test
public void testFilter() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
	StreamITCase.clear();

	DataStream<Tuple5<Integer, Long, Integer, String, Long>> ds = JavaStreamTestData.get5TupleDataStream(env);
	tableEnv.registerDataStream("MyTable", ds, "a, b, c, d, e");

	String sqlQuery = "SELECT a, b, e FROM MyTable WHERE c < 4";
	Table result = tableEnv.sqlQuery(sqlQuery);

	DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
	resultSet.addSink(new StreamITCase.StringSink<Row>());
	env.execute();

	List<String> expected = new ArrayList<>();
	expected.add("1,1,1");
	expected.add("2,2,2");
	expected.add("2,3,1");
	expected.add("3,4,2");

	StreamITCase.compareWithList(expected);
}
 
Example 4
Source File: StreamSQLExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

    DataStream<Order> orderA = env.fromCollection(Arrays.asList(
            new Order(1L, "beer", 3),
            new Order(1L, "diaper", 4),
            new Order(3L, "rubber", 2)));

    DataStream<Order> orderB = env.fromCollection(Arrays.asList(
            new Order(2L, "pen", 3),
            new Order(2L, "rubber", 3),
            new Order(4L, "beer", 1)));

    Table tableA = tEnv.fromDataStream(orderA, "user, product, amount");

    tEnv.registerDataStream("OrderB", orderB, "user, product, amount");

    Table result = tEnv.sqlQuery("SELECT * FROM " + tableA + " WHERE amount > 2 UNION ALL " +
            "SELECT * FROM OrderB WHERE amount < 2");

    tEnv.toAppendStream(result, Order.class).print();

    env.execute();
}
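
The Order class used by this example (and by Example 9 below) is not shown in the listing. Since toAppendStream(result, Order.class) requires a POJO, it presumably looks roughly like the following sketch; the field names are inferred from the expression "user, product, amount" above, and the project's actual class may differ.

// Sketch of the Order POJO assumed above: a public class with public fields
// and a no-arg constructor, as required for Flink POJO serialization.
public static class Order {
    public Long user;
    public String product;
    public int amount;

    public Order() {}

    public Order(Long user, String product, int amount) {
        this.user = user;
        this.product = product;
        this.amount = amount;
    }

    @Override
    public String toString() {
        return "Order{user=" + user + ", product=" + product + ", amount=" + amount + "}";
    }
}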
 
Example 5
Source File: CustomKafkaSourceMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    blinkStreamTableEnv.registerTableSource("kafkaDataStream", new MyKafkaTableSource(ExecutionEnvUtil.PARAMETER_TOOL));

    RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(new String[]{"_count", "word"}, new DataType[]{DataTypes.BIGINT(), DataTypes.STRING()});
    blinkStreamTableEnv.registerTableSink("sinkTable", retractStreamTableSink);

    Table wordCount = blinkStreamTableEnv.sqlQuery("SELECT count(word) AS _count,word FROM kafkaDataStream GROUP BY word");

    wordCount.insertInto("sinkTable");

    blinkStreamTableEnv.execute("Blink Custom Kafka Table Source");
}
 
Example 6
Source File: SQLExampleWordCount.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String path = SQLExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();

    CsvTableSource csvTableSource = CsvTableSource.builder()
            .field("word", Types.STRING)
            .path(path)
            .build();
    blinkStreamTableEnv.registerTableSource("zhisheng", csvTableSource);
    Table wordWithCount = blinkStreamTableEnv.sqlQuery("SELECT count(word), word FROM zhisheng GROUP BY word");
    blinkStreamTableEnv.toRetractStream(wordWithCount, Row.class).print();

    blinkStreamTableEnv.execute("Blink Stream SQL Job");
}
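
Because the grouped aggregation above produces updates rather than an append-only stream, the result is converted with toRetractStream, which emits Tuple2<Boolean, Row> records: the flag is true for an added row and false for a retracted one. A small sketch, reusing the variables from the example above, of keeping only the additions:

// keep only the added rows from the retract stream
blinkStreamTableEnv.toRetractStream(wordWithCount, Row.class)
        .filter(change -> change.f0)  // f0 == true means the row was added, false means it was retracted
        .print();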
 
Example 7
Source File: JavaSqlITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testFilter() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
	StreamITCase.clear();

	DataStream<Tuple5<Integer, Long, Integer, String, Long>> ds = JavaStreamTestData.get5TupleDataStream(env);
	tableEnv.registerDataStream("MyTable", ds, "a, b, c, d, e");

	String sqlQuery = "SELECT a, b, e FROM MyTable WHERE c < 4";
	Table result = tableEnv.sqlQuery(sqlQuery);

	DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
	resultSet.addSink(new StreamITCase.StringSink<Row>());
	env.execute();

	List<String> expected = new ArrayList<>();
	expected.add("1,1,1");
	expected.add("2,2,2");
	expected.add("2,3,1");
	expected.add("3,4,2");

	StreamITCase.compareWithList(expected);
}
 
Example 8
Source File: FlinkSQLDistinctExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String ddlSource = "CREATE TABLE user_behavior (\n" +
            "    user_id BIGINT,\n" +
            "    item_id BIGINT,\n" +
            "    category_id BIGINT,\n" +
            "    behavior STRING,\n" +
            "    ts TIMESTAMP(3)\n" +
            ") WITH (\n" +
            "    'connector.type' = 'kafka',\n" +
            "    'connector.version' = '0.11',\n" +
            "    'connector.topic' = 'user_behavior',\n" +
            "    'connector.startup-mode' = 'latest-offset',\n" +
            "    'connector.properties.zookeeper.connect' = 'localhost:2181',\n" +
            "    'connector.properties.bootstrap.servers' = 'localhost:9092',\n" +
            "    'format.type' = 'json'\n" +
            ")";

    String countSql = "select user_id, count(user_id) from user_behavior group by user_id";

    blinkStreamTableEnv.sqlUpdate(ddlSource);
    Table countTable = blinkStreamTableEnv.sqlQuery(countSql);
    blinkStreamTableEnv.toRetractStream(countTable, Row.class).print();

    String distinctSql = "select distinct(user_id) from user_behavior";
    Table distinctTable = blinkStreamTableEnv.sqlQuery(distinctSql);
    blinkStreamTableEnv.toRetractStream(distinctTable, Row.class).print("==");

    blinkStreamTableEnv.execute("Blink Stream SQL count/distinct demo");
}
 
Example 9
Source File: StreamSQLExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// set up execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

		DataStream<Order> orderA = env.fromCollection(Arrays.asList(
			new Order(1L, "beer", 3),
			new Order(1L, "diaper", 4),
			new Order(3L, "rubber", 2)));

		DataStream<Order> orderB = env.fromCollection(Arrays.asList(
			new Order(2L, "pen", 3),
			new Order(2L, "rubber", 3),
			new Order(4L, "beer", 1)));

		// convert DataStream to Table
		Table tableA = tEnv.fromDataStream(orderA, "user, product, amount");
		// register DataStream as Table
		tEnv.registerDataStream("OrderB", orderB, "user, product, amount");

		// union the two tables
		Table result = tEnv.sqlQuery("SELECT * FROM " + tableA + " WHERE amount > 2 UNION ALL " +
						"SELECT * FROM OrderB WHERE amount < 2");

		tEnv.toAppendStream(result, Order.class).print();

		env.execute();
	}
 
Example 10
Source File: HBaseConnectorITCase.java    From flink with Apache License 2.0
@Test
public void testHBaseLookupFunction() throws Exception {
	StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment streamTableEnv = StreamTableEnvironment.create(streamEnv, streamSettings);
	StreamITCase.clear();

	// prepare a source table
	DataStream<Row> ds = streamEnv.fromCollection(testData2).returns(testTypeInfo2);
	Table in = streamTableEnv.fromDataStream(ds, "a, b, c");
	streamTableEnv.registerTable("src", in);

	Map<String, String> tableProperties = hbaseTableProperties();
	TableSource source = TableFactoryService
		.find(HBaseTableFactory.class, tableProperties)
		.createTableSource(tableProperties);

	streamTableEnv.registerFunction("hbaseLookup", ((HBaseTableSource) source).getLookupFunction(new String[]{ROWKEY}));

	// perform a temporal table join query
	String sqlQuery = "SELECT a,family1.col1, family3.col3 FROM src, LATERAL TABLE(hbaseLookup(a))";
	Table result = streamTableEnv.sqlQuery(sqlQuery);

	DataStream<Row> resultSet = streamTableEnv.toAppendStream(result, Row.class);
	resultSet.addSink(new StreamITCase.StringSink<>());

	streamEnv.execute();

	List<String> expected = new ArrayList<>();
	expected.add("1,10,Welt-1");
	expected.add("2,20,Welt-2");
	expected.add("3,30,Welt-3");
	expected.add("3,30,Welt-3");

	StreamITCase.compareWithList(expected);
}
 
Example 11
Source File: SideStream.java    From alchemy with Apache License 2.0
public static DataStream<Row> buildStream(StreamTableEnvironment env, SqlSelect sqlSelect, Alias leftAlias,
    Alias sideAlias, SourceDescriptor sideSource) throws Exception {
    SqlSelect leftSelect = SideParser.newSelect(sqlSelect, leftAlias.getTable(), leftAlias.getAlias(), true, false);
    // register leftTable
    Table leftTable = env.sqlQuery(leftSelect.toString());
    DataStream<Row> leftStream = env.toAppendStream(leftTable, Row.class);
    SqlSelect rightSelect
        = SideParser.newSelect(sqlSelect, sideAlias.getTable(), sideAlias.getAlias(), false, false);
    SqlJoin sqlJoin = (SqlJoin)sqlSelect.getFrom();
    List<String> equalFields = SideParser.findConditionFields(sqlJoin.getCondition(), leftAlias.getAlias());
    if (sideSource.getSide().isPartition()) {
        leftStream = leftStream.keyBy(equalFields.toArray(new String[equalFields.size()]));
    }
    RowTypeInfo sideType = createSideType(rightSelect.getSelectList(), sideSource.getSchema());
    RowTypeInfo returnType = createReturnType(leftTable.getSchema(), sideType);
    SideTable sideTable = createSideTable(leftTable.getSchema(), sideType, sqlJoin.getJoinType(), rightSelect,
        equalFields, sideAlias, sideSource.getSide());
    DataStream<Row> returnStream;
    if (sideSource.getSide().isAsync()) {
        AbstractAsyncSideFunction reqRow = sideSource.transform(sideTable);
        returnStream = AsyncDataStream.orderedWait(leftStream, reqRow, sideSource.getSide().getTimeout(),
            TimeUnit.MILLISECONDS, sideSource.getSide().getCapacity());
    } else {
        AbstractSyncSideFunction syncReqRow = sideSource.transform(sideTable);
        returnStream = leftStream.flatMap(syncReqRow);
    }
    returnStream.getTransformation().setOutputType(returnType);
    return returnStream;
}
 
Example 12
Source File: AbstractFlinkClient.java    From alchemy with Apache License 2.0
private Table registerSql(StreamTableEnvironment env, String sql, Map<String, TableSource> tableSources,
                          Map<String, SourceDescriptor> sideSources) throws Exception {
    if (sideSources.isEmpty()) {
        return env.sqlQuery(sql);
    }
    Deque<SqlNode> deque = SideParser.parse(sql);
    SqlNode last;
    SqlSelect modifyNode = null;
    SqlNode fullNode = deque.peekFirst();
    while ((last = deque.pollLast()) != null) {
        if (modifyNode != null) {
            SideParser.rewrite(last, modifyNode);
            modifyNode = null;
        }
        if (last.getKind() == SqlKind.SELECT) {
            SqlSelect sqlSelect = (SqlSelect) last;
            SqlNode selectFrom = sqlSelect.getFrom();
            if (SqlKind.JOIN != selectFrom.getKind()) {
                continue;
            }
            SqlJoin sqlJoin = (SqlJoin) selectFrom;
            Alias sideAlias = SideParser.getTableName(sqlJoin.getRight());
            Alias leftAlias = SideParser.getTableName(sqlJoin.getLeft());
            if (isSide(sideSources.keySet(), leftAlias.getTable())) {
                throw new UnsupportedOperationException("side table must be right table");
            }
            if (!isSide(sideSources.keySet(), sideAlias.getTable())) {
                continue;
            }
            DataStream<Row> dataStream = SideStream.buildStream(env, sqlSelect, leftAlias, sideAlias,
                sideSources.get(sideAlias.getTable()));
            Alias newTable = new Alias(leftAlias.getTable() + "_" + sideAlias.getTable(),
                leftAlias.getAlias() + "_" + sideAlias.getAlias());
            if (!env.isRegistered(newTable.getTable())) {
                env.registerDataStream(newTable.getTable(), dataStream);
            }
            SqlSelect newSelect
                = SideParser.newSelect(sqlSelect, newTable.getTable(), newTable.getAlias(), false, true);
            modifyNode = newSelect;
        }
    }
    if (modifyNode != null) {
        return env.sqlQuery(modifyNode.toString());
    } else {
        return env.sqlQuery(fullNode.toString());
    }

}
 
Example 13
Source File: Sort.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1);

        SingleOutputStreamOperator<Event> source = env.addSource(new OutOfOrderEventSource())
                .assignTimestampsAndWatermarks(new TimestampsAndWatermarks());

        Table table = tableEnv.fromDataStream(source, "eventTime.rowtime");

        tableEnv.registerTable("zhisheng", table);
        Table sorted = tableEnv.sqlQuery("select eventTime from zhisheng order by eventTime");
        DataStream<Row> rowDataStream = tableEnv.toAppendStream(sorted, Row.class);

        rowDataStream.print();

        // print the execution plan
//        System.out.println(env.getExecutionPlan());

        env.execute("sort-streaming-data");

    }
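
The OutOfOrderEventSource and TimestampsAndWatermarks helpers are project-specific classes that are not shown in this listing. As a hypothetical sketch, a timestamp assigner like the one used above could be built on Flink's BoundedOutOfOrdernessTimestampExtractor, assuming an Event class that carries an epoch-millisecond timestamp field:

// assumes: org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
// and org.apache.flink.streaming.api.windowing.time.Time
public static class TimestampsAndWatermarks extends BoundedOutOfOrdernessTimestampExtractor<Event> {

    public TimestampsAndWatermarks() {
        super(Time.seconds(5)); // tolerate events that arrive up to 5 seconds late
    }

    @Override
    public long extractTimestamp(Event event) {
        return event.timestamp; // assumption: the event carries its own epoch-millisecond timestamp
    }
}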
 
Example 14
Source File: FlinkTableITCase.java    From flink-connectors with Apache License 2.0
/**
 * Validates the use of the Pravega table descriptor to generate the source/sink table factories
 * for writing to and reading from a Pravega stream using a {@link StreamTableEnvironment}.
 * @throws Exception on test failure
 */
@Test
public void testStreamingTableUsingDescriptor() throws Exception {

    final String scope = setupUtils.getScope();
    final String streamName = "stream";
    Stream stream = Stream.of(scope, streamName);
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment().setParallelism(1);
    StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env,
            EnvironmentSettings.newInstance()
                    // watermark is only supported in blink planner
                    .useBlinkPlanner()
                    .inStreamingMode()
                    .build());

    PravegaConfig pravegaConfig = setupUtils.getPravegaConfig();

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
            .withRoutingKeyField("category")
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);
    pravega.tableSourceReaderBuilder()
            .withReaderGroupScope(stream.getScope())
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);

    TableSchema tableSchema = TableSchema.builder()
            .field("category", DataTypes.STRING())
            .field("value", DataTypes.INT())
            .build();

    Schema schema = new Schema().schema(tableSchema);

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(
                new Json()
                        .failOnMissingField(false)
            )
            .withSchema(schema)
            .inAppendMode();

    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(StreamTableSinkFactory.class, propertiesMap)
            .createStreamTableSink(propertiesMap);
    final TableSource<?> source = TableFactoryService.find(StreamTableSourceFactory.class, propertiesMap)
            .createStreamTableSource(propertiesMap);

    Table table = tableEnv.fromDataStream(env.fromCollection(SAMPLES));

    String tablePathSink = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogSinkTable = ConnectorCatalogTable.sink(sink, false);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog())
            .get()
            .createTable(
            ObjectPath.fromString(tablePathSink),
            connectorCatalogSinkTable, false);

    table.insertInto("PravegaSink");

    ConnectorCatalogTable<?, ?> connectorCatalogSourceTable = ConnectorCatalogTable.source(source, false);
    String tablePathSource = tableEnv.getCurrentDatabase() + "." + "samples";

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tablePathSource),
            connectorCatalogSourceTable, false);
    // select some sample data from the Pravega-backed table, as a view
    Table view = tableEnv.sqlQuery("SELECT * FROM samples WHERE category IN ('A','B')");

    // write the view to a test sink that verifies the data for test purposes
    tableEnv.toAppendStream(view, SampleRecord.class).addSink(new TestSink(SAMPLES));

    // execute the topology
    try {
        env.execute();
        Assert.fail("expected an exception");
    } catch (Exception e) {
        // we expect the job to fail because the test sink throws a deliberate exception.
        Assert.assertTrue(ExceptionUtils.getRootCause(e) instanceof TestCompletionException);
    }
}
 
Example 15
Source File: StreamSQLTestProgram.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		String outputPath = params.getRequired("outputPath");

		StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
			3,
			Time.of(10, TimeUnit.SECONDS)
		));
		sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		sEnv.enableCheckpointing(4000);
		sEnv.getConfig().setAutoWatermarkInterval(1000);

		StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv);

		tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
		tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

		int overWindowSizeSeconds = 1;
		int tumbleWindowSizeSeconds = 10;

		String overQuery = String.format(
			"SELECT " +
			"  key, " +
			"  rowtime, " +
			"  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
			"FROM table1",
			overWindowSizeSeconds);

		String tumbleQuery = String.format(
			"SELECT " +
			"  key, " +
			"  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
			"  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
			"FROM (%s) " +
			"WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
			"GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
			tumbleWindowSizeSeconds,
			tumbleWindowSizeSeconds,
			overQuery,
			tumbleWindowSizeSeconds);

		String joinQuery = String.format(
			"SELECT " +
			"  t1.key, " +
			"  t2.rowtime AS rowtime, " +
			"  t2.correct," +
			"  t2.wStart " +
			"FROM table2 t1, (%s) t2 " +
			"WHERE " +
			"  t1.key = t2.key AND " +
			"  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
			tumbleQuery,
			tumbleWindowSizeSeconds);

		String finalAgg = String.format(
			"SELECT " +
			"  SUM(correct) AS correct, " +
			"  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
			"FROM (%s) " +
			"GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
			joinQuery);

		// get Table for SQL query
		Table result = tEnv.sqlQuery(finalAgg);
		// convert Table into append-only DataStream
		DataStream<Row> resultStream =
			tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

		final StreamingFileSink<Row> sink = StreamingFileSink
			.forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
				PrintStream out = new PrintStream(stream);
				out.println(element.toString());
			})
			.withBucketAssigner(new KeyBucketAssigner())
			.withRollingPolicy(OnCheckpointRollingPolicy.build())
			.build();

		resultStream
			// inject a KillMapper that forwards all records but terminates the first execution attempt
			.map(new KillMapper()).setParallelism(1)
			// add sink function
			.addSink(sink).setParallelism(1);

		sEnv.execute();
	}
 
Example 16
Source File: PopularPlacesSql.java    From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {

		// read parameters
		ParameterTool params = ParameterTool.fromArgs(args);
		String input = params.getRequired("input");

		final int maxEventDelay = 60;       // events are out of order by max 60 seconds
		final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		// create a TableEnvironment
		StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

		// register TaxiRideTableSource as table "TaxiRides"
		tEnv.registerTableSource(
				"TaxiRides",
				new TaxiRideTableSource(
						input,
						maxEventDelay,
						servingSpeedFactor));

		// register user-defined functions
		tEnv.registerFunction("isInNYC", new GeoUtils.IsInNYC());
		tEnv.registerFunction("toCellId", new GeoUtils.ToCellId());
		tEnv.registerFunction("toCoords", new GeoUtils.ToCoords());

		Table results = tEnv.sqlQuery(
			"SELECT " +
				"toCoords(cell), wstart, wend, isStart, popCnt " +
			"FROM " +
				"(SELECT " +
					"cell, " +
					"isStart, " +
					"HOP_START(eventTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wstart, " +
					"HOP_END(eventTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE) AS wend, " +
					"COUNT(isStart) AS popCnt " +
				"FROM " +
					"(SELECT " +
						"eventTime, " +
						"isStart, " +
						"CASE WHEN isStart THEN toCellId(startLon, startLat) ELSE toCellId(endLon, endLat) END AS cell " +
					"FROM TaxiRides " +
					"WHERE isInNYC(startLon, startLat) AND isInNYC(endLon, endLat)) " +
				"GROUP BY cell, isStart, HOP(eventTime, INTERVAL '5' MINUTE, INTERVAL '15' MINUTE)) " +
			"WHERE popCnt > 20"
			);

		// convert Table into an append stream and print it
		// (if instead we needed a retraction stream we would use tEnv.toRetractStream)
		tEnv.toAppendStream(results, Row.class).print();

		// execute query
		env.execute();
	}
 
Example 17
Source File: FlinkPravegaTableITCase.java    From flink-connectors with Apache License 2.0 4 votes vote down vote up
private void testTableSourceStreamingDescriptor(Stream stream, PravegaConfig pravegaConfig) throws Exception {
    final StreamExecutionEnvironment execEnvRead = StreamExecutionEnvironment.getExecutionEnvironment();
    execEnvRead.setParallelism(1);
    execEnvRead.enableCheckpointing(100);
    execEnvRead.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    StreamTableEnvironment tableEnv = StreamTableEnvironment.create(execEnvRead,
            EnvironmentSettings.newInstance()
                    // watermark is only supported in blink planner
                    .useBlinkPlanner()
                    .inStreamingMode()
                    .build());
    RESULTS.clear();

    // read data from the stream using Table reader
    Schema schema = new Schema()
            .field("user", DataTypes.STRING())
            .field("uri", DataTypes.STRING())
            .field("accessTime", DataTypes.TIMESTAMP(3)).rowtime(
                    new Rowtime().timestampsFromField("accessTime").watermarksPeriodicBounded(30000L));

    Pravega pravega = new Pravega();
    pravega.tableSourceReaderBuilder()
            .withReaderGroupScope(stream.getScope())
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(new Json().failOnMissingField(true))
            .withSchema(schema)
            .inAppendMode();

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSource<?> source = TableFactoryService.find(StreamTableSourceFactory.class, propertiesMap)
            .createStreamTableSource(propertiesMap);

    String tableSourcePath = tableEnv.getCurrentDatabase() + "." + "MyTableRow";

    ConnectorCatalogTable<?, ?> connectorCatalogSourceTable = ConnectorCatalogTable.source(source, false);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSourcePath),
            connectorCatalogSourceTable, false);

    String sqlQuery = "SELECT user, " +
            "TUMBLE_END(accessTime, INTERVAL '5' MINUTE) AS accessTime, " +
            "COUNT(uri) AS cnt " +
            "from MyTableRow GROUP BY " +
            "user, TUMBLE(accessTime, INTERVAL '5' MINUTE)";
    Table result = tableEnv.sqlQuery(sqlQuery);

    DataStream<Tuple2<Boolean, Row>> resultSet = tableEnv.toRetractStream(result, Row.class);
    StringSink2 stringSink = new StringSink2(8);
    resultSet.addSink(stringSink);

    try {
        execEnvRead.execute("ReadRowData");
    } catch (Exception e) {
        if (!(ExceptionUtils.getRootCause(e) instanceof SuccessException)) {
            throw e;
        }
    }

    log.info("results: {}", RESULTS);
    boolean compare = compare(RESULTS, getExpectedResultsAppend());
    assertTrue("Output does not match expected result", compare);
}
 
Example 18
Source File: AreasTotalPerHour.java    From infoworld-post with Apache License 2.0
public static void main(String[] args) throws Exception {

        // check parameter
        if (args.length != 1) {
            System.err.println("Please provide the path to the taxi rides file as a parameter");
            return;
        }
        String inputPath = args[0];

        // create execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // configure event-time and watermarks
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.getConfig().setAutoWatermarkInterval(1000L);

        // create table environment
        StreamTableEnvironment tEnv = TableEnvironment.getTableEnvironment(env);
        // register user-defined function
        tEnv.registerFunction("toCellId", new GeoUtils.ToCellId());

        // get taxi ride event stream
        DataStream<TaxiRide> rides = TaxiRides.getRides(env, inputPath);
        // register taxi ride event stream as table "Rides"
        tEnv.registerDataStream(
            "Rides",
            rides,
            "medallion, licenseId, pickUpTime, dropOffTime.rowtime, " +
                "pickUpLon, pickUpLat, dropOffLon, dropOffLat, total");

        // define SQL query to compute average total per area and hour
        Table result = tEnv.sqlQuery(
            "SELECT " +
            "  toCellId(dropOffLon, dropOffLat) AS area, " +
            "  TUMBLE_START(dropOffTime, INTERVAL '1' HOUR) AS t, " +
            "  AVG(total) AS avgTotal " +
            "FROM Rides " +
            "GROUP BY " +
            "  toCellId(dropOffLon, dropOffLat), " +
            "  TUMBLE(dropOffTime, INTERVAL '1' HOUR)"
        );

        // convert result table into an append stream and print it
        tEnv.toAppendStream(result, Row.class)
            .print();

        // execute the query
        env.execute();
    }
 
Example 19
Source File: TestTableFunction.java    From yauaa with Apache License 2.0
@Test
public void testFunctionExtractDirect() throws Exception {
    // The base input stream
    StreamExecutionEnvironment                       senv        = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Tuple3<String, String, String>> inputStream = getTestAgentStream(senv);

    // The table environment
    StreamTableEnvironment tableEnv = StreamTableEnvironment.create(senv);

    // Give the stream a Table Name
    tableEnv.createTemporaryView("AgentStream", inputStream, "useragent, expectedDeviceClass, expectedAgentNameVersionMajor");

    // register the function
    tableEnv.registerFunction("ParseUserAgent", new AnalyzeUseragentFunction("DeviceClass", "AgentNameVersionMajor"));


    // The downside of doing it this way is that the parsing function (which parses the useragent
    // and converts all results into a map) is called once per requested field; in this simple case, twice.
    // (A reworked query that avoids the double call is sketched after this example.)
    String sqlQuery =
        "SELECT useragent,"+
        "       ParseUserAgent(useragent)['DeviceClass'          ]  as DeviceClass," +
        "       ParseUserAgent(useragent)['AgentNameVersionMajor']  as AgentNameVersionMajor," +
        "       expectedDeviceClass," +
        "       expectedAgentNameVersionMajor " +
        "FROM AgentStream";
    Table  resultTable   = tableEnv.sqlQuery(sqlQuery);

    TypeInformation<Row> tupleType = new RowTypeInfo(STRING, STRING, STRING, STRING, STRING);
    DataStream<Row> resultSet = tableEnv.toAppendStream(resultTable, tupleType);

    resultSet.map((MapFunction<Row, String>) row -> {
        Object useragent                      = row.getField(0);
        Object deviceClass                    = row.getField(1);
        Object agentNameVersionMajor          = row.getField(2);
        Object expectedDeviceClass            = row.getField(3);
        Object expectedAgentNameVersionMajor  = row.getField(4);

        assertTrue(useragent                     instanceof String);
        assertTrue(deviceClass                   instanceof String);
        assertTrue(agentNameVersionMajor         instanceof String);
        assertTrue(expectedDeviceClass           instanceof String);
        assertTrue(expectedAgentNameVersionMajor instanceof String);

        assertEquals(expectedDeviceClass,           deviceClass,           "Wrong DeviceClass: "           + useragent);
        assertEquals(expectedAgentNameVersionMajor, agentNameVersionMajor, "Wrong AgentNameVersionMajor: " + useragent);
        return useragent.toString();
    }).printToErr();

    senv.execute();
}
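
The repeated invocation noted in the comment above can be avoided by calling the function once in a subquery and extracting the individual fields from the resulting map in the outer query, which is the approach the next example takes. A sketch of the reworked query string:

// Sketch: call ParseUserAgent once in an inner query, then pick fields out of the map
String sqlQuery =
    "SELECT useragent," +
    "       parsedUserAgent['DeviceClass'          ]  AS DeviceClass," +
    "       parsedUserAgent['AgentNameVersionMajor']  AS AgentNameVersionMajor " +
    "FROM (" +
    "    SELECT useragent, ParseUserAgent(useragent) AS parsedUserAgent" +
    "    FROM AgentStream" +
    ")";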
 
Example 20
Source File: DemonstrationOfTumblingTableSQLFunction.java    From yauaa with Apache License 2.0
@Disabled
@Test
public void runDemonstration() throws Exception {
    // The base input stream
    StreamExecutionEnvironment senv = StreamExecutionEnvironment.getExecutionEnvironment();
    senv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    senv.getConfig().setAutoWatermarkInterval(1000);

    DataStream<Tuple4<Long, String, String, String>> inputStream = senv
        .addSource(new UAStreamSource())
        .assignTimestampsAndWatermarks(new UAWatermarker());

    // The table environment
    StreamTableEnvironment tableEnv = StreamTableEnvironment.create(senv);

    // Give the stream a Table Name
    tableEnv.createTemporaryView("AgentStream", inputStream, "eventTime.rowtime, useragent, expectedDeviceClass, expectedAgentNameVersionMajor");

    // register the function
    tableEnv.registerFunction("ParseUserAgent", new AnalyzeUseragentFunction("DeviceClass", "AgentNameVersionMajor"));

    int windowIntervalCount =  5;
    String windowIntervalScale =  "MINUTE";

    String sqlQuery = String.format(
        "SELECT" +
        "   TUMBLE_START(eventTime, INTERVAL '%d' %s) AS wStart," +
        "   deviceClass," +
        "   agentNameVersionMajor," +
        "   expectedDeviceClass," +
        "   expectedAgentNameVersionMajor," +
        "   Count('') " +
        "FROM ( "+
        "    SELECT " +
        "       eventTime, " +
        "       parsedUserAgent['DeviceClass'          ]  AS deviceClass," +
        "       parsedUserAgent['AgentNameVersionMajor']  AS agentNameVersionMajor," +
        "       expectedDeviceClass," +
        "       expectedAgentNameVersionMajor" +
        "    FROM ( "+
        "        SELECT " +
        "           eventTime, " +
        "           ParseUserAgent(useragent) AS parsedUserAgent," +
        "           expectedDeviceClass," +
        "           expectedAgentNameVersionMajor" +
        "        FROM AgentStream " +
        "    )" +
        ")" +
        "GROUP BY TUMBLE(eventTime, INTERVAL '%d' %s), " +
            "       deviceClass," +
            "       agentNameVersionMajor," +
            "       expectedDeviceClass," +
            "       expectedAgentNameVersionMajor",
        windowIntervalCount, windowIntervalScale,
        windowIntervalCount, windowIntervalScale
        );
    Table resultTable = tableEnv.sqlQuery(sqlQuery);

    TypeInformation<Row> tupleType = new RowTypeInfo(SQL_TIMESTAMP, STRING, STRING, STRING, STRING, LONG);
    DataStream<Row>      resultSet = tableEnv.toAppendStream(resultTable, tupleType);

    resultSet.print();

    resultSet.map((MapFunction<Row, String>) row -> {
        Object windowStart                    = row.getField(0); // field 0 is TUMBLE_START (wStart), not the useragent
        Object deviceClass                    = row.getField(1);
        Object agentNameVersionMajor          = row.getField(2);
        Object expectedDeviceClass            = row.getField(3);
        Object expectedAgentNameVersionMajor  = row.getField(4);

        assertEquals(
            expectedDeviceClass,
            deviceClass,
            "Wrong DeviceClass in window starting " + windowStart);

        assertEquals(
            expectedAgentNameVersionMajor,
            agentNameVersionMajor,
            "Wrong AgentNameVersionMajor in window starting " + windowStart);

        return windowStart.toString();
    });

    senv.execute();
}