org.apache.flink.table.descriptors.Schema Java Examples

The following examples show how to use org.apache.flink.table.descriptors.Schema. They are taken from the open-source projects and source files named above each example.
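Before the individual examples, the sketch below summarizes the fluent Schema descriptor API that they all rely on: declaring fields, mapping a field to a differently named input field with from(), marking rowtime and proctime attributes, and flattening the result into connector properties. It is a minimal orientation sketch; the field names are made up, and the API calls are the ones used in the examples that follow.

Schema schema = new Schema()
        .field("user_name", DataTypes.STRING()).from("name")      // read from input field "name"
        .field("amount", DataTypes.DECIMAL(10, 4))                 // no from(), so it must match the input field
        .field("event_time", DataTypes.TIMESTAMP(3)).rowtime(
                new Rowtime().timestampsFromField("ts").watermarksPeriodicAscending())
        .field("proc_time", DataTypes.TIMESTAMP(3)).proctime();

// Typically handed to a ConnectTableDescriptor via withSchema(schema),
// or flattened into string properties:
Map<String, String> properties = schema.toProperties();
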
Example #1
Source File: FlinkPravegaTableFactoryTest.java    From flink-connectors with Apache License 2.0 6 votes
/**
 * Rowtime attribute should be of type TIMESTAMP.
 */
@Test (expected = ValidationException.class)
public void testWrongRowTimeAttributeType() {
    final Schema schema = new Schema()
            .field("name", DataTypes.STRING())
            .field("age", DataTypes.INT()).rowtime(new Rowtime()
                    .timestampsFromField("age")
                    .watermarksFromStrategy(new BoundedOutOfOrderTimestamps(30000L)));
    Pravega pravega = new Pravega();
    Stream stream = Stream.of(SCOPE, STREAM);
    pravega.tableSourceReaderBuilder()
            .forStream(stream)
            .withPravegaConfig(PRAVEGA_CONFIG);
    final TestTableDescriptor testDesc = new TestTableDescriptor(pravega)
            .withFormat(JSON)
            .withSchema(schema)
            .inAppendMode();
    final Map<String, String> propertiesMap = testDesc.toProperties();
    FlinkPravegaTableFactoryBase tableFactoryBase = new FlinkPravegaStreamTableSourceFactory();
    tableFactoryBase.createFlinkPravegaTableSource(propertiesMap);
    fail("Schema validation failed");
}
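
For contrast with the failing case above, a rowtime attribute that passes validation is declared on a TIMESTAMP(3) field, as in the Kafka example further down. A minimal sketch (field names are illustrative):

final Schema schema = new Schema()
        .field("name", DataTypes.STRING())
        .field("event_time", DataTypes.TIMESTAMP(3)).rowtime(new Rowtime()
                .timestampsFromField("ts")   // timestamp taken from input field "ts"
                .watermarksFromStrategy(new BoundedOutOfOrderTimestamps(30000L)));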
 
Example #2
Source File: CsvRowFormatFactoryTest.java    From flink with Apache License 2.0 6 votes
@Test
public void testSchemaDerivation() {
	final Map<String, String> properties = new HashMap<>();
	properties.putAll(new Schema().schema(TableSchema.fromTypeInfo(SCHEMA)).toProperties());
	properties.putAll(new Csv().deriveSchema().toProperties());

	final CsvRowSerializationSchema expectedSer = new CsvRowSerializationSchema.Builder(SCHEMA).build();
	final CsvRowDeserializationSchema expectedDeser = new CsvRowDeserializationSchema.Builder(SCHEMA).build();

	final SerializationSchema<?> actualSer = TableFactoryService
		.find(SerializationSchemaFactory.class, properties)
		.createSerializationSchema(properties);

	assertEquals(expectedSer, actualSer);

	final DeserializationSchema<?> actualDeser = TableFactoryService
		.find(DeserializationSchemaFactory.class, properties)
		.createDeserializationSchema(properties);

	assertEquals(expectedDeser, actualDeser);
}
 
Example #3
Source File: TableFactoryService.java    From flink with Apache License 2.0 6 votes
/**
 * Performs filtering for special cases (i.e. table format factories with schema derivation).
 */
private static List<String> filterSupportedPropertiesFactorySpecific(TableFactory factory, List<String> keys) {

	if (factory instanceof TableFormatFactory) {
		boolean includeSchema = ((TableFormatFactory) factory).supportsSchemaDerivation();
		return keys.stream().filter(k -> {
			if (includeSchema) {
				return k.startsWith(Schema.SCHEMA + ".") ||
					k.startsWith(FormatDescriptorValidator.FORMAT + ".");
			} else {
				return k.startsWith(FormatDescriptorValidator.FORMAT + ".");
			}
		}).collect(Collectors.toList());
	} else {
		return keys;
	}
}
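
The filter keeps only keys under the "schema." prefix (Schema.SCHEMA) and the "format." prefix (FormatDescriptorValidator.FORMAT), and schema keys only when the format factory supports schema derivation. A hedged illustration of the kind of flattened keys it sees (exact key names vary across Flink versions and are shown here for orientation only):

List<String> keys = Arrays.asList(
        "connector.type",               // dropped: neither a schema.* nor a format.* key
        "format.type",                  // always kept
        "format.fail-on-missing-field", // always kept
        "schema.0.name",                // kept only if supportsSchemaDerivation() returns true
        "schema.0.data-type");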
 
Example #4
Source File: KafkaTableSourceSinkFactoryTestBase.java    From flink with Apache License 2.0 6 votes
protected Map<String, String> createKafkaSinkProperties() {
	return new TestTableDescriptor(
		new Kafka()
			.version(getKafkaVersion())
			.topic(TOPIC)
			.properties(KAFKA_PROPERTIES)
			.sinkPartitionerFixed()
			.startFromSpecificOffsets(OFFSETS)) // test if they are accepted although not needed
		.withFormat(new TestTableFormat())
		.withSchema(
			new Schema()
				.field(FRUIT_NAME, DataTypes.STRING())
				.field(COUNT, DataTypes.DECIMAL(10, 4))
				.field(EVENT_TIME, DataTypes.TIMESTAMP(3)))
		.inAppendMode()
		.toProperties();
}
 
Example #5
Source File: CatalogTableImpTest.java    From flink with Apache License 2.0 6 votes
@Test
public void testToProperties() {
	TableSchema schema = createTableSchema();
	Map<String, String> prop = createProperties();
	CatalogTable table = new CatalogTableImpl(
		schema,
		createPartitionKeys(),
		prop,
		TEST
	);

	DescriptorProperties descriptorProperties = new DescriptorProperties();
	descriptorProperties.putProperties(table.toProperties());

	assertEquals(schema, descriptorProperties.getTableSchema(Schema.SCHEMA));
}
 
Example #6
Source File: KafkaTableSourceSinkFactoryTestBase.java    From flink with Apache License 2.0 6 votes
protected Map<String, String> createKafkaSourceProperties() {
	return new TestTableDescriptor(
			new Kafka()
				.version(getKafkaVersion())
				.topic(TOPIC)
				.properties(KAFKA_PROPERTIES)
				.sinkPartitionerRoundRobin() // test if accepted although not needed
				.startFromSpecificOffsets(OFFSETS))
			.withFormat(new TestTableFormat())
			.withSchema(
				new Schema()
					.field(FRUIT_NAME, DataTypes.STRING()).from(NAME)
					.field(COUNT, DataTypes.DECIMAL(38, 18)) // no from so it must match with the input
					.field(EVENT_TIME, DataTypes.TIMESTAMP(3)).rowtime(
						new Rowtime().timestampsFromField(TIME).watermarksPeriodicAscending())
					.field(PROC_TIME, DataTypes.TIMESTAMP(3)).proctime())
			.toProperties();
}
 
Example #7
Source File: CsvRowFormatFactoryTest.java    From flink with Apache License 2.0 6 votes
@Test
public void testSchemaDerivation() {
	final Map<String, String> properties = new HashMap<>();
	properties.putAll(new Schema().schema(TableSchema.fromTypeInfo(SCHEMA)).toProperties());
	properties.putAll(new Csv().toProperties());

	final CsvRowSerializationSchema expectedSer = new CsvRowSerializationSchema.Builder(SCHEMA).build();
	final CsvRowDeserializationSchema expectedDeser = new CsvRowDeserializationSchema.Builder(SCHEMA).build();

	final SerializationSchema<?> actualSer = TableFactoryService
		.find(SerializationSchemaFactory.class, properties)
		.createSerializationSchema(properties);

	assertEquals(expectedSer, actualSer);

	final DeserializationSchema<?> actualDeser = TableFactoryService
		.find(DeserializationSchemaFactory.class, properties)
		.createDeserializationSchema(properties);

	assertEquals(expectedDeser, actualDeser);
}
 
Example #8
Source File: CsvRowFormatFactoryTest.java    From Flink-CEPplus with Apache License 2.0 6 votes
@Test
public void testSchemaDerivation() {
	final Map<String, String> properties = new HashMap<>();
	properties.putAll(new Schema().schema(TableSchema.fromTypeInfo(SCHEMA)).toProperties());
	properties.putAll(new Csv().deriveSchema().toProperties());

	final CsvRowSerializationSchema expectedSer = new CsvRowSerializationSchema.Builder(SCHEMA).build();
	final CsvRowDeserializationSchema expectedDeser = new CsvRowDeserializationSchema.Builder(SCHEMA).build();

	final SerializationSchema<?> actualSer = TableFactoryService
		.find(SerializationSchemaFactory.class, properties)
		.createSerializationSchema(properties);

	assertEquals(expectedSer, actualSer);

	final DeserializationSchema<?> actualDeser = TableFactoryService
		.find(DeserializationSchemaFactory.class, properties)
		.createDeserializationSchema(properties);

	assertEquals(expectedDeser, actualDeser);
}
 
Example #9
Source File: FlinkPravegaTableFactoryTest.java    From flink-connectors with Apache License 2.0 6 votes
/**
 * Processing time attribute should be of type TIMESTAMP.
 */
@Test (expected = ValidationException.class)
public void testWrongProcTimeAttributeType() {
    final Schema schema = new Schema()
            .field("name", DataTypes.STRING())
            .field("age", DataTypes.INT()).proctime();

    Pravega pravega = new Pravega();
    Stream stream = Stream.of(SCOPE, STREAM);
    pravega.tableSourceReaderBuilder()
            .forStream(stream)
            .withPravegaConfig(PRAVEGA_CONFIG);
    final TestTableDescriptor testDesc = new TestTableDescriptor(pravega)
            .withFormat(JSON)
            .withSchema(schema)
            .inAppendMode();
    final Map<String, String> propertiesMap = testDesc.toProperties();
    FlinkPravegaTableFactoryBase tableFactoryBase = new FlinkPravegaStreamTableSourceFactory();
    tableFactoryBase.createFlinkPravegaTableSource(propertiesMap);
    fail("Schema validation failed");
}
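
As with rowtime, a proctime attribute only validates when it is declared on a TIMESTAMP(3) field; a minimal counterpart to the failing schema above (field names illustrative):

final Schema schema = new Schema()
        .field("name", DataTypes.STRING())
        .field("proc_time", DataTypes.TIMESTAMP(3)).proctime();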
 
Example #10
Source File: ElasticsearchUpsertTableSinkFactoryTestBase.java    From flink with Apache License 2.0 5 votes
protected Map<String, String> createElasticSearchProperties() {
	return new TestTableDescriptor(
		new Elasticsearch()
			.version(getElasticsearchVersion())
			.host(HOSTNAME, PORT, SCHEMA)
			.index(INDEX)
			.documentType(DOC_TYPE)
			.keyDelimiter(KEY_DELIMITER)
			.keyNullLiteral(KEY_NULL_LITERAL)
			.bulkFlushBackoffExponential()
			.bulkFlushBackoffDelay(123L)
			.bulkFlushBackoffMaxRetries(3)
			.bulkFlushInterval(100L)
			.bulkFlushMaxActions(1000)
			.bulkFlushMaxSize("1 MB")
			.failureHandlerCustom(DummyFailureHandler.class)
			.connectionMaxRetryTimeout(100)
			.connectionPathPrefix("/myapp"))
		.withFormat(
			new Json()
				.deriveSchema())
		.withSchema(
			new Schema()
				.field(FIELD_KEY, DataTypes.BIGINT())
				.field(FIELD_FRUIT_NAME, DataTypes.STRING())
				.field(FIELD_COUNT, DataTypes.DECIMAL(10, 4))
				.field(FIELD_TS, DataTypes.TIMESTAMP(3)))
		.inUpsertMode()
		.toProperties();
}
 
Example #11
Source File: FlinkTableITCase.java    From flink-connectors with Apache License 2.0 5 votes
@Test
public void testBatchTableSinkUsingDescriptor() throws Exception {

    // create a Pravega stream for test purposes
    Stream stream = Stream.of(setupUtils.getScope(), "testBatchTableSinkUsingDescriptor");
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    // create a Flink Table environment
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(1);
    BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

    Table table = tableEnv.fromDataSet(env.fromCollection(SAMPLES));

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
            .withRoutingKeyField("category")
            .forStream(stream)
            .withPravegaConfig(setupUtils.getPravegaConfig());

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(new Json().failOnMissingField(true))
            .withSchema(new Schema().field("category", DataTypes.STRING()).
                    field("value", DataTypes.INT()));
    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(BatchTableSinkFactory.class, propertiesMap)
            .createBatchTableSink(propertiesMap);

    String tableSinkPath = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogSinkTable = ConnectorCatalogTable.sink(sink, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSinkPath),
            connectorCatalogSinkTable, false);
    table.insertInto("PravegaSink");
    env.execute();
}
 
Example #12
Source File: RedisDescriptorTest.java    From bahir-flink with Apache License 2.0 5 votes
@Test
public void testRedisDescriptor() throws Exception {
    DataStreamSource<Row> source = (DataStreamSource<Row>) env.addSource(new TestSourceFunctionString())
            .returns(new RowTypeInfo(TypeInformation.of(String.class), TypeInformation.of(Long.class)));

    EnvironmentSettings settings = EnvironmentSettings
            .newInstance()
            .useOldPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, settings);
    tableEnvironment.registerDataStream("t1", source, "k, v");

    Redis redis = new Redis()
            .mode(RedisValidator.REDIS_CLUSTER)
            .command(RedisCommand.INCRBY_EX.name())
            .ttl(100000)
            .property(RedisValidator.REDIS_NODES, REDIS_HOST+ ":" + REDIS_PORT);

    tableEnvironment
            .connect(redis).withSchema(new Schema()
            .field("k", TypeInformation.of(String.class))
            .field("v", TypeInformation.of(Long.class)))
            .registerTableSink("redis");


    tableEnvironment.sqlUpdate("insert into redis select k, v from t1");
    env.execute("Test Redis Table");
}
 
Example #13
Source File: JsonRowFormatFactoryTest.java    From flink with Apache License 2.0 5 votes
@Test
public void testSchemaDerivation() {
	final Map<String, String> properties = toMap(
		new Schema()
			.field("field1", Types.BOOLEAN())
			.field("field2", Types.INT())
			.field("proctime", Types.SQL_TIMESTAMP()).proctime(),
		new Json()
			.deriveSchema());

	testSchemaSerializationSchema(properties);

	testSchemaDeserializationSchema(properties);
}
 
Example #14
Source File: CatalogTableImpl.java    From flink with Apache License 2.0 5 votes
@Override
public Map<String, String> toProperties() {
	DescriptorProperties descriptor = new DescriptorProperties();

	descriptor.putTableSchema(Schema.SCHEMA, getSchema());
	descriptor.putPartitionKeys(getPartitionKeys());

	Map<String, String> properties = new HashMap<>(getProperties());
	properties.remove(CatalogConfig.IS_GENERIC);

	descriptor.putProperties(properties);

	return descriptor.asMap();
}
 
Example #15
Source File: CatalogTableImpl.java    From flink with Apache License 2.0 5 votes
/**
 * Construct a {@link CatalogTableImpl} from complete properties that contains table schema.
 */
public static CatalogTableImpl fromProperties(Map<String, String> properties) {
	DescriptorProperties descriptorProperties = new DescriptorProperties();
	descriptorProperties.putProperties(properties);
	TableSchema tableSchema = descriptorProperties.getTableSchema(Schema.SCHEMA);
	List<String> partitionKeys = descriptorProperties.getPartitionKeys();
	return new CatalogTableImpl(
			tableSchema,
			partitionKeys,
			removeRedundant(properties, tableSchema, partitionKeys),
			""
	);
}
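
Together with toProperties() above, fromProperties() lets a catalog table be round-tripped through a flat property map. A minimal sketch, with a made-up schema and comment:

TableSchema schema = TableSchema.builder()
        .field("id", DataTypes.BIGINT())
        .field("name", DataTypes.STRING())
        .build();
CatalogTableImpl original = new CatalogTableImpl(
        schema, Collections.emptyList(), new HashMap<>(), "an illustrative comment");

Map<String, String> properties = original.toProperties();        // schema + partition keys + table properties
CatalogTableImpl restored = CatalogTableImpl.fromProperties(properties);
// restored.getSchema() equals the original schema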
 
Example #16
Source File: CatalogTableImpl.java    From flink with Apache License 2.0 5 votes
/**
 * Construct catalog table properties from {@link #toProperties()}.
 */
public static Map<String, String> removeRedundant(
		Map<String, String> properties,
		TableSchema schema,
		List<String> partitionKeys) {
	Map<String, String> ret = new HashMap<>(properties);
	DescriptorProperties descriptorProperties = new DescriptorProperties();
	descriptorProperties.putTableSchema(Schema.SCHEMA, schema);
	descriptorProperties.putPartitionKeys(partitionKeys);
	descriptorProperties.asMap().keySet().forEach(ret::remove);
	return ret;
}
 
Example #17
Source File: TableExampleWordCount.java    From flink-learning with Apache License 2.0 5 votes
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String path = TableExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();
    blinkStreamTableEnv
            .connect(new FileSystem().path(path))
            .withFormat(new OldCsv().field("word", Types.STRING).lineDelimiter("\n"))
            .withSchema(new Schema().field("word", Types.STRING))
            .inAppendMode()
            .registerTableSource("FlieSourceTable");

    Table wordWithCount = blinkStreamTableEnv.scan("FlieSourceTable")
            .groupBy("word")
            .select("word,count(word) as _count");
    blinkStreamTableEnv.toRetractStream(wordWithCount, Row.class).print();

    // You may wonder why the printed results carry an extra true/false flag.
    // The retract sink deletes before it inserts: false marks the retraction of the previous record, true marks the insertion of the new one.

    blinkStreamTableEnv.execute("Blink Stream SQL Job");
}
 
Example #18
Source File: TableSourceFactoryMock.java    From flink with Apache License 2.0 5 votes
@Override
public TableSource<Row> createTableSource(Map<String, String> properties) {
	final DescriptorProperties descriptorProperties = new DescriptorProperties();
	descriptorProperties.putProperties(properties);
	final TableSchema schema = descriptorProperties.getTableSchema(Schema.SCHEMA);
	return new TableSourceMock(schema.toRowDataType(), schema);
}
 
Example #19
Source File: TableEnvironmentTest.java    From flink with Apache License 2.0 5 votes
@Test
public void testConnect() throws Exception {
	final TableEnvironmentMock tableEnv = TableEnvironmentMock.getStreamingInstance();

	tableEnv
		.connect(new ConnectorDescriptorMock(TableSourceFactoryMock.CONNECTOR_TYPE_VALUE, 1, true))
		.withFormat(new FormatDescriptorMock("my_format", 1))
		.withSchema(new Schema()
			.field("my_field_0", "INT")
			.field("my_field_1", "BOOLEAN"))
		.inAppendMode()
		.registerTableSource("my_table");

	final Catalog catalog = tableEnv.getCatalog(EnvironmentSettings.DEFAULT_BUILTIN_CATALOG)
		.orElseThrow(AssertionError::new);

	final CatalogBaseTable table = catalog
		.getTable(new ObjectPath(EnvironmentSettings.DEFAULT_BUILTIN_DATABASE, "my_table"));

	assertThat(
		table.getSchema(),
		equalTo(
			TableSchema.builder()
				.field("my_field_0", DataTypes.INT())
				.field("my_field_1", DataTypes.BOOLEAN())
				.build()));

	final ConnectorCatalogTable<?, ?> connectorCatalogTable = (ConnectorCatalogTable<?, ?>) table;

	assertThat(
		connectorCatalogTable.getTableSource().isPresent(),
		equalTo(true));
}
 
Example #20
Source File: TableEnvironmentTest.java    From flink with Apache License 2.0 5 votes
@Test
public void testConnect() {
	final TableEnvironmentMock tableEnv = TableEnvironmentMock.getStreamingInstance();

	tableEnv
		.connect(new ConnectorDescriptorMock(TableSourceFactoryMock.CONNECTOR_TYPE_VALUE, 1, true))
		.withFormat(new FormatDescriptorMock("my_format", 1))
		.withSchema(new Schema()
			.field("my_field_0", "INT")
			.field("my_field_1", "BOOLEAN")
			.field("my_part_1", "BIGINT")
			.field("my_part_2", "STRING"))
		.withPartitionKeys(Arrays.asList("my_part_1", "my_part_2"))
		.inAppendMode()
		.createTemporaryTable("my_table");

	CatalogManager.TableLookupResult lookupResult = tableEnv.catalogManager.getTable(ObjectIdentifier.of(
		EnvironmentSettings.DEFAULT_BUILTIN_CATALOG,
		EnvironmentSettings.DEFAULT_BUILTIN_DATABASE,
		"my_table"))
		.orElseThrow(AssertionError::new);

	assertThat(lookupResult.isTemporary(), equalTo(true));

	CatalogBaseTable catalogBaseTable = lookupResult.getTable();
	assertTrue(catalogBaseTable instanceof CatalogTable);
	CatalogTable table = (CatalogTable) catalogBaseTable;
	assertCatalogTable(table);
	assertCatalogTable(CatalogTableImpl.fromProperties(table.toProperties()));
}
 
Example #21
Source File: TableSourceFactoryMock.java    From flink with Apache License 2.0 5 votes
@Override
public TableSource<Row> createTableSource(Map<String, String> properties) {
	final DescriptorProperties descriptorProperties = new DescriptorProperties();
	descriptorProperties.putProperties(properties);
	final TableSchema schema = TableSchemaUtils.getPhysicalSchema(
		descriptorProperties.getTableSchema(Schema.SCHEMA));
	return new TableSourceMock(schema);
}
 
Example #22
Source File: JsonRowFormatFactoryTest.java    From Flink-CEPplus with Apache License 2.0 5 votes
@Test
public void testSchemaDerivation() {
	final Map<String, String> properties = toMap(
		new Schema()
			.field("field1", Types.BOOLEAN())
			.field("field2", Types.INT())
			.field("proctime", Types.SQL_TIMESTAMP()).proctime(),
		new Json()
			.deriveSchema());

	testSchemaSerializationSchema(properties);

	testSchemaDeserializationSchema(properties);
}
 
Example #23
Source File: JsonRowFormatFactoryTest.java    From flink with Apache License 2.0 5 votes
@Test
public void testSchemaDerivationByDefault() {
	final Map<String, String> properties = toMap(
		new Schema()
			.field("field1", Types.BOOLEAN())
			.field("field2", Types.INT())
			.field("proctime", Types.SQL_TIMESTAMP()).proctime(),
		new Json());

	testSchemaSerializationSchema(properties);

	testSchemaDeserializationSchema(properties);
}
 
Example #24
Source File: FlinkTableITCase.java    From flink-connectors with Apache License 2.0 4 votes
/**
 * Validates the use of the Pravega table descriptor to generate the source/sink table factories
 * used to write to and read from a Pravega stream with a {@link BatchTableEnvironment}.
 * @throws Exception if the test fails
 */
@Test
public void testBatchTableUsingDescriptor() throws Exception {

    final String scope = setupUtils.getScope();
    final String streamName = "stream";
    Stream stream = Stream.of(scope, streamName);
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(1);
    BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

    PravegaConfig pravegaConfig = setupUtils.getPravegaConfig();

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
            .withRoutingKeyField("category")
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);
    pravega.tableSourceReaderBuilder()
            .withReaderGroupScope(stream.getScope())
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(new Json().failOnMissingField(false))
            .withSchema(new Schema().
                    field("category", DataTypes.STRING()).
                    field("value", DataTypes.INT()));
    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(BatchTableSinkFactory.class, propertiesMap)
            .createBatchTableSink(propertiesMap);
    final TableSource<?> source = TableFactoryService.find(BatchTableSourceFactory.class, propertiesMap)
            .createBatchTableSource(propertiesMap);

    Table table = tableEnv.fromDataSet(env.fromCollection(SAMPLES));

    String tableSinkPath = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogTableSink = ConnectorCatalogTable.sink(sink, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSinkPath),
            connectorCatalogTableSink, false);

    table.insertInto("PravegaSink");
    env.execute();

    String tableSourcePath = tableEnv.getCurrentDatabase() + "." + "samples";

    ConnectorCatalogTable<?, ?> connectorCatalogTableSource = ConnectorCatalogTable.source(source, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSourcePath),
            connectorCatalogTableSource, false);

    // select some sample data from the Pravega-backed table, as a view
    Table view = tableEnv.sqlQuery("SELECT * FROM samples WHERE category IN ('A','B')");

    // convert the view to a dataset and collect the results for comparison purposes
    List<SampleRecord> results = tableEnv.toDataSet(view, SampleRecord.class).collect();
    Assert.assertEquals(new HashSet<>(SAMPLES), new HashSet<>(results));
}
 
Example #25
Source File: HiveTableUtil.java    From flink with Apache License 2.0 4 votes
public static Table instantiateHiveTable(ObjectPath tablePath, CatalogBaseTable table, HiveConf hiveConf) {
	if (!(table instanceof CatalogTableImpl) && !(table instanceof CatalogViewImpl)) {
		throw new CatalogException(
				"HiveCatalog only supports CatalogTableImpl and CatalogViewImpl");
	}
	// let Hive set default parameters for us, e.g. serialization.format
	Table hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(),
			tablePath.getObjectName());
	hiveTable.setCreateTime((int) (System.currentTimeMillis() / 1000));

	Map<String, String> properties = new HashMap<>(table.getProperties());
	// Table comment
	if (table.getComment() != null) {
		properties.put(HiveCatalogConfig.COMMENT, table.getComment());
	}

	boolean isGeneric = HiveCatalog.isGenericForCreate(properties);

	// Hive table's StorageDescriptor
	StorageDescriptor sd = hiveTable.getSd();
	HiveTableUtil.setDefaultStorageFormat(sd, hiveConf);

	if (isGeneric) {
		DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
		tableSchemaProps.putTableSchema(Schema.SCHEMA, table.getSchema());

		if (table instanceof CatalogTable) {
			tableSchemaProps.putPartitionKeys(((CatalogTable) table).getPartitionKeys());
		}

		properties.putAll(tableSchemaProps.asMap());
		properties = maskFlinkProperties(properties);
		hiveTable.setParameters(properties);
	} else {
		HiveTableUtil.initiateTableFromProperties(hiveTable, properties, hiveConf);
		List<FieldSchema> allColumns = HiveTableUtil.createHiveColumns(table.getSchema());
		// Table columns and partition keys
		if (table instanceof CatalogTableImpl) {
			CatalogTable catalogTable = (CatalogTableImpl) table;

			if (catalogTable.isPartitioned()) {
				int partitionKeySize = catalogTable.getPartitionKeys().size();
				List<FieldSchema> regularColumns = allColumns.subList(0, allColumns.size() - partitionKeySize);
				List<FieldSchema> partitionColumns = allColumns.subList(allColumns.size() - partitionKeySize, allColumns.size());

				sd.setCols(regularColumns);
				hiveTable.setPartitionKeys(partitionColumns);
			} else {
				sd.setCols(allColumns);
				hiveTable.setPartitionKeys(new ArrayList<>());
			}
		} else {
			sd.setCols(allColumns);
		}
		// Table properties
		hiveTable.getParameters().putAll(properties);
	}

	if (table instanceof CatalogViewImpl) {
		// TODO: [FLINK-12398] Support partitioned view in catalog API
		hiveTable.setPartitionKeys(new ArrayList<>());

		CatalogView view = (CatalogView) table;
		hiveTable.setViewOriginalText(view.getOriginalQuery());
		hiveTable.setViewExpandedText(view.getExpandedQuery());
		hiveTable.setTableType(TableType.VIRTUAL_VIEW.name());
	}

	return hiveTable;
}
 
Example #26
Source File: HiveCatalog.java    From flink with Apache License 2.0 4 votes
private CatalogBaseTable instantiateCatalogTable(Table hiveTable, HiveConf hiveConf) {
	boolean isView = TableType.valueOf(hiveTable.getTableType()) == TableType.VIRTUAL_VIEW;

	// Table properties
	Map<String, String> properties = hiveTable.getParameters();

	boolean isGeneric = isGenericForGet(hiveTable.getParameters());

	TableSchema tableSchema;
	// Partition keys
	List<String> partitionKeys = new ArrayList<>();

	if (isGeneric) {
		properties = retrieveFlinkProperties(properties);
		DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
		tableSchemaProps.putProperties(properties);
		ObjectPath tablePath = new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName());
		tableSchema = tableSchemaProps.getOptionalTableSchema(Schema.SCHEMA)
				.orElseThrow(() -> new CatalogException("Failed to get table schema from properties for generic table " + tablePath));
		partitionKeys = tableSchemaProps.getPartitionKeys();
		// remove the schema from properties
		properties = CatalogTableImpl.removeRedundant(properties, tableSchema, partitionKeys);
	} else {
		properties.put(CatalogConfig.IS_GENERIC, String.valueOf(false));
		// Table schema
		List<FieldSchema> fields = getNonPartitionFields(hiveConf, hiveTable);
		Set<String> notNullColumns = client.getNotNullColumns(hiveConf, hiveTable.getDbName(), hiveTable.getTableName());
		Optional<UniqueConstraint> primaryKey = isView ? Optional.empty() :
				client.getPrimaryKey(hiveTable.getDbName(), hiveTable.getTableName(), HiveTableUtil.relyConstraint((byte) 0));
		// PK columns cannot be null
		primaryKey.ifPresent(pk -> notNullColumns.addAll(pk.getColumns()));
		tableSchema = HiveTableUtil.createTableSchema(fields, hiveTable.getPartitionKeys(), notNullColumns, primaryKey.orElse(null));

		if (!hiveTable.getPartitionKeys().isEmpty()) {
			partitionKeys = getFieldNames(hiveTable.getPartitionKeys());
		}
	}

	String comment = properties.remove(HiveCatalogConfig.COMMENT);

	if (isView) {
		return new CatalogViewImpl(
				hiveTable.getViewOriginalText(),
				hiveTable.getViewExpandedText(),
				tableSchema,
				properties,
				comment);
	} else {
		return new CatalogTableImpl(tableSchema, partitionKeys, properties, comment);
	}
}