org.apache.flink.table.sources.TableSource Java Examples
The following examples show how to use
org.apache.flink.table.sources.TableSource.
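TableSource is the base interface of Flink's legacy (pre-FLIP-95) source API: it describes a table's schema and produced type, while subinterfaces such as StreamTableSource, FilterableTableSource, and ProjectableTableSource add the actual behavior. As a rough sketch under the Flink 1.9/1.10 API used throughout these examples (the class name and hard-coded schema are made up for illustration):

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.sources.TableSource;
import org.apache.flink.table.types.DataType;
import org.apache.flink.types.Row;

// Hypothetical minimal TableSource: it only describes its schema; concrete
// subtypes such as StreamTableSource add the data-producing methods.
public class MinimalTableSource implements TableSource<Row> {

    @Override
    public TableSchema getTableSchema() {
        return TableSchema.builder()
                .field("name", DataTypes.STRING())
                .field("age", DataTypes.INT())
                .build();
    }

    @Override
    public DataType getProducedDataType() {
        // the physical type of the produced records; here it mirrors the schema
        return getTableSchema().toRowDataType();
    }

    @Override
    public String explainSource() {
        // shown in query plans, see the explainSource() assertion in Example #11
        return "MinimalTableSource(name, age)";
    }
}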
Example #1
Source File: ConnectorCatalogTable.java From flink with Apache License 2.0
public static <T1> TableSchema calculateSourceSchema(TableSource<T1> source, boolean isBatch) {
    TableSchema tableSchema = source.getTableSchema();
    if (isBatch) {
        return tableSchema;
    }
    DataType[] types = Arrays.copyOf(tableSchema.getFieldDataTypes(), tableSchema.getFieldCount());
    String[] fieldNames = tableSchema.getFieldNames();
    if (source instanceof DefinedRowtimeAttributes) {
        updateRowtimeIndicators((DefinedRowtimeAttributes) source, fieldNames, types);
    }
    if (source instanceof DefinedProctimeAttribute) {
        updateProctimeIndicator((DefinedProctimeAttribute) source, fieldNames, types);
    }
    return TableSchema.builder().fields(fieldNames, types).build();
}
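calculateSourceSchema rewrites field types to time indicators when the source implements the time-attribute marker interfaces from org.apache.flink.table.sources. As a rough illustration of how a source declares those attributes (the class name and field names are made up; the interface methods, extractor, and watermark strategy are from the Flink 1.9/1.10 API):

import java.util.Collections;
import java.util.List;

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.sources.DefinedProctimeAttribute;
import org.apache.flink.table.sources.DefinedRowtimeAttributes;
import org.apache.flink.table.sources.RowtimeAttributeDescriptor;
import org.apache.flink.table.sources.TableSource;
import org.apache.flink.table.sources.tsextractors.ExistingField;
import org.apache.flink.table.sources.wmstrategies.AscendingTimestamps;
import org.apache.flink.table.types.DataType;
import org.apache.flink.types.Row;

// Hypothetical source declaring one event-time and one processing-time attribute.
public class TimedTableSource implements TableSource<Row>,
        DefinedRowtimeAttributes, DefinedProctimeAttribute {

    @Override
    public TableSchema getTableSchema() {
        return TableSchema.builder()
                .field("id", DataTypes.BIGINT())
                .field("ts", DataTypes.TIMESTAMP(3))
                .field("proc", DataTypes.TIMESTAMP(3))
                .build();
    }

    @Override
    public DataType getProducedDataType() {
        return getTableSchema().toRowDataType();
    }

    @Override
    public List<RowtimeAttributeDescriptor> getRowtimeAttributeDescriptors() {
        // "ts" is the rowtime field: read it as-is and generate ascending watermarks
        return Collections.singletonList(new RowtimeAttributeDescriptor(
                "ts", new ExistingField("ts"), new AscendingTimestamps()));
    }

    @Override
    public String getProctimeAttribute() {
        // "proc" is appended as the processing-time indicator
        return "proc";
    }
}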
Example #2
Source File: FlinkPravegaTableFactoryTest.java From flink-connectors with Apache License 2.0
/**
 * Stream table source expects 'update-mode' configuration to be passed.
 */
@Test(expected = NoMatchingTableFactoryException.class)
public void testMissingStreamMode() {
    Pravega pravega = new Pravega();
    Stream stream = Stream.of(SCOPE, STREAM);

    pravega.tableSourceReaderBuilder()
            .forStream(stream)
            .withPravegaConfig(PRAVEGA_CONFIG);

    final TestTableDescriptor testDesc = new TestTableDescriptor(pravega)
            .withFormat(JSON)
            .withSchema(SCHEMA);
    final Map<String, String> propertiesMap = testDesc.toProperties();

    final TableSource<?> source = TableFactoryService.find(StreamTableSourceFactory.class, propertiesMap)
            .createStreamTableSource(propertiesMap);
    TableSourceValidation.validateTableSource(source, TableSchema.builder()
            .field("name", DataTypes.STRING())
            .field("age", DataTypes.INT())
            .build());
    fail("update mode configuration validation failed");
}
Example #3
Source File: OrcTableSource.java From flink with Apache License 2.0
@Override
public TableSource<Row> applyPredicate(List<Expression> predicates) {
    ArrayList<Predicate> orcPredicates = new ArrayList<>();

    // we do not remove any predicates from the list because ORC does not fully apply predicates
    for (Expression pred : predicates) {
        Predicate orcPred = toOrcPredicate(pred);
        if (orcPred != null) {
            LOG.info("Predicate [{}] converted into OrcPredicate [{}] and pushed into OrcTableSource for path {}.", pred, orcPred, path);
            orcPredicates.add(orcPred);
        } else {
            LOG.info("Predicate [{}] could not be pushed into OrcTableSource for path {}.", pred, path);
        }
    }

    return new OrcTableSource(path, orcSchema, orcConfig, batchSize, recursiveEnumeration, selectedFields, orcPredicates.toArray(new Predicate[]{}));
}
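Both this ORC example and the Parquet example (#15) implement the legacy FilterableTableSource interface. The contract, sketched below from the Flink 1.9/1.10 API: the planner hands applyPredicate a mutable list of predicates; the source may remove the expressions it can fully apply, and whatever stays in the list is still evaluated by the runtime afterwards. That is why OrcTableSource keeps every predicate (ORC filtering is not exact on a per-row basis), while ParquetTableSource removes the ones it converts.

import java.util.List;

import org.apache.flink.table.expressions.Expression;
import org.apache.flink.table.sources.TableSource;

// Legacy FilterableTableSource contract, sketched from the Flink 1.9/1.10 API.
public interface FilterableTableSource<T> {

    // Return a copy of this TableSource configured with the pushed-down
    // predicates. Expressions removed from `predicates` are considered fully
    // applied by the source; remaining ones are re-evaluated by Flink.
    TableSource<T> applyPredicate(List<Expression> predicates);

    // True once predicates have been pushed down, so the planner does not
    // apply the optimization twice.
    boolean isFilterPushedDown();
}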
Example #4
Source File: JDBCTableSourceSinkFactoryTest.java From flink with Apache License 2.0
@Test
public void testJDBCWithFilter() {
    Map<String, String> properties = getBasicProperties();
    properties.put("connector.driver", "org.apache.derby.jdbc.EmbeddedDriver");
    properties.put("connector.username", "user");
    properties.put("connector.password", "pass");

    final TableSource<?> actual = ((JDBCTableSource) TableFactoryService
            .find(StreamTableSourceFactory.class, properties)
            .createStreamTableSource(properties))
            .projectFields(new int[] {0, 2});

    Map<String, DataType> projectedFields = ((FieldsDataType) actual.getProducedDataType()).getFieldDataTypes();
    assertEquals(projectedFields.get("aaa"), DataTypes.INT());
    assertNull(projectedFields.get("bbb"));
    assertEquals(projectedFields.get("ccc"), DataTypes.DOUBLE());
}
Example #5
Source File: TableFactoryUtil.java From flink with Apache License 2.0
/**
 * Creates a {@link TableSource} from a {@link CatalogTable}.
 *
 * <p>It considers {@link Catalog#getFactory()} if provided.
 */
@SuppressWarnings("unchecked")
public static <T> TableSource<T> findAndCreateTableSource(
        Catalog catalog,
        ObjectIdentifier objectIdentifier,
        CatalogTable catalogTable,
        ReadableConfig configuration) {
    TableSourceFactory.Context context = new TableSourceFactoryContextImpl(
            objectIdentifier, catalogTable, configuration);
    Optional<TableFactory> factoryOptional = catalog.getTableFactory();
    if (factoryOptional.isPresent()) {
        TableFactory factory = factoryOptional.get();
        if (factory instanceof TableSourceFactory) {
            return ((TableSourceFactory<T>) factory).createTableSource(context);
        } else {
            throw new ValidationException("Cannot query a sink-only table. "
                    + "TableFactory provided by catalog must implement TableSourceFactory");
        }
    } else {
        return findAndCreateTableSource(context);
    }
}
Example #6
Source File: TableEnvironmentImpl.java From flink with Apache License 2.0
private void registerTableSourceInternal(String name, TableSource<?> tableSource) {
    validateTableSource(tableSource);
    Optional<CatalogBaseTable> table = getCatalogTable(
            catalogManager.getBuiltInCatalogName(),
            catalogManager.getBuiltInDatabaseName(),
            name);

    if (table.isPresent()) {
        if (table.get() instanceof ConnectorCatalogTable<?, ?>) {
            ConnectorCatalogTable<?, ?> sourceSinkTable = (ConnectorCatalogTable<?, ?>) table.get();
            if (sourceSinkTable.getTableSource().isPresent()) {
                throw new ValidationException(String.format(
                        "Table '%s' already exists. Please choose a different name.", name));
            } else {
                // wrapper contains only sink (not source)
                replaceTableInternal(
                        name,
                        ConnectorCatalogTable.sourceAndSink(
                                tableSource,
                                sourceSinkTable.getTableSink().get(),
                                !IS_STREAM_TABLE));
            }
        } else {
            throw new ValidationException(String.format(
                    "Table '%s' already exists. Please choose a different name.", name));
        }
    } else {
        registerTableInternal(name, ConnectorCatalogTable.source(tableSource, !IS_STREAM_TABLE));
    }
}
Example #7
Source File: HiveTableFactoryTest.java From flink with Apache License 2.0
@Test
public void testHiveTable() throws Exception {
    TableSchema schema = TableSchema.builder()
            .field("name", DataTypes.STRING())
            .field("age", DataTypes.INT())
            .build();

    Map<String, String> properties = new HashMap<>();

    catalog.createDatabase("mydb", new CatalogDatabaseImpl(new HashMap<>(), ""), true);
    ObjectPath path = new ObjectPath("mydb", "mytable");
    CatalogTable table = new CatalogTableImpl(schema, properties, "hive table");
    catalog.createTable(path, table, true);

    Optional<TableFactory> opt = catalog.getTableFactory();
    assertTrue(opt.isPresent());
    HiveTableFactory tableFactory = (HiveTableFactory) opt.get();

    TableSink tableSink = tableFactory.createTableSink(path, table);
    assertTrue(tableSink instanceof HiveTableSink);

    TableSource tableSource = tableFactory.createTableSource(path, table);
    assertTrue(tableSource instanceof HiveTableSource);
}
Example #8
Source File: HiveTableFactoryTest.java From flink with Apache License 2.0
@Test
public void testGenericTable() throws Exception {
    TableSchema schema = TableSchema.builder()
            .field("name", DataTypes.STRING())
            .field("age", DataTypes.INT())
            .build();

    Map<String, String> properties = new HashMap<>();
    properties.put(CatalogConfig.IS_GENERIC, String.valueOf(true));
    properties.put("connector", "COLLECTION");

    catalog.createDatabase("mydb", new CatalogDatabaseImpl(new HashMap<>(), ""), true);

    ObjectPath path = new ObjectPath("mydb", "mytable");
    CatalogTable table = new CatalogTableImpl(schema, properties, "csv table");
    catalog.createTable(path, table, true);

    Optional<TableFactory> opt = catalog.getTableFactory();
    assertTrue(opt.isPresent());
    HiveTableFactory tableFactory = (HiveTableFactory) opt.get();

    TableSource tableSource = tableFactory.createTableSource(path, table);
    assertTrue(tableSource instanceof StreamTableSource);

    TableSink tableSink = tableFactory.createTableSink(path, table);
    assertTrue(tableSink instanceof StreamTableSink);
}
Example #9
Source File: KafkaTableSourceSinkFactoryTestBase.java From flink with Apache License 2.0
@Test
public void testTableSourceCommitOnCheckpointsDisabled() {
    Map<String, String> propertiesMap = new HashMap<>();
    createKafkaSourceProperties().forEach((k, v) -> {
        if (!k.equals("connector.properties.group.id")) {
            propertiesMap.put(k, v);
        }
    });
    final TableSource<?> tableSource = TableFactoryService.find(StreamTableSourceFactory.class, propertiesMap)
            .createStreamTableSource(propertiesMap);
    final StreamExecutionEnvironmentMock mock = new StreamExecutionEnvironmentMock();

    // The commitOnCheckpoints flag should be false when no consumer group is set.
    ((KafkaTableSourceBase) tableSource).getDataStream(mock);
    assertTrue(mock.sourceFunction instanceof FlinkKafkaConsumerBase);
    assertFalse(((FlinkKafkaConsumerBase) mock.sourceFunction).getEnableCommitOnCheckpoints());
}
Example #10
Source File: CatalogSchemaTable.java From flink with Apache License 2.0
private Optional<TableSource<?>> findAndCreateTableSource() {
    Optional<TableSource<?>> tableSource = Optional.empty();
    try {
        if (lookupResult.getTable() instanceof CatalogTable) {
            // Use an empty config for TableSourceFactoryContextImpl since we can't fetch the
            // actual TableConfig here. Currently, the empty config does not affect the logic.
            ReadableConfig config = new Configuration();
            TableSourceFactory.Context context = new TableSourceFactoryContextImpl(
                    tableIdentifier, (CatalogTable) lookupResult.getTable(), config);
            TableSource<?> source = TableFactoryUtil.findAndCreateTableSource(context);
            if (source instanceof StreamTableSource) {
                if (!isStreamingMode && !((StreamTableSource<?>) source).isBounded()) {
                    throw new ValidationException("Cannot query on an unbounded source in batch mode, but "
                            + tableIdentifier.asSummaryString() + " is unbounded.");
                }
                tableSource = Optional.of(source);
            } else {
                throw new ValidationException("Catalog tables only support "
                        + "StreamTableSource and InputFormatTableSource.");
            }
        }
    } catch (Exception e) {
        tableSource = Optional.empty();
    }
    return tableSource;
}
Example #11
Source File: JdbcTableSourceSinkFactoryTest.java From flink with Apache License 2.0
@Test
public void testJdbcFieldsProjection() {
    Map<String, String> properties = getBasicProperties();
    properties.put("connector.driver", "org.apache.derby.jdbc.EmbeddedDriver");
    properties.put("connector.username", "user");
    properties.put("connector.password", "pass");

    final TableSource<?> actual = ((JdbcTableSource) TableFactoryService
            .find(StreamTableSourceFactory.class, properties)
            .createStreamTableSource(properties))
            .projectFields(new int[] {0, 2});

    List<DataType> projectedFields = actual.getProducedDataType().getChildren();
    assertEquals(Arrays.asList(DataTypes.INT(), DataTypes.DOUBLE()), projectedFields);

    // test jdbc table source description
    List<String> fieldNames = ((RowType) actual.getProducedDataType().getLogicalType()).getFieldNames();
    String expectedSourceDescription = actual.getClass().getSimpleName()
            + "(" + String.join(", ", fieldNames.stream().toArray(String[]::new)) + ")";
    assertEquals(expectedSourceDescription, actual.explainSource());
}
Example #12
Source File: HiveTableFactory.java From flink with Apache License 2.0
@Override
public TableSource<RowData> createTableSource(TableSourceFactory.Context context) {
    CatalogTable table = checkNotNull(context.getTable());
    Preconditions.checkArgument(table instanceof CatalogTableImpl);

    boolean isGeneric = Boolean.parseBoolean(table.getProperties().get(CatalogConfig.IS_GENERIC));

    if (!isGeneric) {
        return new HiveTableSource(
                new JobConf(hiveConf),
                context.getConfiguration(),
                context.getObjectIdentifier().toObjectPath(),
                table);
    } else {
        return TableFactoryUtil.findAndCreateTableSource(context);
    }
}
Example #13
Source File: HBaseConnectorITCase.java From flink with Apache License 2.0
@Test
public void testHBaseLookupTableSource() throws Exception {
    if (OLD_PLANNER.equals(planner)) {
        // lookup table source is only supported in blink planner, skip for old planner
        return;
    }
    StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment streamTableEnv = StreamTableEnvironment.create(streamEnv, streamSettings);
    StreamITCase.clear();

    // prepare a source table
    String srcTableName = "src";
    DataStream<Row> ds = streamEnv.fromCollection(testData2).returns(testTypeInfo2);
    Table in = streamTableEnv.fromDataStream(ds, "a, b, c, proc.proctime");
    streamTableEnv.registerTable(srcTableName, in);

    Map<String, String> tableProperties = hbaseTableProperties();
    TableSource source = TableFactoryService
            .find(HBaseTableFactory.class, tableProperties)
            .createTableSource(tableProperties);
    streamTableEnv.registerTableSource("hbaseLookup", source);

    // perform a temporal table join query
    String query = "SELECT a,family1.col1, family3.col3 FROM src "
            + "JOIN hbaseLookup FOR SYSTEM_TIME AS OF src.proc as h ON src.a = h.rk";
    Table result = streamTableEnv.sqlQuery(query);

    DataStream<Row> resultSet = streamTableEnv.toAppendStream(result, Row.class);
    resultSet.addSink(new StreamITCase.StringSink<>());
    streamEnv.execute();

    List<String> expected = new ArrayList<>();
    expected.add("1,10,Welt-1");
    expected.add("2,20,Welt-2");
    expected.add("3,30,Welt-3");
    expected.add("3,30,Welt-3");

    StreamITCase.compareWithList(expected);
}
Example #14
Source File: KuduTableSource.java From bahir-flink with Apache License 2.0
@Override
public TableSource<Row> projectFields(int[] ints) {
    String[] fieldNames = new String[ints.length];
    RowType producedDataType = (RowType) getProducedDataType().getLogicalType();
    List<String> prevFieldNames = producedDataType.getFieldNames();
    for (int i = 0; i < ints.length; i++) {
        fieldNames[i] = prevFieldNames.get(ints[i]);
    }
    return new KuduTableSource(configBuilder, tableInfo, flinkSchema, fieldNames);
}
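KuduTableSource, like the JDBC source exercised in Example #11 and the Hive source in Example #18, implements the legacy ProjectableTableSource interface. A sketch of that single-method contract from the Flink 1.9/1.10 API; Example #11 additionally checks that explainSource() of the projected copy reflects the reduced field list:

import org.apache.flink.table.sources.TableSource;

// Legacy ProjectableTableSource contract, sketched from the Flink 1.9/1.10 API.
public interface ProjectableTableSource<T> {

    // `fields` holds the 0-based indexes of the fields the query actually
    // needs, in the order the planner expects them. The returned copy must
    // produce exactly those fields; the original source stays unchanged.
    TableSource<T> projectFields(int[] fields);
}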
Example #15
Source File: ParquetTableSource.java From flink with Apache License 2.0
@Override
public TableSource<Row> applyPredicate(List<Expression> predicates) {
    // try to convert Flink filter expressions to Parquet FilterPredicates
    List<FilterPredicate> convertedPredicates = new ArrayList<>(predicates.size());
    List<Expression> unsupportedExpressions = new ArrayList<>(predicates.size());

    for (Expression toConvert : predicates) {
        FilterPredicate convertedPredicate = toParquetPredicate(toConvert);
        if (convertedPredicate != null) {
            convertedPredicates.add(convertedPredicate);
        } else {
            unsupportedExpressions.add(toConvert);
        }
    }

    // update list of Flink expressions to unsupported expressions
    predicates.clear();
    predicates.addAll(unsupportedExpressions);

    // construct single Parquet FilterPredicate
    FilterPredicate parquetPredicate = null;
    if (!convertedPredicates.isEmpty()) {
        // concat converted predicates with AND
        parquetPredicate = convertedPredicates.get(0);

        for (FilterPredicate converted : convertedPredicates.subList(1, convertedPredicates.size())) {
            parquetPredicate = FilterApi.and(parquetPredicate, converted);
        }
    }

    // create and return a new ParquetTableSource with Parquet FilterPredicate
    return new ParquetTableSource(path, parquetSchema, this.parquetConfig, recursiveEnumeration, selectedFields, parquetPredicate);
}
Example #16
Source File: RichTableSourceQueryOperation.java From flink with Apache License 2.0
public RichTableSourceQueryOperation(
        ObjectIdentifier identifier,
        TableSource<T> tableSource,
        FlinkStatistic statistic) {
    super(tableSource, false);
    Preconditions.checkArgument(tableSource instanceof StreamTableSource,
            "Blink planner should always use StreamTableSource.");
    this.statistic = statistic;
    this.identifier = identifier;
}
Example #17
Source File: StreamTableSourceFactory.java From flink with Apache License 2.0
/**
 * Only create a stream table source.
 */
@Override
default TableSource<T> createTableSource(Map<String, String> properties) {
    StreamTableSource<T> source = createStreamTableSource(properties);
    if (source == null) {
        throw new ValidationException(
                "Please override 'createTableSource(Context)' method.");
    }
    return source;
}
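Many examples on this page obtain a TableSource indirectly via TableFactoryService.find(...). Factories like this one are discovered through Java's ServiceLoader: an implementation declares the properties it matches and is listed in a META-INF/services/org.apache.flink.table.factories.TableFactory file. Below is a rough sketch under the Flink 1.10 property-based factory API; MySourceFactory, MyTableSource, and the connector.path property are made-up illustrations:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.flink.table.factories.StreamTableSourceFactory;
import org.apache.flink.table.sources.StreamTableSource;
import org.apache.flink.types.Row;

// Hypothetical factory illustrating the discovery contract; MyTableSource
// is a placeholder for a real StreamTableSource implementation.
public class MySourceFactory implements StreamTableSourceFactory<Row> {

    @Override
    public Map<String, String> requiredContext() {
        Map<String, String> context = new HashMap<>();
        // matched against the 'connector.type' key of the table properties
        context.put("connector.type", "my-source");
        return context;
    }

    @Override
    public List<String> supportedProperties() {
        List<String> properties = new ArrayList<>();
        properties.add("connector.path");   // assumed custom property
        properties.add("schema.#.name");    // declare schema keys as supported
        properties.add("schema.#.data-type");
        return properties;
    }

    @Override
    public StreamTableSource<Row> createStreamTableSource(Map<String, String> properties) {
        return new MyTableSource(properties.get("connector.path"));
    }
}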
Example #18
Source File: HiveTableSource.java From flink with Apache License 2.0
@Override
public TableSource<RowData> projectFields(int[] fields) {
    return new HiveTableSource(
            jobConf,
            flinkConf,
            tablePath,
            catalogTable,
            remainingPartitions,
            hiveVersion,
            partitionPruned,
            fields,
            isLimitPushDown,
            limit);
}
Example #19
Source File: ConnectorCatalogTable.java From flink with Apache License 2.0
@VisibleForTesting
protected ConnectorCatalogTable(
        TableSource<T1> tableSource,
        TableSink<T2> tableSink,
        TableSchema tableSchema,
        boolean isBatch) {
    super(tableSchema, Collections.emptyMap(), "");
    this.tableSource = tableSource;
    this.tableSink = tableSink;
    this.isBatch = isBatch;
}
Example #20
Source File: TableFactoryUtil.java From flink with Apache License 2.0
/**
 * Returns a table source matching the properties.
 */
@SuppressWarnings("unchecked")
private static <T> TableSource<T> findAndCreateTableSource(Map<String, String> properties) {
    try {
        return TableFactoryService
                .find(TableSourceFactory.class, properties)
                .createTableSource(properties);
    } catch (Throwable t) {
        throw new TableException("findAndCreateTableSource failed.", t);
    }
}
Example #21
Source File: CsvTableSinkFactoryTest.java From flink with Apache License 2.0
@Test
public void testBatchTableSourceFactory() {
    DescriptorProperties descriptor = createDescriptor(testingSchema);
    TableSource source = createTableSource(descriptor);

    assertTrue(source instanceof CsvTableSource);
    assertEquals(testingSchema.toRowDataType(), source.getProducedDataType());
}
Example #22
Source File: MockTableSourceFactory.java From AthenaX with Apache License 2.0
@Override
public TableSource<Row> create(Map<String, String> properties) {
    DescriptorProperties params = new DescriptorProperties(true);
    params.putProperties(properties);
    TableSchema schema = params.getTableSchema(TABLE_SCHEMA_CONNECTOR_PROPERTY);
    List<Row> rows = deserializeRows(params.getString(TABLE_DATA_CONNECTOR_PROPERTY));
    return new MockTableSource(rows, new RowTypeInfo(schema.getTypes(), schema.getColumnNames()));
}
Example #23
Source File: DatabaseCalciteSchema.java From flink with Apache License 2.0
private Table convertConnectorTable(
        ConnectorCatalogTable<?, ?> table,
        ObjectPath tablePath) throws TableNotExistException {
    if (table.getTableSource().isPresent()) {
        TableSource<?> tableSource = table.getTableSource().get();
        if (!(tableSource instanceof StreamTableSource ||
                tableSource instanceof LookupableTableSource)) {
            throw new TableException(
                    "Only StreamTableSource and LookupableTableSource can be used in Blink planner.");
        }
        if (!isStreamingMode && tableSource instanceof StreamTableSource &&
                !((StreamTableSource<?>) tableSource).isBounded()) {
            throw new TableException("Only bounded StreamTableSource can be used in batch mode.");
        }

        TableStats tableStats = TableStats.UNKNOWN;
        // TODO supports stats for partitionable table
        if (!table.isPartitioned()) {
            CatalogTableStatistics tableStatistics = catalog.getTableStatistics(tablePath);
            CatalogColumnStatistics columnStatistics = catalog.getTableColumnStatistics(tablePath);
            tableStats = convertToTableStats(tableStatistics, columnStatistics);
        }
        return new TableSourceTable<>(
                tableSource,
                isStreamingMode,
                FlinkStatistic.builder().tableStats(tableStats).build());
    } else {
        Optional<TableSinkTable> tableSinkTable = table.getTableSink()
                .map(tableSink -> new TableSinkTable<>(
                        tableSink,
                        FlinkStatistic.UNKNOWN()));
        if (tableSinkTable.isPresent()) {
            return tableSinkTable.get();
        } else {
            throw new TableException("Cannot convert a connector table "
                    + "without either source or sink.");
        }
    }
}
Example #24
Source File: TableSourceFactoryMock.java From flink with Apache License 2.0
@Override
public TableSource<Row> createTableSource(Map<String, String> properties) {
    final DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putProperties(properties);
    final TableSchema schema = TableSchemaUtils.getPhysicalSchema(
            descriptorProperties.getTableSchema(Schema.SCHEMA));
    return new TableSourceMock(schema);
}
Example #25
Source File: RichTableSourceQueryOperation.java From flink with Apache License 2.0
public RichTableSourceQueryOperation(
        TableSource<T> tableSource,
        FlinkStatistic statistic) {
    super(tableSource, false);
    Preconditions.checkArgument(tableSource instanceof StreamTableSource,
            "Blink planner should always use StreamTableSource.");
    this.statistic = statistic;
}
Example #26
Source File: QueryOperationConverter.java From flink with Apache License 2.0
@Override
public <U> RelNode visit(TableSourceQueryOperation<U> tableSourceOperation) {
    TableSource<?> tableSource = tableSourceOperation.getTableSource();
    boolean isBatch;
    if (tableSource instanceof LookupableTableSource) {
        isBatch = tableSourceOperation.isBatch();
    } else if (tableSource instanceof StreamTableSource) {
        isBatch = ((StreamTableSource<?>) tableSource).isBounded();
    } else {
        throw new TableException(String.format("%s is not supported.",
                tableSource.getClass().getSimpleName()));
    }

    FlinkStatistic statistic;
    List<String> names;
    if (tableSourceOperation instanceof RichTableSourceQueryOperation &&
            ((RichTableSourceQueryOperation<U>) tableSourceOperation).getQualifiedName() != null) {
        statistic = ((RichTableSourceQueryOperation<U>) tableSourceOperation).getStatistic();
        names = ((RichTableSourceQueryOperation<U>) tableSourceOperation).getQualifiedName();
    } else {
        statistic = FlinkStatistic.UNKNOWN();
        // TableSourceScan requires a unique name of a Table for computing a digest.
        // We are using the identity hash of the TableSource object.
        String refId = "Unregistered_TableSource_" + System.identityHashCode(tableSource);
        names = Collections.singletonList(refId);
    }

    TableSourceTable<?> tableSourceTable = new TableSourceTable<>(tableSource, !isBatch, statistic);
    FlinkRelOptTable table = FlinkRelOptTable.create(
            relBuilder.getRelOptSchema(),
            tableSourceTable.getRowType(relBuilder.getTypeFactory()),
            names,
            tableSourceTable);
    return LogicalTableScan.create(relBuilder.getCluster(), table);
}
Example #27
Source File: AbstractFlinkClient.java From alchemy with Apache License 2.0
private void registerSource(StreamTableEnvironment env, SqlSubmitFlinkRequest request,
        Map<String, TableSource> tableSources, Map<String, SourceDescriptor> sideSources) {
    request.getSources().forEach(consumer -> {
        try {
            TableType tableType = consumer.getTableType();
            switch (tableType) {
                case SIDE:
                    sideSources.put(consumer.getName(), consumer);
                    LOGGER.info("register side table, name:{}, class:{}", consumer.getName(), consumer.getClass());
                    break;
                case VIEW:
                    String sql = consumer.getSql();
                    Table table = registerSql(env, sql, tableSources, sideSources);
                    env.registerTable(consumer.getName(), table);
                    LOGGER.info("register view, name:{}", consumer.getName());
                    break;
                case TABLE:
                    TableSource tableSource = consumer.transform();
                    env.registerTableSource(consumer.getName(), tableSource);
                    tableSources.put(consumer.getName(), tableSource);
                    LOGGER.info("register table, name:{}, class:{}", consumer.getName(), tableSource.getClass());
                    break;
                default:
                    throw new UnsupportedOperationException("Unknown tableType: " + tableType);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    });
}