Java Code Examples for io.prestosql.spi.predicate.TupleDomain#filter()

The following examples show how to use io.prestosql.spi.predicate.TupleDomain#filter(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: WindowFilterPushDown.java From presto with Apache License 2.0

6 votes

/**
 * Rebuilds the filter that sits on top of {@code source}, dropping the row number
 * constraint when {@code allRowNumberValuesInDomain} reports that it can be dropped.
 *
 * @param filterNode the filter whose predicate is analysed
 * @param source the node the resulting filter (if any) is placed on
 * @param rowNumberSymbol the symbol whose domain is removed from the predicate
 * @param upperBound the bound passed to {@code allRowNumberValuesInDomain}
 * @return {@code source} itself when the rewritten predicate is the TRUE literal,
 *         otherwise a {@link FilterNode} over {@code source} that reuses the id of {@code filterNode}
 */
private PlanNode rewriteFilterSource(FilterNode filterNode, PlanNode source, Symbol rowNumberSymbol, int upperBound)
{
    ExtractionResult extraction = fromPredicate(metadata, session, filterNode.getPredicate(), types);
    TupleDomain<Symbol> extractedDomain = extraction.getTupleDomain();

    if (allRowNumberValuesInDomain(extractedDomain, rowNumberSymbol, upperBound)) {
        // The row number domain is absorbed into the node, so strip it from the predicate
        TupleDomain<Symbol> domainWithoutRowNumber = extractedDomain.filter((symbol, domain) -> !symbol.equals(rowNumberSymbol));
        Expression rewrittenPredicate = ExpressionUtils.combineConjuncts(
                metadata,
                extraction.getRemainingExpression(),
                domainTranslator.toPredicate(domainWithoutRowNumber));

        // A predicate that is trivially true needs no filter at all
        if (rewrittenPredicate.equals(BooleanLiteral.TRUE_LITERAL)) {
            return source;
        }
        return new FilterNode(filterNode.getId(), source, rewrittenPredicate);
    }

    // The check failed: keep the original predicate untouched on top of the new source
    return new FilterNode(filterNode.getId(), source, filterNode.getPredicate());
}

Example 2

Source File: TpchIndexMetadata.java From presto with Apache License 2.0

5 votes

/**
 * Attempts to resolve an index for the given TPCH table.
 *
 * <p>Fixed, non-null values from {@code tupleDomain} on columns outside {@code indexableColumns}
 * are folded into the index handle; their columns are removed from the tuple domain that is
 * returned as part of the resolved index.
 *
 * @param session not used by this implementation
 * @param tableHandle the table to index; cast to {@link TpchTableHandle}
 * @param indexableColumns columns that may be used for the index lookup
 * @param outputColumns not used by this implementation
 * @param tupleDomain constraint from which fixed values are extracted
 * @return the resolved index, or {@link Optional#empty()} when {@code indexedData} has no
 *         indexed table for the computed lookup columns
 */
@Override
public Optional<ConnectorResolvedIndex> resolveIndex(
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        Set<ColumnHandle> indexableColumns,
        Set<ColumnHandle> outputColumns,
        TupleDomain<ColumnHandle> tupleDomain)
{
    TpchTableHandle tpchTableHandle = (TpchTableHandle) tableHandle;

    // Keep only the fixed values that don't overlap with the indexableColumns.
    // Note: technically the overlapped columns could be used more efficiently, but this way is simpler for now.
    // Null values are stripped as well since they are meaningless in index join lookups.
    Map<ColumnHandle, NullableValue> extractedValues = TupleDomain.extractFixedValues(tupleDomain).orElse(ImmutableMap.of());
    Map<ColumnHandle, NullableValue> fixedValues = extractedValues.entrySet().stream()
            .filter(entry -> !indexableColumns.contains(entry.getKey()) && !entry.getValue().isNull())
            .collect(Collectors.toMap(entry -> entry.getKey(), entry -> entry.getValue()));

    // Every column available for the index lookup: the indexable ones plus the fixed-value ones
    ImmutableSet.Builder<String> lookupNames = ImmutableSet.builder();
    lookupNames.addAll(handleToNames(ImmutableList.copyOf(indexableColumns)));
    lookupNames.addAll(handleToNames(ImmutableList.copyOf(fixedValues.keySet())));
    Set<String> lookupColumnNames = lookupNames.build();

    // Without a matching indexed table there is nothing to resolve
    boolean indexMissing = indexedData
            .getIndexedTable(tpchTableHandle.getTableName(), tpchTableHandle.getScaleFactor(), lookupColumnNames)
            .isEmpty();
    if (indexMissing) {
        return Optional.empty();
    }

    // Columns carried as fixed values by the index handle are dropped from the returned domain
    TupleDomain<ColumnHandle> filteredTupleDomain = tupleDomain.filter((column, domain) -> !fixedValues.containsKey(column));
    TupleDomain<ColumnHandle> fixedValueDomain = TupleDomain.fromFixedValues(fixedValues);
    TpchIndexHandle indexHandle = new TpchIndexHandle(
            tpchTableHandle.getTableName(),
            tpchTableHandle.getScaleFactor(),
            lookupColumnNames,
            fixedValueDomain);
    return Optional.of(new ConnectorResolvedIndex(indexHandle, filteredTupleDomain));
}

Example 3

Source File: PredicateUtils.java From presto with Apache License 2.0

4 votes

/**
 * Restricts {@code predicate} to the columns accepted by {@code filterPredicate}.
 *
 * <p>Each column handle is cast to {@link TpchColumnHandle} before being tested, so a handle
 * of any other type results in a {@link ClassCastException}.
 *
 * @param predicate the tuple domain to filter
 * @param filterPredicate decides, per column, whether its domain is kept
 * @return the result of {@code predicate.filter(...)} with the column test applied
 */
public static TupleDomain<ColumnHandle> filterColumns(TupleDomain<ColumnHandle> predicate, Predicate<TpchColumnHandle> filterPredicate)
{
    return predicate.filter((columnHandle, domain) -> {
        TpchColumnHandle tpchColumn = (TpchColumnHandle) columnHandle;
        return filterPredicate.test(tpchColumn);
    });
}

Example 4

Source File: S3SelectRecordCursorProvider.java From presto with Apache License 2.0

4 votes

/**
 * Creates an S3 Select backed record cursor for the given split, when applicable.
 *
 * <p>A cursor is produced only if S3 Select pushdown is enabled and the deserializer class name
 * read from {@code schema} is contained in {@code CSV_SERDES}; in every other case the result is empty.
 *
 * @param effectivePredicate predicate to push down; only domains on base columns are used
 * @param s3SelectPushdownEnabled when {@code false} the method returns empty immediately
 * @return the cursor together with the optional reader projections, or {@link Optional#empty()}
 * @throws PrestoException with {@code HIVE_FILESYSTEM_ERROR} if obtaining the file system fails
 */
@Override
public Optional<ReaderRecordCursorWithProjections> createRecordCursor(
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        long fileSize,
        Properties schema,
        List<HiveColumnHandle> columns,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager,
        boolean s3SelectPushdownEnabled)
{
    if (!s3SelectPushdownEnabled) {
        return Optional.empty();
    }

    // The returned file system is not used; the call is kept so that an IOException surfaces here
    try {
        this.hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, "Failed getting FileSystem: " + path, e);
    }

    Optional<ReaderProjections> projectedReaderColumns = projectBaseColumns(columns);
    // Ignore predicates on partial columns for now.
    TupleDomain<HiveColumnHandle> baseColumnPredicate = effectivePredicate.filter((column, domain) -> column.isBaseColumn());

    String serdeName = getDeserializerClassName(schema);
    if (!CSV_SERDES.contains(serdeName)) {
        // unsupported serdes
        return Optional.empty();
    }

    // Read the projected base columns when a projection exists, otherwise the requested columns
    List<HiveColumnHandle> readerColumns = projectedReaderColumns
            .map(ReaderProjections::getReaderColumns)
            .orElse(columns);

    String ionSqlQuery = new IonSqlQueryBuilder(typeManager).buildSql(readerColumns, baseColumnPredicate);
    S3SelectLineRecordReader recordReader = new S3SelectCsvRecordReader(configuration, path, start, length, schema, ionSqlQuery, s3ClientFactory);
    RecordCursor cursor = new S3SelectRecordCursor<>(configuration, path, recordReader, length, schema, readerColumns, hiveStorageTimeZone);

    return Optional.of(new ReaderRecordCursorWithProjections(cursor, projectedReaderColumns));
}

Example 5

Source File: HivePartitionManager.java From presto with Apache License 2.0

4 votes

/**
 * Computes the partitions of a Hive table that are selected by {@code constraint}.
 *
 * <p>The constraint summary is intersected with the constraint already enforced on the table
 * handle. If that intersection is "none", an empty result is returned without contacting the
 * metastore. For a table without partition columns a single {@link HivePartition} built from the
 * table name is returned. Otherwise the partitions are taken from the table handle when it already
 * carries them, or from {@code getFilteredPartitionNames} when it does not; in the latter case the
 * names are parsed and filtered lazily, each time the returned iterable is iterated.
 *
 * @param metastore metastore used to look up the table and partition names
 * @param identity identity used for the metastore calls
 * @param tableHandle the table; cast to {@link HiveTableHandle}
 * @param constraint summary domain plus an optional row-level predicate
 * @return the partition result, including the predicate split into a part on non-partition
 *         columns and a part on partition columns
 * @throws TableNotFoundException if the metastore has no such table
 */
public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, HiveIdentity identity, ConnectorTableHandle tableHandle, Constraint constraint)
{
    HiveTableHandle hiveHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> combinedPredicate = constraint.getSummary()
            .intersect(hiveHandle.getEnforcedConstraint());

    SchemaTableName schemaTableName = hiveHandle.getSchemaTableName();
    Optional<HiveBucketHandle> bucketHandle = hiveHandle.getBucketHandle();
    List<HiveColumnHandle> partitionKeys = hiveHandle.getPartitionColumns();

    // A "none" predicate selects nothing, so skip the metastore entirely
    if (combinedPredicate.isNone()) {
        return new HivePartitionResult(partitionKeys, ImmutableList.of(), none(), none(), none(), bucketHandle, Optional.empty());
    }

    Table table = metastore.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));

    Optional<HiveBucketFilter> bucketFilter = getHiveBucketFilter(table, combinedPredicate);
    TupleDomain<HiveColumnHandle> hivePredicate = combinedPredicate.transform(HiveColumnHandle.class::cast);
    TupleDomain<HiveColumnHandle> compactPredicate = hivePredicate.simplify(domainCompactionThreshold);

    // Table without partition columns: one partition standing for the whole table
    if (partitionKeys.isEmpty()) {
        return new HivePartitionResult(
                partitionKeys,
                ImmutableList.of(new HivePartition(schemaTableName)),
                compactPredicate,
                combinedPredicate,
                TupleDomain.all(),
                bucketHandle,
                bucketFilter);
    }

    List<Type> partitionTypes = partitionKeys.stream()
            .map(HiveColumnHandle::getType)
            .collect(toList());

    // With no row-level predicate supplied, accept every candidate
    Predicate<Map<ColumnHandle, NullableValue>> rowPredicate = constraint.predicate().orElse(ignored -> true);

    Iterable<HivePartition> matchingPartitions;
    if (hiveHandle.getPartitions().isPresent()) {
        // Partitions already attached to the handle are filtered eagerly
        matchingPartitions = hiveHandle.getPartitions().get().stream()
                .filter(candidate -> partitionMatches(partitionKeys, combinedPredicate, rowPredicate, candidate))
                .collect(toImmutableList());
    }
    else {
        List<String> candidateNames = getFilteredPartitionNames(metastore, identity, schemaTableName, partitionKeys, combinedPredicate);
        matchingPartitions = () -> candidateNames.stream()
                // Apply extra filters which could not be done by getFilteredPartitionNames
                .map(candidateName -> parseValuesAndFilterPartition(schemaTableName, candidateName, partitionKeys, partitionTypes, combinedPredicate, rowPredicate))
                .filter(parsed -> parsed.isPresent())
                .map(parsed -> parsed.get())
                .iterator();
    }

    // All partition key domains will be fully evaluated, so they are reported as enforced
    // and left out of the remaining predicate
    TupleDomain<ColumnHandle> unenforcedDomain = combinedPredicate.filter((column, domain) -> !partitionKeys.contains(column));
    TupleDomain<ColumnHandle> enforcedDomain = combinedPredicate.filter((column, domain) -> partitionKeys.contains(column));
    return new HivePartitionResult(partitionKeys, matchingPartitions, compactPredicate, unenforcedDomain, enforcedDomain, bucketHandle, bucketFilter);
}