org.apache.parquet.filter2.predicate.Operators Java Examples

The following examples show how to use org.apache.parquet.filter2.predicate.Operators. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a single operation on a column into the matching Parquet filter predicate.
 *
 * @param op    the operation to translate
 * @param col   the Parquet column the predicate is evaluated against
 * @param value the literal to compare with; not used by the null checks
 * @return the Parquet predicate equivalent to {@code op}
 * @throws UnsupportedOperationException if {@code op} has no mapping in this method
 */
@SuppressWarnings("checkstyle:MethodTypeParameterName")
private static <C extends Comparable<C>, COL extends Operators.Column<C> & Operators.SupportsLtGt>
    FilterPredicate pred(Operation op, COL col, C value) {
  final FilterPredicate result;
  switch (op) {
    case IS_NULL:
      // null checks are expressed as (in)equality against a null value
      result = FilterApi.eq(col, null);
      break;
    case NOT_NULL:
      result = FilterApi.notEq(col, null);
      break;
    case EQ:
      result = FilterApi.eq(col, value);
      break;
    case NOT_EQ:
      result = FilterApi.notEq(col, value);
      break;
    case GT:
      result = FilterApi.gt(col, value);
      break;
    case GT_EQ:
      result = FilterApi.gtEq(col, value);
      break;
    case LT:
      result = FilterApi.lt(col, value);
      break;
    case LT_EQ:
      result = FilterApi.ltEq(col, value);
      break;
    default:
      throw new UnsupportedOperationException("Unsupported predicate operation: " + op);
  }
  return result;
}
 
Example #2
Source File: ParquetRecordFilterBuilder.java    From pxf with Apache License 2.0 6 votes vote down vote up
/**
 * Picks the FilterApi factory ({@code eq} or {@code notEq}) that corresponds
 * to the given operator, for columns that support equality comparisons.
 *
 * @param operator the operator
 * @param <T>      the type
 * @param <C>      the column type
 * @return a function that builds the FilterPredicate from a column and a value
 * @throws UnsupportedOperationException if the operator is not one of the
 *                                       equality / inequality operators handled here
 */
private static <T extends Comparable<T>, C extends Operators.Column<T> & Operators.SupportsEqNotEq> BiFunction<C, T, FilterPredicate> getOperatorWithEqNotEqSupport(Operator operator) {
    final BiFunction<C, T, FilterPredicate> predicateFactory;
    switch (operator) {
        // A NOT boolean wraps a NOOP
        //       NOT
        //        |
        //       NOOP
        //        |
        //    ---------
        //   |         |
        //   4        true
        // and that NOOP needs to be replaced with equals
        case NOOP:
        case IS_NULL:
        case EQUALS:
            predicateFactory = FilterApi::eq;
            break;

        case IS_NOT_NULL:
        case NOT_EQUALS:
            predicateFactory = FilterApi::notEq;
            break;

        default:
            throw new UnsupportedOperationException("not supported " + operator);
    }
    return predicateFactory;
}
 
Example #3
Source File: ParquetRecordFilterBuilder.java    From pxf with Apache License 2.0 6 votes vote down vote up
/**
 * Picks the FilterApi factory for the given operator on columns that support
 * ordering comparisons. Operators that are not one of the four ordering
 * comparisons are delegated to {@code getOperatorWithEqNotEqSupport}.
 *
 * @param operator the operator
 * @param <T>      the type
 * @param <C>      the column type
 * @return a function that builds the FilterPredicate from a column and a value
 */
private static <T extends Comparable<T>, C extends Operators.Column<T> & Operators.SupportsLtGt> BiFunction<C, T, FilterPredicate> getOperatorWithLtGtSupport(Operator operator) {
    final BiFunction<C, T, FilterPredicate> comparison;
    switch (operator) {
        case GREATER_THAN:
            comparison = FilterApi::gt;
            break;
        case GREATER_THAN_OR_EQUAL:
            comparison = FilterApi::gtEq;
            break;
        case LESS_THAN:
            comparison = FilterApi::lt;
            break;
        case LESS_THAN_OR_EQUAL:
            comparison = FilterApi::ltEq;
            break;
        default:
            // not an ordering comparison: let the equality lookup decide (it may throw)
            return getOperatorWithEqNotEqSupport(operator);
    }
    return comparison;
}
 
Example #4
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Maps one operation on a column to the Parquet filter predicate that expresses it.
 *
 * @param op    the operation to convert
 * @param col   the Parquet column to filter on
 * @param value the comparison literal; the null checks do not use it
 * @return the equivalent Parquet predicate
 * @throws UnsupportedOperationException if the operation is not handled below
 */
private static
<C extends Comparable<C>, COL extends Operators.Column<C> & Operators.SupportsLtGt>
FilterPredicate pred(Operation op, COL col, C value) {
  final FilterPredicate predicate;
  switch (op) {
    // ordering comparisons
    case LT:
      predicate = FilterApi.lt(col, value);
      break;
    case LT_EQ:
      predicate = FilterApi.ltEq(col, value);
      break;
    case GT:
      predicate = FilterApi.gt(col, value);
      break;
    case GT_EQ:
      predicate = FilterApi.gtEq(col, value);
      break;

    // equality comparisons
    case EQ:
      predicate = FilterApi.eq(col, value);
      break;
    case NOT_EQ:
      predicate = FilterApi.notEq(col, value);
      break;

    // null checks are written as (in)equality against null
    case IS_NULL:
      predicate = FilterApi.eq(col, null);
      break;
    case NOT_NULL:
      predicate = FilterApi.notEq(col, null);
      break;

    default:
      throw new UnsupportedOperationException("Unsupported predicate operation: " + op);
  }
  return predicate;
}
 
Example #5
Source File: ParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Parquet filter predicate for one comparison, choosing the Parquet
 * column kind from the type of the named field in {@code schema}.
 *
 * @param op    the comparison operation
 * @param col   the column the comparison refers to; its name is looked up in the schema
 * @param value the constant to compare against
 * @return the Parquet predicate for the comparison
 * @throws RuntimeException if the field type is not handled, if the operation is not
 *                          supported for a boolean column, or if a FrontendException
 *                          is raised while building the predicate
 */
private FilterPredicate buildFilter(OpType op, Column col, Const value) {
  String fieldName = col.getName();
  try {
    FieldSchema field = schema.getField(fieldName);
    switch (field.type) {
      // these types share the generic op(...) helper
      case DataType.INTEGER:
        return op(op, intColumn(fieldName), value);
      case DataType.LONG:
        return op(op, longColumn(fieldName), value);
      case DataType.FLOAT:
        return op(op, floatColumn(fieldName), value);
      case DataType.DOUBLE:
        return op(op, doubleColumn(fieldName), value);
      case DataType.CHARARRAY:
        return op(op, binaryColumn(fieldName), value);

      // booleans are handled inline and only accept equals / not-equals
      case DataType.BOOLEAN:
        Operators.BooleanColumn boolCol = booleanColumn(fieldName);
        switch(op) {
          case OP_EQ:
            return eq(boolCol, getValue(value, boolCol.getColumnType()));
          case OP_NE:
            return notEq(boolCol, getValue(value, boolCol.getColumnType()));
          default:
            throw new RuntimeException(
                "Operation " + op + " not supported for boolean column: " + fieldName);
        }

      default:
        throw new RuntimeException("Unsupported type " + field.type + " for field: " + fieldName);
    }
  } catch (FrontendException e) {
    throw new RuntimeException("Error processing pushdown for column:" + col, e);
  }
}
 
Example #6
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates an equality predicate against the Bloom filter of the referenced column.
 *
 * <p>Returns {@code BLOCK_CANNOT_MATCH} when the column is absent from the file or when
 * the Bloom filter reports that the value's hash is not present; returns
 * {@code BLOCK_MIGHT_MATCH} in every other case, including when the lookup fails.
 *
 * @param eq the equality predicate to evaluate
 * @param <T> the value type of the column
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} as described above
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.Eq<T> eq) {
  T value = eq.getValue();

  if (value == null) {
    // the bloom filter bitset contains only non-null values so isn't helpful. this
    // could check the column stats, but the StatisticsFilter is responsible
    return BLOCK_MIGHT_MATCH;
  }

  Operators.Column<T> filterColumn = eq.getColumn();
  ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath());
  if (meta == null) {
    // the column isn't in this file so all values are null, but the value
    // must be non-null because of the above check.
    return BLOCK_CANNOT_MATCH;
  }

  try {
    BloomFilter bloomFilter = bloomFilterReader.readBloomFilter(meta);
    if (bloomFilter != null && !bloomFilter.findHash(bloomFilter.hash(value))) {
      return BLOCK_CANNOT_MATCH;
    }
  } catch (RuntimeException e) {
    // Best effort: a failed lookup must not fail the read, so fall through to the
    // "might match" answer. Pass the exception itself to the logger so the stack
    // trace is kept and a null exception message does not produce an empty log line.
    LOG.warn("Bloom filter lookup failed; assuming the block might match", e);
  }

  return BLOCK_MIGHT_MATCH;
}
 
Example #7
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a bound predicate into the equivalent Parquet filter predicate, choosing
 * the Parquet column kind from the type of the referenced field.
 *
 * <p>Boolean columns only support {@code EQ} and {@code NOT_EQ}; any other operation on
 * a boolean column is rejected instead of being evaluated against a column of another
 * type.
 *
 * @param pred the bound predicate to convert
 * @param <T> the value type of the predicate
 * @return the Parquet filter predicate
 * @throws UnsupportedOperationException if the field type, or the operation for that
 *                                       type, cannot be converted
 */
@Override
public <T> FilterPredicate predicate(BoundPredicate<T> pred) {
  Operation op = pred.op();
  BoundReference<T> ref = pred.ref();
  Literal<T> lit = pred.literal();
  String path = schema.idToAlias(ref.fieldId());

  switch (ref.type().typeId()) {
    case BOOLEAN:
      Operators.BooleanColumn col = FilterApi.booleanColumn(path);
      switch (op) {
        case EQ:
          return FilterApi.eq(col, getParquetPrimitive(lit));
        case NOT_EQ:
          // must be notEq: building eq here would invert the meaning of the filter
          return FilterApi.notEq(col, getParquetPrimitive(lit));
        default:
          break;
      }
      // unsupported boolean operation: leave the switch so the exception below is
      // thrown, rather than falling through into the INTEGER case
      break;

    case INTEGER:
    case DATE:
      return pred(op, FilterApi.intColumn(path), getParquetPrimitive(lit));
    case LONG:
    case TIME:
    case TIMESTAMP:
      return pred(op, FilterApi.longColumn(path), getParquetPrimitive(lit));
    case FLOAT:
      return pred(op, FilterApi.floatColumn(path), getParquetPrimitive(lit));
    case DOUBLE:
      return pred(op, FilterApi.doubleColumn(path), getParquetPrimitive(lit));
    case STRING:
    case UUID:
    case FIXED:
    case BINARY:
    case DECIMAL:
      return pred(op, FilterApi.binaryColumn(path), getParquetPrimitive(lit));
  }

  throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
}
 
Example #8
Source File: ParquetRecordFilterBuilder.java    From pxf with Apache License 2.0 4 votes vote down vote up
/**
 * Turns a simple column-operator-constant expression into a Parquet
 * FilterPredicate and pushes it onto the filter queue.
 *
 * @param operatorNode the operator node
 * @throws IllegalArgumentException      if an operator other than IS_NULL /
 *                                       IS_NOT_NULL has no value operand
 * @throws UnsupportedOperationException if the column's primitive type is not
 *                                       handled below
 */
private void processSimpleColumnOperator(OperatorNode operatorNode) {

    Operator operator = operatorNode.getOperator();
    ColumnIndexOperandNode columnIndexOperand = operatorNode.getColumnIndexOperand();

    // null checks carry no constant; every other operator must provide one
    boolean isNullCheck = operator == Operator.IS_NULL || operator == Operator.IS_NOT_NULL;
    OperandNode valueOperand = isNullCheck ? null : operatorNode.getValueOperand();
    if (!isNullCheck && valueOperand == null) {
        throw new IllegalArgumentException(
                String.format("Operator %s does not contain an operand", operator));
    }

    String filterColumnName = columnDescriptors.get(columnIndexOperand.index()).columnName();
    Type type = fields.get(filterColumnName);
    String parquetName = type.getName();

    // INT96 and FIXED_LEN_BYTE_ARRAY cannot be pushed down
    // for more details look at org.apache.parquet.filter2.dictionarylevel.DictionaryFilter#expandDictionary
    // where INT96 and FIXED_LEN_BYTE_ARRAY are not dictionary values
    FilterPredicate predicate;
    switch (type.asPrimitiveType().getPrimitiveTypeName()) {
        case INT32:
            predicate = ParquetRecordFilterBuilder.<Integer, Operators.IntColumn>getOperatorWithLtGtSupport(operator)
                    .apply(intColumn(parquetName), getIntegerForINT32(type.getOriginalType(), valueOperand));
            break;

        case INT64:
            predicate = ParquetRecordFilterBuilder.<Long, Operators.LongColumn>getOperatorWithLtGtSupport(operator)
                    .apply(longColumn(parquetName), valueOperand == null ? null : Long.parseLong(valueOperand.toString()));
            break;

        case FLOAT:
            predicate = ParquetRecordFilterBuilder.<Float, Operators.FloatColumn>getOperatorWithLtGtSupport(operator)
                    .apply(floatColumn(parquetName), valueOperand == null ? null : Float.parseFloat(valueOperand.toString()));
            break;

        case DOUBLE:
            predicate = ParquetRecordFilterBuilder.<Double, Operators.DoubleColumn>getOperatorWithLtGtSupport(operator)
                    .apply(doubleColumn(parquetName), valueOperand == null ? null : Double.parseDouble(valueOperand.toString()));
            break;

        case BINARY:
            predicate = ParquetRecordFilterBuilder.<Binary, Operators.BinaryColumn>getOperatorWithLtGtSupport(operator)
                    .apply(binaryColumn(parquetName), valueOperand == null ? null : Binary.fromString(valueOperand.toString()));
            break;

        case BOOLEAN:
            // Boolean does not SupportsLtGt, so only the equality lookup applies
            predicate = ParquetRecordFilterBuilder.<Boolean, Operators.BooleanColumn>getOperatorWithEqNotEqSupport(operator)
                    .apply(booleanColumn(parquetName), valueOperand == null ? null : Boolean.parseBoolean(valueOperand.toString()));
            break;

        default:
            throw new UnsupportedOperationException(String.format("Column %s of type %s is not supported",
                    parquetName, type.asPrimitiveType().getPrimitiveTypeName()));
    }

    filterQueue.push(predicate);
}
 
Example #9
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Visits a not-equals predicate. The predicate is not inspected; the method
 * unconditionally returns {@code BLOCK_MIGHT_MATCH}.
 * NOTE(review): presumably because a Bloom filter can only answer exact-membership
 * questions - confirm.
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.NotEq<T> notEq) {
  return BLOCK_MIGHT_MATCH;
}
 
Example #10
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Visits a less-than predicate. The predicate is not inspected; the method
 * unconditionally returns {@code BLOCK_MIGHT_MATCH}.
 * NOTE(review): presumably because a Bloom filter cannot answer range
 * questions - confirm.
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.Lt<T> lt) {
  return BLOCK_MIGHT_MATCH;
}
 
Example #11
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Visits a less-than-or-equal predicate. The predicate is not inspected; the
 * method unconditionally returns {@code BLOCK_MIGHT_MATCH}.
 * NOTE(review): presumably because a Bloom filter cannot answer range
 * questions - confirm.
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.LtEq<T> ltEq) {
  return BLOCK_MIGHT_MATCH;
}
 
Example #12
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Visits a greater-than predicate. The predicate is not inspected; the method
 * unconditionally returns {@code BLOCK_MIGHT_MATCH}.
 * NOTE(review): presumably because a Bloom filter cannot answer range
 * questions - confirm.
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.Gt<T> gt) {
  return BLOCK_MIGHT_MATCH;
}
 
Example #13
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Visits a greater-than-or-equal predicate. The predicate is not inspected; the
 * method unconditionally returns {@code BLOCK_MIGHT_MATCH}.
 * NOTE(review): presumably because a Bloom filter cannot answer range
 * questions - confirm.
 */
@Override
public <T extends Comparable<T>> Boolean visit(Operators.GtEq<T> gtEq) {
  return BLOCK_MIGHT_MATCH;
}
 
Example #14
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Visits a logical AND by visiting its operands with this visitor and combining
 * the two results with a short-circuiting logical OR: the right operand is only
 * visited when the left result is {@code false}.
 * NOTE(review): combining with OR is only the right treatment of an AND if
 * {@code true} encodes "block cannot match" - confirm against the BLOCK_* constants.
 */
@Override
public Boolean visit(Operators.And and) {
  boolean leftResult = and.getLeft().accept(this);
  if (leftResult) {
    return true;
  }
  boolean rightResult = and.getRight().accept(this);
  return rightResult;
}
 
Example #15
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Visits a logical OR by visiting its operands with this visitor and combining
 * the two results with a short-circuiting logical AND: the right operand is only
 * visited when the left result is {@code true}.
 * NOTE(review): combining with AND is only the right treatment of an OR if
 * {@code true} encodes "block cannot match" - confirm against the BLOCK_* constants.
 */
@Override
public Boolean visit(Operators.Or or) {
  boolean leftResult = or.getLeft().accept(this);
  if (!leftResult) {
    return false;
  }
  boolean rightResult = or.getRight().accept(this);
  return rightResult;
}
 
Example #16
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Rejects NOT nodes: this visitor does not evaluate them and always throws.
 *
 * @param not the NOT node that was encountered
 * @return never returns normally
 * @throws IllegalArgumentException always; the message points at LogicalInverseRewriter
 */
@Override
public Boolean visit(Operators.Not not) {
  String message =
    "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter? " + not;
  throw new IllegalArgumentException(message);
}
 
Example #17
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Shared handler for user-defined predicates. Neither the predicate nor the
 * {@code inverted} flag is inspected; the method unconditionally returns
 * {@code BLOCK_MIGHT_MATCH}.
 *
 * @param ud       the user-defined predicate (unused)
 * @param inverted whether the predicate was wrapped in a logical NOT (unused)
 * @return {@code BLOCK_MIGHT_MATCH}
 */
private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(Operators.UserDefined<T, U> ud, boolean inverted) {
  return BLOCK_MIGHT_MATCH;
}
 
Example #18
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Visits a user-defined predicate by delegating to the two-argument
 * {@code visit} overload with {@code inverted = false}.
 *
 * @param udp the user-defined predicate
 * @return whatever the two-argument overload returns
 */
@Override
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(Operators.UserDefined<T, U> udp) {
  return visit(udp, false);
}
 
Example #19
Source File: BloomFilterImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Visits a negated user-defined predicate by unwrapping it and delegating to
 * the two-argument {@code visit} overload with {@code inverted = true}.
 *
 * @param udp the negated user-defined predicate
 * @return whatever the two-argument overload returns
 */
@Override
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(Operators.LogicalNotUserDefined<T, U> udp) {
  Operators.UserDefined<T, U> wrapped = udp.getUserDefined();
  return visit(wrapped, true);
}
 
Example #20
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a bound predicate into the equivalent Parquet filter predicate.
 *
 * <p>Only predicates whose term is a {@code BoundReference} are supported. Unary
 * predicates are converted with a {@code null} literal and literal predicates with
 * their literal. Boolean columns accept only {@code EQ} and {@code NOT_EQ}.
 *
 * @param pred the bound predicate to convert
 * @param <T> the value type of the predicate
 * @return the Parquet filter predicate
 * @throws UnsupportedOperationException if the term is not a reference, the predicate
 *                                       is neither unary nor literal, or the field type
 *                                       or operation cannot be converted
 */
@Override
public <T> FilterPredicate predicate(BoundPredicate<T> pred) {
  if (!(pred.term() instanceof BoundReference)) {
    throw new UnsupportedOperationException("Cannot convert non-reference to Parquet filter: " + pred.term());
  }

  Operation operation = pred.op();
  BoundReference<T> reference = (BoundReference<T>) pred.term();
  String columnPath = schema.idToAlias(reference.fieldId());

  // unary predicates carry no literal; anything else must be a literal predicate
  Literal<T> literal = null;
  if (!pred.isUnaryPredicate()) {
    if (!pred.isLiteralPredicate()) {
      throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
    }
    literal = pred.asLiteralPredicate().literal();
  }

  switch (reference.type().typeId()) {
    case INTEGER:
    case DATE:
      return pred(operation, FilterApi.intColumn(columnPath), getParquetPrimitive(literal));
    case LONG:
    case TIME:
    case TIMESTAMP:
      return pred(operation, FilterApi.longColumn(columnPath), getParquetPrimitive(literal));
    case FLOAT:
      return pred(operation, FilterApi.floatColumn(columnPath), getParquetPrimitive(literal));
    case DOUBLE:
      return pred(operation, FilterApi.doubleColumn(columnPath), getParquetPrimitive(literal));
    case STRING:
    case UUID:
    case FIXED:
    case BINARY:
    case DECIMAL:
      return pred(operation, FilterApi.binaryColumn(columnPath), getParquetPrimitive(literal));
    case BOOLEAN:
      Operators.BooleanColumn booleanCol = FilterApi.booleanColumn(columnPath);
      switch (operation) {
        case EQ:
          return FilterApi.eq(booleanCol, getParquetPrimitive(literal));
        case NOT_EQ:
          return FilterApi.notEq(booleanCol, getParquetPrimitive(literal));
        default:
          // any other operation on a boolean column ends in the exception below
          break;
      }
      break;
  }

  throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
}