org.apache.parquet.filter2.predicate.Operators.Column Java Examples

The following examples show how to use org.apache.parquet.filter2.predicate.Operators.Column. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetTableSource.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a {@code LessThan} expression into a Parquet {@code lt} predicate.
 *
 * @param exp the expression being converted; must be a {@code LessThan}
 *            (enforced through {@code Preconditions.checkArgument})
 * @param columnPair the Parquet column ({@code f0}) and the literal to compare against ({@code f1})
 * @return the predicate for int, long, double and float columns, or {@code null}
 *         for any other column type
 */
@Nullable
private FilterPredicate lessThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThan, "exp has to be LessThan");

	final Column column = columnPair.f0;
	final Comparable literal = columnPair.f1;

	if (column instanceof IntColumn) {
		return FilterApi.lt((IntColumn) column, (Integer) literal);
	}
	if (column instanceof LongColumn) {
		return FilterApi.lt((LongColumn) column, (Long) literal);
	}
	if (column instanceof DoubleColumn) {
		return FilterApi.lt((DoubleColumn) column, (Double) literal);
	}
	if (column instanceof FloatColumn) {
		return FilterApi.lt((FloatColumn) column, (Float) literal);
	}

	// no ordering predicate is built for any other column type
	return null;
}
 
Example #2
Source File: ColumnIndexFilter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the row ranges selected by {@code func} for the given column.
 *
 * @param column the column the predicate refers to
 * @param func maps the column's {@link ColumnIndex} to the indexes handed to {@code RowRanges.create}
 * @param rangesForMissingColumns the result to use when {@code columns} does not contain the column's path
 * @return {@code rangesForMissingColumns} for an unknown column, {@code allRows()} when the
 *         store has no column index for it, otherwise the ranges built from {@code func}
 */
private RowRanges applyPredicate(Column<?> column, Function<ColumnIndex, PrimitiveIterator.OfInt> func,
    RowRanges rangesForMissingColumns) {
  final ColumnPath path = column.getColumnPath();
  if (!columns.contains(path)) {
    return rangesForMissingColumns;
  }

  // the offset index is fetched before the column index; the lookup order is kept as is
  final OffsetIndex offsetIndex = columnIndexStore.getOffsetIndex(path);
  final ColumnIndex columnIndex = columnIndexStore.getColumnIndex(path);
  if (columnIndex != null) {
    return RowRanges.create(rowCount, func.apply(columnIndex), offsetIndex);
  }

  LOGGER.info("No column index for column {} is available; Unable to filter on this column", path);
  return allRows();
}
 
Example #3
Source File: ParquetTableSource.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Parquet {@code lt} filter that corresponds to a {@code LessThan} expression.
 *
 * @param exp the source expression; {@code Preconditions.checkArgument} requires it to be a {@code LessThan}
 * @param columnPair pair of the target column ({@code f0}) and the comparison literal ({@code f1})
 * @return a predicate when the column is an int, long, double or float column; {@code null} otherwise
 */
@Nullable
private FilterPredicate lessThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThan, "exp has to be LessThan");

	final Column target = columnPair.f0;
	final Comparable bound = columnPair.f1;

	FilterPredicate predicate = null;
	if (target instanceof IntColumn) {
		predicate = FilterApi.lt((IntColumn) target, (Integer) bound);
	} else if (target instanceof LongColumn) {
		predicate = FilterApi.lt((LongColumn) target, (Long) bound);
	} else if (target instanceof DoubleColumn) {
		predicate = FilterApi.lt((DoubleColumn) target, (Double) bound);
	} else if (target instanceof FloatColumn) {
		predicate = FilterApi.lt((FloatColumn) target, (Float) bound);
	}
	// stays null for every other column type
	return predicate;
}
 
Example #4
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code LessThanOrEqual} expression into a Parquet {@code ltEq} predicate.
 *
 * @param exp the expression being converted; must be a {@code LessThanOrEqual}
 *            (enforced through {@code Preconditions.checkArgument})
 * @param columnPair the Parquet column ({@code f0}) and the literal to compare against ({@code f1})
 * @return the predicate for int, long, double and float columns, or {@code null}
 *         for any other column type
 */
@Nullable
private FilterPredicate lessThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThanOrEqual, "exp has to be LessThanOrEqual");

	final Column column = columnPair.f0;
	final Comparable literal = columnPair.f1;

	if (column instanceof IntColumn) {
		return FilterApi.ltEq((IntColumn) column, (Integer) literal);
	}
	if (column instanceof LongColumn) {
		return FilterApi.ltEq((LongColumn) column, (Long) literal);
	}
	if (column instanceof DoubleColumn) {
		return FilterApi.ltEq((DoubleColumn) column, (Double) literal);
	}
	if (column instanceof FloatColumn) {
		return FilterApi.ltEq((FloatColumn) column, (Float) literal);
	}

	// no ordering predicate is built for any other column type
	return null;
}
 
Example #5
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the Parquet column and the comparison literal from a binary comparison.
 *
 * <p>The column kind is chosen from the literal's type as reported by
 * {@code getLiteralType(comp)}.
 *
 * @param comp the comparison to take the column name and the literal from
 * @return the column paired with its literal, or {@code null} when the literal is
 *         {@code null} or not {@link Comparable}, or when the literal type is unsupported
 */
@Nullable
private Tuple2<Column, Comparable> extractColumnAndLiteral(BinaryComparison comp) {
	TypeInformation<?> typeInfo = getLiteralType(comp);
	String columnName = getColumnName(comp);

	// fetch literal and ensure it is comparable
	Object value = getLiteral(comp);
	if (!(value instanceof Comparable)) {
		// a null literal also lands here, so do not dereference it blindly
		String literalType = value == null ? "null" : value.getClass().getCanonicalName();
		LOG.warn("Encountered a non-comparable literal of type {}. " +
			"Cannot push predicate [{}] into ParquetTablesource. " +
			"This is a bug and should be reported.", literalType, comp);
		return null;
	}

	if (typeInfo == BasicTypeInfo.BYTE_TYPE_INFO ||
		typeInfo == BasicTypeInfo.SHORT_TYPE_INFO ||
		typeInfo == BasicTypeInfo.INT_TYPE_INFO) {
		// Byte and Short literals cannot be cast to Integer directly; widen through Number instead
		return new Tuple2<>(FilterApi.intColumn(columnName), ((Number) value).intValue());
	} else if (typeInfo == BasicTypeInfo.LONG_TYPE_INFO) {
		return new Tuple2<>(FilterApi.longColumn(columnName), (Long) value);
	} else if (typeInfo == BasicTypeInfo.FLOAT_TYPE_INFO) {
		return new Tuple2<>(FilterApi.floatColumn(columnName), (Float) value);
	} else if (typeInfo == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
		return new Tuple2<>(FilterApi.booleanColumn(columnName), (Boolean) value);
	} else if (typeInfo == BasicTypeInfo.DOUBLE_TYPE_INFO) {
		return new Tuple2<>(FilterApi.doubleColumn(columnName), (Double) value);
	} else if (typeInfo == BasicTypeInfo.STRING_TYPE_INFO) {
		return new Tuple2<>(FilterApi.binaryColumn(columnName), Binary.fromString((String) value));
	} else {
		// unsupported type
		return null;
	}
}
 
Example #6
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code GreaterThan} expression into a Parquet {@code gt} predicate.
 *
 * @param exp the expression being converted; must be a {@code GreaterThan}
 *            (enforced through {@code Preconditions.checkArgument})
 * @param columnPair the Parquet column ({@code f0}) and the literal to compare against ({@code f1})
 * @return the predicate for int, long, double and float columns, or {@code null}
 *         for any other column type
 */
@Nullable
private FilterPredicate greaterThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThan, "exp has to be GreaterThan");

	final Column column = columnPair.f0;
	final Comparable literal = columnPair.f1;

	if (column instanceof IntColumn) {
		return FilterApi.gt((IntColumn) column, (Integer) literal);
	}
	if (column instanceof LongColumn) {
		return FilterApi.gt((LongColumn) column, (Long) literal);
	}
	if (column instanceof DoubleColumn) {
		return FilterApi.gt((DoubleColumn) column, (Double) literal);
	}
	if (column instanceof FloatColumn) {
		return FilterApi.gt((FloatColumn) column, (Float) literal);
	}

	// no ordering predicate is built for any other column type
	return null;
}
 
Example #7
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code GreaterThanOrEqual} expression into a Parquet {@code gtEq} predicate.
 *
 * @param exp the expression being converted; must be a {@code GreaterThanOrEqual}
 *            (enforced through {@code Preconditions.checkArgument})
 * @param columnPair the Parquet column ({@code f0}) and the literal to compare against ({@code f1})
 * @return the predicate for int, long, double and float columns, or {@code null}
 *         for any other column type
 */
@Nullable
private FilterPredicate greaterThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThanOrEqual, "exp has to be GreaterThanOrEqual");

	final Column column = columnPair.f0;
	final Comparable literal = columnPair.f1;

	if (column instanceof IntColumn) {
		return FilterApi.gtEq((IntColumn) column, (Integer) literal);
	}
	if (column instanceof LongColumn) {
		return FilterApi.gtEq((LongColumn) column, (Long) literal);
	}
	if (column instanceof DoubleColumn) {
		return FilterApi.gtEq((DoubleColumn) column, (Double) literal);
	}
	if (column instanceof FloatColumn) {
		return FilterApi.gtEq((FloatColumn) column, (Float) literal);
	}

	// no ordering predicate is built for any other column type
	return null;
}
 
Example #8
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Parquet {@code gtEq} filter that corresponds to a {@code GreaterThanOrEqual} expression.
 *
 * @param exp the source expression; {@code Preconditions.checkArgument} requires it to be a
 *            {@code GreaterThanOrEqual}
 * @param columnPair pair of the target column ({@code f0}) and the comparison literal ({@code f1})
 * @return a predicate when the column is an int, long, double or float column; {@code null} otherwise
 */
@Nullable
private FilterPredicate greaterThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThanOrEqual, "exp has to be GreaterThanOrEqual");

	final Column target = columnPair.f0;
	final Comparable bound = columnPair.f1;

	FilterPredicate predicate = null;
	if (target instanceof IntColumn) {
		predicate = FilterApi.gtEq((IntColumn) target, (Integer) bound);
	} else if (target instanceof LongColumn) {
		predicate = FilterApi.gtEq((LongColumn) target, (Long) bound);
	} else if (target instanceof DoubleColumn) {
		predicate = FilterApi.gtEq((DoubleColumn) target, (Double) bound);
	} else if (target instanceof FloatColumn) {
		predicate = FilterApi.gtEq((FloatColumn) target, (Float) bound);
	}
	// stays null for every other column type
	return predicate;
}
 
Example #9
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Parquet {@code ltEq} filter that corresponds to a {@code LessThanOrEqual} expression.
 *
 * @param exp the source expression; {@code Preconditions.checkArgument} requires it to be a
 *            {@code LessThanOrEqual}
 * @param columnPair pair of the target column ({@code f0}) and the comparison literal ({@code f1})
 * @return a predicate when the column is an int, long, double or float column; {@code null} otherwise
 */
@Nullable
private FilterPredicate lessThanOrEqual(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof LessThanOrEqual, "exp has to be LessThanOrEqual");

	final Column target = columnPair.f0;
	final Comparable bound = columnPair.f1;

	FilterPredicate predicate = null;
	if (target instanceof IntColumn) {
		predicate = FilterApi.ltEq((IntColumn) target, (Integer) bound);
	} else if (target instanceof LongColumn) {
		predicate = FilterApi.ltEq((LongColumn) target, (Long) bound);
	} else if (target instanceof DoubleColumn) {
		predicate = FilterApi.ltEq((DoubleColumn) target, (Double) bound);
	} else if (target instanceof FloatColumn) {
		predicate = FilterApi.ltEq((FloatColumn) target, (Float) bound);
	}
	// stays null for every other column type
	return predicate;
}
 
Example #10
Source File: ParquetTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Parquet {@code gt} filter that corresponds to a {@code GreaterThan} expression.
 *
 * @param exp the source expression; {@code Preconditions.checkArgument} requires it to be a
 *            {@code GreaterThan}
 * @param columnPair pair of the target column ({@code f0}) and the comparison literal ({@code f1})
 * @return a predicate when the column is an int, long, double or float column; {@code null} otherwise
 */
@Nullable
private FilterPredicate greaterThan(Expression exp, Tuple2<Column, Comparable> columnPair) {
	Preconditions.checkArgument(exp instanceof GreaterThan, "exp has to be GreaterThan");

	final Column target = columnPair.f0;
	final Comparable bound = columnPair.f1;

	FilterPredicate predicate = null;
	if (target instanceof IntColumn) {
		predicate = FilterApi.gt((IntColumn) target, (Integer) bound);
	} else if (target instanceof LongColumn) {
		predicate = FilterApi.gt((LongColumn) target, (Long) bound);
	} else if (target instanceof DoubleColumn) {
		predicate = FilterApi.gt((DoubleColumn) target, (Double) bound);
	} else if (target instanceof FloatColumn) {
		predicate = FilterApi.gt((FloatColumn) target, (Float) bound);
	}
	// stays null for every other column type
	return predicate;
}
 
Example #11
Source File: SchemaCompatibilityValidator.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Validates a column reference used inside a filter predicate.
 *
 * <p>Checks, in order: that the same column path is not used with two different
 * Java types, that the column is not repeated, and that its Java type is valid for
 * the type found in the column descriptor. Columns without a descriptor are accepted.
 *
 * @param column the column reference to validate
 * @param <T> the Java type of values in the column
 * @throws IllegalArgumentException if the path was already seen with another type,
 *         or if the column's max repetition level is greater than zero
 */
private <T extends Comparable<T>> void validateColumn(Column<T> column) {
  final ColumnPath path = column.getColumnPath();
  final Class<?> previousType = columnTypesEncountered.get(path);

  if (previousType == null) {
    // first time this path is seen: remember the type it was used with
    columnTypesEncountered.put(path, column.getColumnType());
  } else if (!previousType.equals(column.getColumnType())) {
    throw new IllegalArgumentException("Column: "
        + path.toDotString()
        + " was provided with different types in the same predicate."
        + " Found both: (" + previousType + ", " + column.getColumnType() + ")");
  }

  final ColumnDescriptor descriptor = getColumnDescriptor(path);
  if (descriptor == null) {
    // The column is missing from the schema, so there is nothing further to check here.
    // (Per the original note, evaluation reports missing values through updateNull().)
    return;
  }

  final boolean repeated = descriptor.getMaxRepetitionLevel() > 0;
  if (repeated) {
    throw new IllegalArgumentException("FilterPredicates do not currently support repeated columns. "
        + "Column " + path.toDotString() + " is repeated.");
  }

  ValidTypeMap.assertTypeValid(column, descriptor.getType());
}
 
Example #12
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether a block can be dropped for a {@code >=} predicate.
 *
 * @param gtEq the greater-than-or-equal predicate
 * @param <T> the Java type of values in the column
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the special cases below,
 *         otherwise whether the chunk's max compares below the predicate value
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(GtEq<T> gtEq) {
  ColumnChunkMetaData chunk = getColumnChunk(gtEq.getColumn().getColumnPath());
  if (chunk == null) {
    // a missing column is all nulls, and null is never >= any value
    return BLOCK_CANNOT_MATCH;
  }

  Statistics<T> statistics = chunk.getStatistics();
  if (statistics.isEmpty()) {
    // nothing is known about this chunk, so it has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  if (isAllNulls(chunk)) {
    // v >= someValue can never hold for a chunk that only contains nulls
    return BLOCK_CANNOT_MATCH;
  }

  if (!statistics.hasNonNullValue()) {
    // min/max are not available, so the chunk has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  // drop when the requested value is greater than the chunk's max
  T bound = gtEq.getValue();
  return statistics.compareMaxToValue(bound) < 0;
}
 
Example #13
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether a block can be dropped for a {@code >} predicate.
 *
 * @param gt the greater-than predicate
 * @param <T> the Java type of values in the column
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the special cases below,
 *         otherwise whether the chunk's max compares below or equal to the predicate value
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(Gt<T> gt) {
  ColumnChunkMetaData chunk = getColumnChunk(gt.getColumn().getColumnPath());
  if (chunk == null) {
    // a missing column is all nulls, and null is never > any value
    return BLOCK_CANNOT_MATCH;
  }

  Statistics<T> statistics = chunk.getStatistics();
  if (statistics.isEmpty()) {
    // nothing is known about this chunk, so it has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  if (isAllNulls(chunk)) {
    // v > someValue can never hold for a chunk that only contains nulls
    return BLOCK_CANNOT_MATCH;
  }

  if (!statistics.hasNonNullValue()) {
    // min/max are not available, so the chunk has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  // drop when the requested value is greater than or equal to the chunk's max
  T bound = gt.getValue();
  return statistics.compareMaxToValue(bound) <= 0;
}
 
Example #14
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether a block can be dropped for a {@code <=} predicate.
 *
 * @param ltEq the less-than-or-equal predicate
 * @param <T> the Java type of values in the column
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the special cases below,
 *         otherwise whether the chunk's min compares above the predicate value
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(LtEq<T> ltEq) {
  ColumnChunkMetaData chunk = getColumnChunk(ltEq.getColumn().getColumnPath());
  if (chunk == null) {
    // a missing column is all nulls, and null is never <= any value
    return BLOCK_CANNOT_MATCH;
  }

  Statistics<T> statistics = chunk.getStatistics();
  if (statistics.isEmpty()) {
    // nothing is known about this chunk, so it has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  if (isAllNulls(chunk)) {
    // v <= someValue can never hold for a chunk that only contains nulls
    return BLOCK_CANNOT_MATCH;
  }

  if (!statistics.hasNonNullValue()) {
    // min/max are not available, so the chunk has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  // drop when the requested value is less than the chunk's min
  T bound = ltEq.getValue();
  return statistics.compareMinToValue(bound) > 0;
}
 
Example #15
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether a block can be dropped for a {@code <} predicate.
 *
 * @param lt the less-than predicate
 * @param <T> the Java type of values in the column
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the special cases below,
 *         otherwise whether the chunk's min compares above or equal to the predicate value
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(Lt<T> lt) {
  ColumnChunkMetaData chunk = getColumnChunk(lt.getColumn().getColumnPath());
  if (chunk == null) {
    // a missing column is all nulls, and null is never < any value
    return BLOCK_CANNOT_MATCH;
  }

  Statistics<T> statistics = chunk.getStatistics();
  if (statistics.isEmpty()) {
    // nothing is known about this chunk, so it has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  if (isAllNulls(chunk)) {
    // v < someValue can never hold for a chunk that only contains nulls
    return BLOCK_CANNOT_MATCH;
  }

  if (!statistics.hasNonNullValue()) {
    // min/max are not available, so the chunk has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  // drop when the requested value is less than or equal to the chunk's min
  T bound = lt.getValue();
  return statistics.compareMinToValue(bound) >= 0;
}
 
Example #16
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Decides whether a block can be dropped for a not-equals predicate.
 *
 * @param notEq the not-equals predicate; its value may be {@code null}
 * @param <T> the Java type of values in the column
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the special cases below,
 *         otherwise whether both the chunk's min and max compare equal to the predicate value
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(NotEq<T> notEq) {
  ColumnChunkMetaData chunk = getColumnChunk(notEq.getColumn().getColumnPath());
  T expected = notEq.getValue();

  if (chunk == null) {
    // null is always equal to null, so notEq(null) cannot match a missing column
    return expected == null ? BLOCK_CANNOT_MATCH : BLOCK_MIGHT_MATCH;
  }

  Statistics<T> statistics = chunk.getStatistics();
  if (statistics.isEmpty()) {
    // nothing is known about this chunk, so it has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  if (expected == null) {
    // looking for v notEq(null): only a chunk made up entirely of nulls can be dropped
    return isAllNulls(chunk);
  }

  if (statistics.isNumNullsSet() && hasNulls(chunk)) {
    // looking for v notEq(someNonNull): any null in the chunk matches, so keep it
    return BLOCK_MIGHT_MATCH;
  }

  if (!statistics.hasNonNullValue()) {
    // min/max are not available, so the chunk has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  // drop only when min = max = value
  return statistics.compareMinToValue(expected) == 0 && statistics.compareMaxToValue(expected) == 0;
}
 
Example #17
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Decides whether a block can be dropped for an equals predicate.
 *
 * @param eq the equals predicate; its value may be {@code null}
 * @param <T> the Java type of values in the column
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the special cases below,
 *         otherwise whether the predicate value lies outside the chunk's [min, max] range
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(Eq<T> eq) {
  ColumnChunkMetaData chunk = getColumnChunk(eq.getColumn().getColumnPath());
  T expected = eq.getValue();

  if (chunk == null) {
    // the column isn't in this file, so every value is null and a non-null value never matches
    return expected != null ? BLOCK_CANNOT_MATCH : BLOCK_MIGHT_MATCH;
  }

  Statistics<T> statistics = chunk.getStatistics();
  if (statistics.isEmpty()) {
    // nothing is known about this chunk, so it has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  if (expected == null) {
    if (statistics.isNumNullsSet()) {
      // looking for v eq(null): drop when the chunk holds no nulls
      return !hasNulls(chunk);
    }
    // the null count is unknown, so the chunk has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  if (isAllNulls(chunk)) {
    // looking for v eq(someNonNull) in a chunk made up entirely of nulls
    return BLOCK_CANNOT_MATCH;
  }

  if (!statistics.hasNonNullValue()) {
    // min/max are not available, so the chunk has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  // drop when value < min or value > max
  return statistics.compareMinToValue(expected) > 0 || statistics.compareMaxToValue(expected) < 0;
}
 
Example #18
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Decides whether a block can be dropped for a user-defined predicate.
 *
 * @param ud the user-defined predicate node
 * @param inverted whether the predicate is evaluated in its inverted form
 * @param <T> the Java type of values in the column
 * @param <U> the user-defined predicate type
 * @return the drop decision: based on {@code acceptsNullValue()} when the column is missing or
 *         all nulls, {@code BLOCK_MIGHT_MATCH} when statistics are unusable, otherwise the result
 *         of {@code canDrop} / {@code inverseCanDrop} on the chunk's min/max
 */
private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(UserDefined<T, U> ud, boolean inverted) {
  ColumnChunkMetaData chunk = getColumnChunk(ud.getColumn().getColumnPath());
  U predicate = ud.getUserDefinedPredicate();

  if (chunk == null) {
    // the column isn't in this file, so every value is null:
    // ask the predicate whether it keeps null
    return inverted ? predicate.acceptsNullValue() : !predicate.acceptsNullValue();
  }

  Statistics<T> statistics = chunk.getStatistics();
  if (statistics.isEmpty()) {
    // nothing is known about this chunk, so it has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  if (isAllNulls(chunk)) {
    // only nulls in this chunk: ask the predicate whether it keeps null
    return inverted ? predicate.acceptsNullValue() : !predicate.acceptsNullValue();
  }

  if (!statistics.hasNonNullValue()) {
    // min/max are not available, so the chunk has to be kept
    return BLOCK_MIGHT_MATCH;
  }

  // hand the chunk's min, max and comparator to the predicate
  org.apache.parquet.filter2.predicate.Statistics<T> minMax =
    new org.apache.parquet.filter2.predicate.Statistics<T>(
      statistics.genericGetMin(), statistics.genericGetMax(), statistics.comparator());

  return inverted ? predicate.inverseCanDrop(minMax) : predicate.canDrop(minMax);
}
 
Example #19
Source File: ParquetTableSource.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Extracts the Parquet column and the comparison literal from a binary comparison.
 *
 * <p>The column kind is derived from the field's type in {@code parquetSchema}, not from
 * the literal. The literal is therefore only converted when its runtime class fits that
 * column kind; otherwise no pair is produced.
 *
 * @param comp the comparison to take the column name and the literal from
 * @return the column paired with its literal, or {@code null} when the field is not in
 *         the schema, the literal is {@code null} or not {@link Comparable}, the literal
 *         does not fit the column type, or the column type is unsupported
 */
@Nullable
private Tuple2<Column, Comparable> extractColumnAndLiteral(BinaryComparison comp) {
	String columnName = getColumnName(comp);
	ColumnPath columnPath = ColumnPath.fromDotString(columnName);
	TypeInformation<?> typeInfo;
	try {
		Type type = parquetSchema.getType(columnPath.toArray());
		typeInfo = ParquetSchemaConverter.convertParquetTypeToTypeInfo(type);
	} catch (InvalidRecordException e) {
		// keep the cause in the log instead of dropping it
		LOG.error("Pushed predicate on undefined field name {} in schema", columnName, e);
		return null;
	}

	// fetch literal and ensure it is comparable
	Object value = getLiteral(comp);
	if (!(value instanceof Comparable)) {
		// a null literal also lands here, so do not dereference it blindly
		String literalType = value == null ? "null" : value.getClass().getCanonicalName();
		LOG.warn("Encountered a non-comparable literal of type {}. " +
			"Cannot push predicate [{}] into ParquetTablesource. " +
			"This is a bug and should be reported.", literalType, comp);
		return null;
	}

	if (value instanceof Number) {
		// NOTE(review): the literal is narrowed/widened to the column's type as before;
		// confirm callers never pass a fractional literal for an integral column.
		Number number = (Number) value;
		if (typeInfo == BasicTypeInfo.BYTE_TYPE_INFO ||
			typeInfo == BasicTypeInfo.SHORT_TYPE_INFO ||
			typeInfo == BasicTypeInfo.INT_TYPE_INFO) {
			return new Tuple2<>(FilterApi.intColumn(columnName), number.intValue());
		} else if (typeInfo == BasicTypeInfo.LONG_TYPE_INFO) {
			return new Tuple2<>(FilterApi.longColumn(columnName), number.longValue());
		} else if (typeInfo == BasicTypeInfo.FLOAT_TYPE_INFO) {
			return new Tuple2<>(FilterApi.floatColumn(columnName), number.floatValue());
		} else if (typeInfo == BasicTypeInfo.DOUBLE_TYPE_INFO) {
			return new Tuple2<>(FilterApi.doubleColumn(columnName), number.doubleValue());
		}
	} else if (value instanceof Boolean) {
		if (typeInfo == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
			return new Tuple2<>(FilterApi.booleanColumn(columnName), (Boolean) value);
		}
	} else if (value instanceof String) {
		if (typeInfo == BasicTypeInfo.STRING_TYPE_INFO) {
			return new Tuple2<>(FilterApi.binaryColumn(columnName), Binary.fromString((String) value));
		}
	}

	// unsupported column type, or a literal whose class does not fit the column type:
	// return null rather than fail with a ClassCastException
	return null;
}
 
Example #20
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds a predicate that keeps the records accepted by the given
 * {@link UserDefinedPredicate} instance.
 * <p>
 * The instance passed in must be serializable.
 *
 * @param column a column reference created by FilterApi
 * @param udp the user-defined predicate instance to apply
 * @param <T> the Java type of values in the column
 * @param <U> a user-defined predicate for values of type T
 * @return a user-defined predicate for the given column
 */
public static <T extends Comparable<T>, U extends UserDefinedPredicate<T> & Serializable>
  UserDefined<T, U> userDefined(Column<T> column, U udp) {
  UserDefined<T, U> predicate = new UserDefinedByInstance<T, U>(column, udp);
  return predicate;
}
 
Example #21
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds a predicate that keeps the records accepted by a {@link UserDefinedPredicate}
 * of the given class.
 * <p>
 * The class must have a default constructor. To pass an already constructed
 * UserDefinedPredicate instead, use the userDefined overload that takes an instance.
 *
 * @param column a column reference created by FilterApi
 * @param clazz the user-defined predicate class to apply
 * @param <T> the Java type of values in the column
 * @param <U> a user-defined predicate for values of type T
 * @return a user-defined predicate for the given column
 */
public static <T extends Comparable<T>, U extends UserDefinedPredicate<T>>
  UserDefined<T, U> userDefined(Column<T> column, Class<U> clazz) {
  UserDefined<T, U> predicate = new UserDefinedByClass<T, U>(column, clazz);
  return predicate;
}
 
Example #22
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Keeps records whose value is greater than or equal to the provided value.
 * The provided value cannot be null, as an ordering comparison with null has no meaning.
 * Records with null values will be dropped.
 * <p>
 * For example:
 *   gtEq(column, 7) keeps every record whose value is not null and is 7 or more.
 *
 * @param column a column reference created by FilterApi
 * @param value a value that matches the column's type
 * @param <T> the Java type of values in the column
 * @param <C> the column type that corresponds to values of type T
 * @return a greater-than-or-equal predicate for the given column and value
 */
public static <T extends Comparable<T>, C extends Column<T> & SupportsLtGt> GtEq<T> gtEq(C column, T value) {
  GtEq<T> predicate = new GtEq<T>(column, value);
  return predicate;
}
 
Example #23
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Keeps records whose value is strictly greater than the provided value.
 * The provided value cannot be null, as an ordering comparison with null has no meaning.
 * Records with null values will be dropped.
 * <p>
 * For example:
 *   gt(column, 7) keeps every record whose value is not null and is more than 7 (7 itself is dropped).
 *
 * @param column a column reference created by FilterApi
 * @param value a value that matches the column's type
 * @param <T> the Java type of values in the column
 * @param <C> the column type that corresponds to values of type T
 * @return a greater-than predicate for the given column and value
 */
public static <T extends Comparable<T>, C extends Column<T> & SupportsLtGt> Gt<T> gt(C column, T value) {
  Gt<T> predicate = new Gt<T>(column, value);
  return predicate;
}
 
Example #24
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Keeps records whose value is less than or equal to the provided value.
 * The provided value cannot be null, as an ordering comparison with null has no meaning.
 * Records with null values will be dropped.
 * <p>
 * For example:
 *   ltEq(column, 7) keeps every record whose value is not null and is 7 or less.
 *
 * @param column a column reference created by FilterApi
 * @param value a value that matches the column's type
 * @param <T> the Java type of values in the column
 * @param <C> the column type that corresponds to values of type T
 * @return a less-than-or-equal predicate for the given column and value
 */
public static <T extends Comparable<T>, C extends Column<T> & SupportsLtGt> LtEq<T> ltEq(C column, T value) {
  LtEq<T> predicate = new LtEq<T>(column, value);
  return predicate;
}
 
Example #25
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Keeps records whose value is strictly less than the provided value.
 * The provided value cannot be null, as an ordering comparison with null has no meaning.
 * Records with null values will be dropped.
 * <p>
 * For example:
 *   lt(column, 7) keeps every record whose value is not null and is less than 7 (7 itself is dropped).
 *
 * @param column a column reference created by FilterApi
 * @param value a value that matches the column's type
 * @param <T> the Java type of values in the column
 * @param <C> the column type that corresponds to values of type T
 * @return a less-than predicate for the given column and value
 */
public static <T extends Comparable<T>, C extends Column<T> & SupportsLtGt> Lt<T> lt(C column, T value) {
  Lt<T> predicate = new Lt<T>(column, value);
  return predicate;
}
 
Example #26
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Keeps records whose value differs from the provided value.
 * Nulls are handled the way the Java programming language handles them.
 * <p>
 * For example:
 *   notEq(column, null) keeps every record whose value is not null.
 *   notEq(column, 7) keeps every record whose value is not 7, which includes records whose value is null.
 *
 *   NOTE: some query languages treat null differently. SQL and pig, for example, drop
 *   nulls when filtering by not equal to 7. To get that behavior with this api, use:
 *   and(notEq(column, 7), notEq(column, null))
 *
 *   NOTE: see the docs of the {@link #lt}, {@link #ltEq}, {@link #gt} and {@link #gtEq} operators
 *         for how they handle nulls
 *
 * @param column a column reference created by FilterApi
 * @param value a value that matches the column's type
 * @param <T> the Java type of values in the column
 * @param <C> the column type that corresponds to values of type T
 * @return a not-equals predicate for the given column and value
 */
public static <T extends Comparable<T>, C extends Column<T> & SupportsEqNotEq> NotEq<T> notEq(C column, T value) {
  NotEq<T> predicate = new NotEq<T>(column, value);
  return predicate;
}
 
Example #27
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Keeps records whose value equals the provided value.
 * Nulls are handled the way the Java programming language handles them.
 * <p>
 * For example:
 *   eq(column, null) keeps every record whose value is null.
 *   eq(column, 7) keeps every record whose value is 7 and drops records whose value is null.
 *
 * @param column a column reference created by FilterApi
 * @param value a value that matches the column's type
 * @param <T> the Java type of values in the column
 * @param <C> the column type that corresponds to values of type T
 * @return an equals predicate for the given column and value
 */
public static <T extends Comparable<T>, C extends Column<T> & SupportsEqNotEq> Eq<T> eq(C column, T value) {
  Eq<T> predicate = new Eq<T>(column, value);
  return predicate;
}