Java Code Examples for org.apache.parquet.column.statistics.Statistics#compareMinToValue()

The following examples show how to use org.apache.parquet.column.statistics.Statistics#compareMinToValue(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether a block can be dropped for the predicate {@code column < value},
 * looking only at the statistics of the column's chunk.
 *
 * @param lt the less-than predicate being evaluated
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the guard cases,
 *         otherwise whether the chunk minimum is at or above the predicate value
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(Lt<T> lt) {
  ColumnChunkMetaData chunk = getColumnChunk(lt.getColumn().getColumnPath());
  if (chunk == null) {
    // A missing column is always null, and null is never < any value.
    return BLOCK_CANNOT_MATCH;
  }

  Statistics<T> chunkStats = chunk.getStatistics();
  if (chunkStats.isEmpty()) {
    // Without statistics there is no basis for dropping the chunk.
    return BLOCK_MIGHT_MATCH;
  } else if (isAllNulls(chunk)) {
    // Only nulls in this chunk, so no record can satisfy v < value.
    return BLOCK_CANNOT_MATCH;
  } else if (!chunkStats.hasNonNullValue()) {
    // min/max are not available in the statistics; stay conservative.
    return BLOCK_MIGHT_MATCH;
  }

  // Droppable exactly when min >= value, i.e. nothing in the chunk is below value.
  boolean minAtOrAboveValue = chunkStats.compareMinToValue(lt.getValue()) >= 0;
  return minAtOrAboveValue;
}
 
Example 2
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether a block can be dropped for the predicate {@code column <= value},
 * looking only at the statistics of the column's chunk.
 *
 * @param ltEq the less-than-or-equal predicate being evaluated
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the guard cases,
 *         otherwise whether the chunk minimum is strictly above the predicate value
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(LtEq<T> ltEq) {
  ColumnChunkMetaData chunk = getColumnChunk(ltEq.getColumn().getColumnPath());
  if (chunk == null) {
    // A missing column is always null, and null is never <= any value.
    return BLOCK_CANNOT_MATCH;
  }

  Statistics<T> chunkStats = chunk.getStatistics();
  if (chunkStats.isEmpty()) {
    // Without statistics there is no basis for dropping the chunk.
    return BLOCK_MIGHT_MATCH;
  } else if (isAllNulls(chunk)) {
    // Only nulls in this chunk, so no record can satisfy v <= value.
    return BLOCK_CANNOT_MATCH;
  } else if (!chunkStats.hasNonNullValue()) {
    // min/max are not available in the statistics; stay conservative.
    return BLOCK_MIGHT_MATCH;
  }

  // Droppable exactly when min > value, i.e. even the smallest entry exceeds value.
  boolean minAboveValue = chunkStats.compareMinToValue(ltEq.getValue()) > 0;
  return minAboveValue;
}
 
Example 3
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Decides whether a block can be dropped for the predicate {@code column == value},
 * where {@code value} may be null (meaning "column is null").
 *
 * @param eq the equality predicate being evaluated
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the guard cases,
 *         otherwise whether the value falls outside the chunk's [min, max] range
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(Eq<T> eq) {
  ColumnChunkMetaData chunk = getColumnChunk(eq.getColumn().getColumnPath());
  T target = eq.getValue();

  if (chunk == null) {
    // The column is absent from this file, so every value is null.
    if (target == null) {
      return BLOCK_MIGHT_MATCH;
    }
    // A non-null target can never equal null.
    return BLOCK_CANNOT_MATCH;
  }

  Statistics<T> chunkStats = chunk.getStatistics();
  if (chunkStats.isEmpty()) {
    // Without statistics there is no basis for dropping the chunk.
    return BLOCK_MIGHT_MATCH;
  }

  if (target == null) {
    if (chunkStats.isNumNullsSet()) {
      // Looking for null entries: drop the chunk when it holds no nulls.
      return !hasNulls(chunk);
    }
    // The null count is unknown, so nothing can be concluded.
    return BLOCK_MIGHT_MATCH;
  }

  if (isAllNulls(chunk)) {
    // Looking for a non-null value in a chunk made only of nulls.
    return BLOCK_CANNOT_MATCH;
  } else if (!chunkStats.hasNonNullValue()) {
    // min/max are not available in the statistics; stay conservative.
    return BLOCK_MIGHT_MATCH;
  }

  // Keep the chunk only when min <= value <= max; drop it otherwise.
  boolean withinRange =
      chunkStats.compareMinToValue(target) <= 0 && chunkStats.compareMaxToValue(target) >= 0;
  return !withinRange;
}
 
Example 4
Source File: StatisticsFilter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Decides whether a block can be dropped for the predicate {@code column != value},
 * where {@code value} may be null (meaning "column is not null").
 *
 * @param notEq the inequality predicate being evaluated
 * @return {@code BLOCK_CANNOT_MATCH} or {@code BLOCK_MIGHT_MATCH} for the guard cases,
 *         otherwise whether both the chunk minimum and maximum equal the value
 */
@Override
@SuppressWarnings("unchecked")
public <T extends Comparable<T>> Boolean visit(NotEq<T> notEq) {
  ColumnChunkMetaData chunk = getColumnChunk(notEq.getColumn().getColumnPath());
  T target = notEq.getValue();

  if (chunk == null) {
    if (target != null) {
      return BLOCK_MIGHT_MATCH;
    }
    // null is always equal to null, so notEq(null) cannot match here.
    return BLOCK_CANNOT_MATCH;
  }

  Statistics<T> chunkStats = chunk.getStatistics();
  if (chunkStats.isEmpty()) {
    // Without statistics there is no basis for dropping the chunk.
    return BLOCK_MIGHT_MATCH;
  }

  if (target == null) {
    // Looking for non-null entries: drop the chunk only if it is all nulls.
    return isAllNulls(chunk);
  }

  if (chunkStats.isNumNullsSet()) {
    if (hasNulls(chunk)) {
      // Looking for v notEq(someNonNull), but the chunk contains nulls: keep it.
      return BLOCK_MIGHT_MATCH;
    }
  }

  if (!chunkStats.hasNonNullValue()) {
    // min/max are not available in the statistics; stay conservative.
    return BLOCK_MIGHT_MATCH;
  }

  // Droppable only when min = max = value.
  boolean minEqualsValue = chunkStats.compareMinToValue(target) == 0;
  return minEqualsValue && chunkStats.compareMaxToValue(target) == 0;
}