Java Code Examples for org.apache.calcite.rel.metadata.RelMetadataQuery#getSelectivity()

The following examples show how to use org.apache.calcite.rel.metadata.RelMetadataQuery#getSelectivity(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DrillFilterRelBase.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Estimates the number of rows this filter emits as the input row count
 * multiplied by a bounded selectivity.
 *
 * <p>The selectivity reported by the metadata query is raised to at least
 * {@code filterMinSelectivityEstimateFactor} (unless the condition is always
 * FALSE) and then lowered to at most {@code filterMaxSelectivityEstimateFactor}
 * (unless the condition is always TRUE).
 *
 * @param mq metadata query used for both the selectivity and the input row count
 * @return the estimated output row count
 */
@Override
public double estimateRowCount(RelMetadataQuery mq) {
  // Calcite's default selectivity estimate is replaced by a bounded one so
  // that the planner ends up with the desired parallelism.
  double boundedSelectivity = mq.getSelectivity(getInput(), condition);

  // Lower bound: not applied when the condition is always FALSE.
  if (!condition.isAlwaysFalse()
      && boundedSelectivity < filterMinSelectivityEstimateFactor) {
    boundedSelectivity = filterMinSelectivityEstimateFactor;
  }

  // Upper bound: not applied when the condition is always TRUE.
  if (!condition.isAlwaysTrue()
      && boundedSelectivity > filterMaxSelectivityEstimateFactor) {
    boundedSelectivity = filterMaxSelectivityEstimateFactor;
  }

  // NumberUtil.multiply is used because it also takes nulls into account.
  return NumberUtil.multiply(boundedSelectivity, mq.getRowCount(getInput()));
}
 
Example 2
Source File: FilterRelBase.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the filter's estimated row count from the input row count and a
 * selectivity that is kept within the configured minimum and maximum factors.
 *
 * <p>The minimum is enforced only when the condition is not always FALSE, and
 * the maximum only when the condition is not always TRUE.
 *
 * @param mq metadata query supplying the selectivity and the input row count
 * @return bounded selectivity multiplied by the input row count
 */
@Override
public double estimateRowCount(RelMetadataQuery mq) {
  // Start from Calcite's estimate, then bound it below and above in order to
  // get the desired parallelism.
  double cappedSelectivity = mq.getSelectivity(getInput(), condition);

  // Raise to filterMinSelectivityEstimateFactor unless the filter is always FALSE.
  if (!condition.isAlwaysFalse()
      && cappedSelectivity < filterMinSelectivityEstimateFactor) {
    cappedSelectivity = filterMinSelectivityEstimateFactor;
  }

  // Lower to filterMaxSelectivityEstimateFactor unless the filter is always TRUE.
  if (!condition.isAlwaysTrue()
      && cappedSelectivity > filterMaxSelectivityEstimateFactor) {
    cappedSelectivity = filterMaxSelectivityEstimateFactor;
  }

  return cappedSelectivity * mq.getRowCount(getInput());
}
 
Example 3
Source File: RelMetadataTest.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Converts {@code sql} into a relational expression and asserts that its
 * selectivity, queried with a null predicate, is non-null and equal to
 * {@code expected} within {@code EPSILON}.
 *
 * @param sql      query text to convert
 * @param expected selectivity the converted expression should report
 */
private void checkFilterSelectivity(String sql, double expected) {
  final RelNode root = convertSql(sql);
  // The metadata query is obtained from the expression's own cluster.
  final Double selectivity =
      root.getCluster().getMetadataQuery().getSelectivity(root, null);
  assertNotNull(selectivity);
  assertEquals(expected, selectivity, EPSILON);
}
 
Example 4
Source File: RelMetadataTest.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the selectivity of {@code rel}, queried with a null predicate,
 * is non-null and equal to {@code expected} within {@code EPSILON}.
 *
 * @param rel      relational expression whose selectivity is checked
 * @param expected selectivity the expression should report
 */
private void checkRelSelectivity(RelNode rel, double expected) {
  // The metadata query is obtained from the expression's own cluster.
  final Double selectivity =
      rel.getCluster().getMetadataQuery().getSelectivity(rel, null);
  assertNotNull(selectivity);
  assertEquals(expected, selectivity, EPSILON);
}
 
Example 5
Source File: RelMetadataTest.java    From calcite with Apache License 2.0 5 votes vote down vote up
/** Verifies that a metadata request carrying a null argument can be served
 * through a caching metadata provider. */
@Test void testSelectivityAggCached() {
  final String sql = "select deptno, count(*) from emp where deptno > 10 "
      + "group by deptno having count(*) = 0";
  final RelNode rel = convertSql(sql);

  // Wrap the cluster's current provider in a caching one before querying.
  rel.getCluster().setMetadataProvider(
      new CachingRelMetadataProvider(
          rel.getCluster().getMetadataProvider(),
          rel.getCluster().getPlanner()));

  // The null predicate is the argument under test.
  final Double selectivity =
      rel.getCluster().getMetadataQuery().getSelectivity(rel, null);
  assertThat(selectivity,
      within(DEFAULT_COMP_SELECTIVITY * DEFAULT_EQUAL_SELECTIVITY, EPSILON));
}
 
Example 6
Source File: DrillRelMdSelectivity.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Estimates the selectivity of {@code predicate} when applied to a join.
 *
 * <p>If {@code DrillRelOptUtil.guessRows(rel)} is true the estimate is
 * delegated to the superclass. Otherwise the predicate's conjuncts are
 * classified into left-only, right-only and join filters. The result is the
 * product of the selectivity each input reports for its share and a guessed
 * selectivity for the join filters. A null predicate yields 1.0.
 *
 * @param rel       join whose selectivity is estimated
 * @param mq        metadata query used to ask the inputs for their selectivity
 * @param predicate predicate to evaluate, may be null
 * @return the estimated selectivity
 */
private Double getJoinSelectivity(DrillJoinRelBase rel, RelMetadataQuery mq, RexNode predicate) {
  final JoinRelType joinType = rel.getJoinType();
  final RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
  // One slot per join output field, all left at zero.
  final int[] adjustments = new int[rel.getRowType().getFieldCount()];

  if (DrillRelOptUtil.guessRows(rel)) {
    return super.getSelectivity(rel, mq, predicate);
  }
  if (predicate == null) {
    // Nothing to evaluate: the selectivity stays neutral.
    return 1.0;
  }

  // Determine which filters apply to the left input, the right input, or the join itself.
  final List<RexNode> leftFilters = new ArrayList<>();
  final List<RexNode> rightFilters = new ArrayList<>();
  final List<RexNode> joinFilters = new ArrayList<>();
  RelOptUtil.classifyFilters(
      rel,
      RelOptUtil.conjunctions(predicate),
      joinType,
      joinType == JoinRelType.INNER,
      !joinType.generatesNullsOnLeft(),
      !joinType.generatesNullsOnRight(),
      joinFilters,
      leftFilters,
      rightFilters);

  final RexNode leftPred = RexUtil.composeConjunction(rexBuilder, leftFilters, true);
  final RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters, true);

  double product = 1.0;
  for (RelNode input : rel.getInputs()) {
    final RexNode sidePred = (input == rel.getLeft()) ? leftPred : rightPred;

    RexNode inputPred = null;
    if (sidePred != null) {
      // Convert the predicate to reference the types of the input's fields.
      inputPred = sidePred.accept(
          new RelOptUtil.RexInputConverter(
              rexBuilder,
              null,
              input.getRowType().getFieldList(),
              adjustments));
    }
    product *= mq.getSelectivity(input, inputPred);
  }

  // Filters that stay on the join itself are estimated with the generic guess.
  product *= RelMdUtil.guessSelectivity(RexUtil.composeConjunction(rexBuilder, joinFilters, true));
  return product;
}
 
Example 7
Source File: DrillRelMdDistinctRowCount.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Estimates the number of rows which would be produced by a GROUP BY on the
 * set of columns indicated by groupKey.
 *
 * <p>With an empty group key the estimate is the filtered row count
 * ({@code selectivity * rowCount}). Otherwise the NDV statistic of every
 * group-key column that has one is folded into
 * {@code s = prod(1 - ndv / rowCount)} and the estimate is
 * {@code (1 - s) * selectivity * rowCount}. When the table metadata cannot be
 * read, or no NDV was found for any group-key column, the estimate falls back
 * to 10% of the scan's estimated row count.
 *
 * @param scan      scan whose distinct row count is estimated
 * @param mq        metadata query used for selectivity and row count
 * @param table     table providing the column statistics
 * @param groupKey  bit set of the grouping column ordinals
 * @param type      row type used to resolve column ordinals to field names
 * @param predicate optional predicate applied before grouping
 * @return the estimated number of distinct rows
 */
private Double getDistinctRowCountInternal(DrillScanRelBase scan, RelMetadataQuery mq, DrillTable table,
    ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
  /* If predicate is present, determine its selectivity to estimate filtered rows.
   * Thereafter, compute the number of distinct rows. The selectivity is queried
   * once and reused by every branch below.
   */
  final double selectivity = mq.getSelectivity(scan, predicate);
  final double rowCount = mq.getRowCount(scan);

  if (groupKey.length() == 0) {
    return selectivity * rowCount;
  }

  TableMetadata tableMetadata;
  try {
    tableMetadata = table.getGroupScan().getTableMetadata();
  } catch (IOException ignored) {
    // Statistics cannot be obtained, use default behaviour
    return scan.estimateRowCount(mq) * 0.1;
  }
  double s = 1.0;

  for (int i = 0; i < groupKey.length(); i++) {
    // Only columns that are part of the group key contribute.
    if (!groupKey.get(i)) {
      continue;
    }
    final String colName = type.getFieldNames().get(i);
    ColumnStatistics columnStatistics = tableMetadata != null ? tableMetadata.getColumnStatistics(SchemaPath.getSimplePath(colName)) : null;
    Double ndv = columnStatistics != null ? (Double) columnStatistics.getStatistic(ColumnStatisticsKind.NVD) : null;
    // Skip NDV, if not available
    if (ndv == null) {
      continue;
    }
    s *= 1 - ndv / rowCount;
  }
  if (s > 0 && s < 1.0) {
    return (1 - s) * selectivity * rowCount;
  } else if (s == 1.0) {
    // Could not get any NDV estimate from stats - probably stats not present for GBY cols. So Guess!
    return scan.estimateRowCount(mq) * 0.1;
  } else {
    /* rowCount maybe less than NDV(different source), sanity check OR NDV not used at all */
    return selectivity * rowCount;
  }
}
 
Example 8
Source File: RelMdSelectivity.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves the selectivity of a subset by delegating to one of its members:
 * the best expression when it is non-null, otherwise the original one.
 *
 * @param rel       subset whose selectivity is requested
 * @param mq        metadata query the request is forwarded to
 * @param predicate predicate passed through unchanged
 * @return the selectivity reported for the chosen member
 */
public Double getSelectivity(
    RelSubset rel,
    RelMetadataQuery mq,
    RexNode predicate) {
  // Both candidates are evaluated up front; firstNonNull then picks between them.
  return mq.getSelectivity(
      MoreObjects.firstNonNull(rel.getBest(), rel.getOriginal()),
      predicate);
}
 
Example 9
Source File: RelMdRowCount.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Estimates the row count of a join.
 *
 * <p>When the join condition is always TRUE, the result is
 * {@code RelMdUtil.getJoinRowCount} for the join, doubled for a RIGHT join.
 * Otherwise the selectivity of the remaining (non-split) condition is bounded
 * by the planner's minimum and maximum selectivity factors. It is then
 * multiplied by the larger of the two input row counts, and doubled for a
 * RIGHT join whose remaining condition is not always TRUE.
 *
 * @param rel join to estimate
 * @param mq  metadata query used for selectivity and row counts
 * @return the estimated row count
 */
public static double estimateRowCount(Join rel, RelMetadataQuery mq) {
  final RexNode condition = rel.getCondition();
  if (condition.isAlwaysTrue()) {
    // Cartesian join is only supported for NLJ. If join type is right, make it more expensive
    final double cartesianFactor = rel.getJoinType() == JoinRelType.RIGHT ? 2.0 : 1.0;
    return RelMdUtil.getJoinRowCount(mq, rel, condition) * cartesianFactor;
  }

  // Selectivity bounds come from the planner settings, or from the defaults when none are available.
  final PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(rel.getCluster().getPlanner());
  final double minSelectivity;
  final double maxSelectivity;
  if (plannerSettings == null) {
    minSelectivity = PlannerSettings.DEFAULT_FILTER_MIN_SELECTIVITY_ESTIMATE_FACTOR;
    maxSelectivity = PlannerSettings.DEFAULT_FILTER_MAX_SELECTIVITY_ESTIMATE_FACTOR;
  } else {
    minSelectivity = plannerSettings.getFilterMinSelectivityEstimateFactor();
    maxSelectivity = plannerSettings.getFilterMaxSelectivityEstimateFactor();
  }

  final RexNode remaining = (rel instanceof JoinRelBase)
      ? ((JoinRelBase) rel).getRemaining()
      : RelOptUtil.splitJoinCondition(rel.getLeft(), rel.getRight(), condition, new ArrayList<>(), new ArrayList<>(), new ArrayList<>());

  double selectivity = mq.getSelectivity(rel, remaining);

  // Raise to the minimum unless the remaining condition is always FALSE.
  if (!remaining.isAlwaysFalse() && selectivity < minSelectivity) {
    selectivity = minSelectivity;
  }

  double rightJoinFactor = 1.0;
  if (!remaining.isAlwaysTrue()) {
    // Lower to the maximum unless the remaining condition is always TRUE.
    if (selectivity > maxSelectivity) {
      selectivity = maxSelectivity;
    }
    // Make right join more expensive for inequality join condition (logical phase)
    if (rel.getJoinType() == JoinRelType.RIGHT) {
      rightJoinFactor = 2.0;
    }
  }

  final double largerInputRowCount =
      Math.max(mq.getRowCount(rel.getLeft()), mq.getRowCount(rel.getRight()));
  return selectivity * largerInputRowCount * rightJoinFactor;
}