Java Code Examples for org.apache.calcite.rel.metadata.RelMetadataQuery#getRowCount()

The following examples show how to use org.apache.calcite.rel.metadata.RelMetadataQuery#getRowCount(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: NestedLoopJoinPrel.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Costs this nested-loop join from the row counts of its two inputs.
 *
 * <p>When the planner settings request default costing, the inherited cost is
 * scaled by 0.1. Otherwise the row cost is the product of both input row
 * counts, and the CPU cost charges one comparison per join-condition
 * expression for every row pair, scaled by the nested-loop-join factor.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain input row counts
 * @return the self cost of this operator
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  final double leftRows = mq.getRowCount(this.getLeft());
  final double rightRows = mq.getRowCount(this.getRight());
  final double factor = PrelUtil.getSettings(getCluster()).getNestedLoopJoinFactor();

  // Number of expressions in the join condition: conjuncts plus disjuncts.
  final int conjunctCount = RelOptUtil.conjunctions(getCondition()).size();
  final int disjunctCount = RelOptUtil.disjunctions(getCondition()).size();
  final int exprCount = conjunctCount + disjunctCount;
  final double conditionCost = DrillCostBase.COMPARE_CPU_COST * exprCount;

  // Every left row is paired with every right row.
  final double pairCount = leftRows * rightRows;
  final double cpu = conditionCost * pairCount * factor;

  final DrillCostFactory factory = (DrillCostFactory) planner.getCostFactory();
  return factory.makeCost(pairCount, cpu, 0, 0, 0);
}
 
Example 2
Source File: EnumerableBatchNestedLoopJoin.java    From calcite with Apache License 2.0 6 votes vote down vote up
/**
 * Costs this batch nested-loop join.
 *
 * <p>Returns an infinite cost when either input's estimated row count is
 * infinite. Otherwise the cost is this node's row count plus the left row
 * count, plus the right input's cost multiplied by the number of extra scans
 * (left row count divided by the number of correlation variables, minus one,
 * but never less than one).
 *
 * @param planner planner used for the cost factory and to cost the right input
 * @param mq metadata query used to obtain row counts
 * @return the self cost of this operator
 */
@Override public RelOptCost computeSelfCost(
    final RelOptPlanner planner,
    final RelMetadataQuery mq) {
  final double selfRows = mq.getRowCount(this);

  final double rightRows = right.estimateRowCount(mq);
  final double leftRows = left.estimateRowCount(mq);
  final boolean unbounded =
      Double.isInfinite(leftRows) || Double.isInfinite(rightRows);
  if (unbounded) {
    return planner.getCostFactory().makeInfiniteCost();
  }

  // One restart of the right side per batch of left rows.
  final double restarts = mq.getRowCount(getLeft()) / variablesSet.size();

  final RelOptCost rightCost = planner.getCost(getRight(), mq);
  final double rescanFactor = Math.max(1.0, restarts - 1);
  final RelOptCost rescanCost = rightCost.multiplyBy(rescanFactor);

  // TODO Add cost of last loop (the one that looks for the match)
  final RelOptCost ownCost =
      planner.getCostFactory().makeCost(selfRows + leftRows, 0, 0);
  return ownCost.plus(rescanCost);
}
 
Example 3
Source File: Correlate.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Costs this correlate.
 *
 * <p>Returns an infinite cost when either input's estimated row count is
 * infinite. Otherwise the cost is this node's row count (generating results)
 * plus the left row count (scanning left results), plus the right input's
 * cost multiplied by the left row count minus one (never less than one).
 *
 * @param planner planner used for the cost factory and to cost the right input
 * @param mq metadata query used to obtain row counts
 * @return the self cost of this operator
 */
@Override public RelOptCost computeSelfCost(RelOptPlanner planner,
    RelMetadataQuery mq) {
  final double selfRows = mq.getRowCount(this);

  final double rightRows = right.estimateRowCount(mq);
  final double leftRows = left.estimateRowCount(mq);
  final boolean unbounded =
      Double.isInfinite(leftRows) || Double.isInfinite(rightRows);
  if (unbounded) {
    return planner.getCostFactory().makeInfiniteCost();
  }

  final double restarts = mq.getRowCount(getLeft());
  // RelMetadataQuery.getCumulativeCost(getRight()); does not work for
  // RelSubset, so we ask planner to cost-estimate right relation
  final RelOptCost rightCost = planner.getCost(getRight(), mq);
  final double rescanFactor = Math.max(1.0, restarts - 1);
  final RelOptCost rescanCost = rightCost.multiplyBy(rescanFactor);

  final RelOptCost ownCost =
      planner.getCostFactory().makeCost(selfRows + leftRows, 0, 0);
  return ownCost.plus(rescanCost);
}
 
Example 4
Source File: RelMetadataTest.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the metadata query of {@code rel}'s cluster reports the given
 * row count, maximum row count and minimum row count for {@code rel}, and
 * that none of the three is null.
 *
 * @param rel relational expression under test
 * @param expected expected row count
 * @param expectedMin expected minimum row count
 * @param expectedMax expected maximum row count
 */
private void checkExchangeRowCount(RelNode rel, double expected, double expectedMin,
    double expectedMax) {
  final RelMetadataQuery metadata = rel.getCluster().getMetadataQuery();

  // Estimated row count.
  final Double actualRows = metadata.getRowCount(rel);
  assertThat(actualRows, notNullValue());
  assertThat(actualRows, is(expected));

  // Upper bound.
  final Double actualMax = metadata.getMaxRowCount(rel);
  assertThat(actualMax, notNullValue());
  assertThat(actualMax, is(expectedMax));

  // Lower bound.
  final Double actualMin = metadata.getMinRowCount(rel);
  assertThat(actualMin, notNullValue());
  assertThat(actualMin, is(expectedMin));
}
 
Example 5
Source File: FilterRelBase.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Estimates the CPU cost of this filter.
 *
 * <p>The comparison count starts at the input row count and, for each prefix
 * of the conjunction list (first conjunct, first two, ...), adds the number
 * of rows estimated to survive that prefix. The copy count is the input row
 * count times the input's field count.
 *
 * @param relMetadataQuery metadata query used for row-count estimates
 * @return comparisons times the compare cost plus copies times the copy cost
 */
private double estimateCpuCost(RelMetadataQuery relMetadataQuery) {
  final RelNode input = this.getInput();
  final double inputRows = relMetadataQuery.getRowCount(input);
  double comparisons = inputRows;
  final double copies = input.getRowType().getFieldCount() * inputRows;

  for (int prefixLength = 1; prefixLength <= numConjuncts; prefixLength++) {
    // Conjunction of the first prefixLength conjuncts.
    final RexNode prefixFilter = RexUtil.composeConjunction(
        this.getCluster().getRexBuilder(), conjunctions.subList(0, prefixLength), false);
    comparisons += RelMdUtil.estimateFilteredRows(input, prefixFilter, relMetadataQuery);
  }

  return comparisons * DremioCost.COMPARE_CPU_COST + copies * DremioCost.COPY_COST;
}
 
Example 6
Source File: DrillScreenRelBase.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Costs this screen operator.
 *
 * <p>When the planner settings request default costing, the inherited cost is
 * scaled by 0.1. Otherwise both the row cost and the CPU cost equal this
 * node's row count, and the result is again scaled by 0.1.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain the row count
 * @return the self cost of this operator
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  final boolean defaultCosting = PrelUtil.getSettings(getCluster()).useDefaultCosting();
  if (defaultCosting) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  // by default, assume cost is proportional to number of rows
  final double rows = mq.getRowCount(this);
  final DrillCostFactory factory = (DrillCostFactory) planner.getCostFactory();
  final RelOptCost proportionalCost = factory.makeCost(rows, rows, 0, 0);
  return proportionalCost.multiplyBy(0.1);
}
 
Example 7
Source File: JdbcRules.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Costs this node: the row cost is this node's row count, the CPU cost is the
 * input row count times the number of expressions in the program, and the
 * I/O cost is zero.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain row counts
 * @return the self cost of this operator
 */
public RelOptCost computeSelfCost(RelOptPlanner planner,
    RelMetadataQuery mq) {
  final double outputRows = mq.getRowCount(this);
  final double cpu = mq.getRowCount(getInput()) * program.getExprCount();
  final double io = 0;
  return planner.getCostFactory().makeCost(outputRows, cpu, io);
}
 
Example 8
Source File: UnionAllPrel.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Costs this union-all from the combined row count of its inputs.
 *
 * <p>When the planner settings request default costing, the inherited cost is
 * scaled by 0.1. Otherwise the row cost is the sum of the input row counts and
 * the CPU cost is that sum times the base CPU cost.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain input row counts
 * @return the self cost of this operator
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if(PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    // Forward the caller's metadata query. The one-argument overload drops it,
    // and if that overload delegates to the two-argument method it would
    // dispatch straight back into this override.
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }
  double totalInputRowCount = 0;
  for (int i = 0; i < this.getInputs().size(); i++) {
    totalInputRowCount += mq.getRowCount(this.getInputs().get(i));
  }

  double cpuCost = totalInputRowCount * DremioCost.BASE_CPU_COST;
  Factory costFactory = (Factory)planner.getCostFactory();
  return costFactory.makeCost(totalInputRowCount, cpuCost, 0, 0);
}
 
Example 9
Source File: ConverterImpl.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Costs this converter: both the row cost and the CPU cost equal the input
 * row count, and the I/O cost is zero.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain the input row count
 * @return the self cost of this operator
 */
@Override public RelOptCost computeSelfCost(RelOptPlanner planner,
    RelMetadataQuery mq) {
  final double inputRows = mq.getRowCount(getInput());
  final double cpu = inputRows;
  final double io = 0;
  return planner.getCostFactory().makeCost(inputRows, cpu, io);
}
 
Example 10
Source File: RowKeyJoinPrel.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Costs this row-key join from the row count of its right input.
 *
 * <p>When the planner settings request default costing, the inherited cost is
 * scaled by 0.1. Otherwise only a row cost (the right input's row count) is
 * charged; all other cost components are zero.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain the right input's row count
 * @return the self cost of this operator
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if(PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    // Forward the caller's metadata query. The one-argument overload drops it,
    // and if that overload delegates to the two-argument method it would
    // dispatch straight back into this override.
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }
  double rowCount = mq.getRowCount(this.getRight());
  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();

  // RowKeyJoin operator by itself incurs negligible CPU and I/O cost since it is not doing a real join.
  // The actual cost is attributed to the skip-scan (random I/O). The RK join will hold 1 batch in memory but
  // it is not making any extra copy of either the left or right batches, so the memory cost is 0
  return costFactory.makeCost(rowCount, 0, 0, 0, 0);
}
 
Example 11
Source File: EnumerableNestedLoopJoin.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Costs this nested-loop join.
 *
 * <p>Starts from this node's row count, optionally nudged upward by an
 * epsilon so that one of two otherwise equal flipped plans is consistently
 * preferred. If either input's estimated row count is infinite, that value
 * replaces the row count (the right input wins when both are infinite). The
 * resulting row-only cost is multiplied by 10 as a penalty.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain row counts
 * @return the self cost of this operator
 */
@Override public RelOptCost computeSelfCost(RelOptPlanner planner,
    RelMetadataQuery mq) {
  double rows = mq.getRowCount(this);

  // Joins can be flipped, and for many algorithms, both versions are viable
  // and have the same cost. To make the results stable between versions of
  // the planner, make one of the versions slightly more expensive.
  final boolean addEpsilon;
  switch (joinType) {
  case SEMI:
  case ANTI:
    // SEMI and ANTI join cannot be flipped
    addEpsilon = false;
    break;
  case RIGHT:
    addEpsilon = true;
    break;
  default:
    addEpsilon = RelNodes.COMPARATOR.compare(left, right) > 0;
    break;
  }
  if (addEpsilon) {
    rows = RelMdUtil.addEpsilon(rows);
  }

  final double rightRows = right.estimateRowCount(mq);
  final double leftRows = left.estimateRowCount(mq);
  if (Double.isInfinite(leftRows)) {
    rows = leftRows;
  }
  if (Double.isInfinite(rightRows)) {
    rows = rightRows;
  }

  // Give it some penalty
  return planner.getCostFactory().makeCost(rows, 0, 0).multiplyBy(10);
}
 
Example 12
Source File: Filter.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Costs this filter: the row cost is this node's row count, the CPU cost is
 * the input row count, and the I/O cost is zero.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain row counts
 * @return the self cost of this operator
 */
@Override public RelOptCost computeSelfCost(RelOptPlanner planner,
    RelMetadataQuery mq) {
  final double outputRows = mq.getRowCount(this);
  final double cpu = mq.getRowCount(getInput());
  final double io = 0;
  return planner.getCostFactory().makeCost(outputRows, cpu, io);
}
 
Example 13
Source File: Calc.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Costs this calc: the row cost is this node's row count, the CPU cost is the
 * input row count times the number of expressions in the program, and the
 * I/O cost is zero.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain row counts
 * @return the self cost of this operator
 */
@Override public RelOptCost computeSelfCost(RelOptPlanner planner,
    RelMetadataQuery mq) {
  final double outputRows = mq.getRowCount(this);
  final double cpu = mq.getRowCount(getInput()) * program.getExprCount();
  final double io = 0;
  return planner.getCostFactory().makeCost(outputRows, cpu, io);
}
 
Example 14
Source File: UnionAllPrel.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Costs this union-all from the combined row count of its inputs.
 *
 * <p>When the planner settings request default costing, the inherited cost is
 * scaled by 0.1. Otherwise the row cost is the sum of the input row counts and
 * the CPU cost is that sum times the base CPU cost.
 *
 * @param planner planner whose cost factory builds the result
 * @param mq metadata query used to obtain input row counts
 * @return the self cost of this operator
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  final boolean defaultCosting = PrelUtil.getSettings(getCluster()).useDefaultCosting();
  if (defaultCosting) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  // Accumulate the row counts of all inputs, in input order.
  double rowSum = 0;
  int inputIdx = 0;
  while (inputIdx < this.getInputs().size()) {
    rowSum += mq.getRowCount(this.getInputs().get(inputIdx));
    inputIdx++;
  }

  final double cpu = rowSum * DrillCostBase.BASE_CPU_COST;
  final DrillCostFactory factory = (DrillCostFactory) planner.getCostFactory();
  return factory.makeCost(rowSum, cpu, 0, 0);
}
 
Example 15
Source File: LimitRelBase.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Estimates the row count of this limit.
 *
 * <p>Without a fetch, the estimate is the input row count minus the offset,
 * clamped at zero so that an offset larger than the input never yields a
 * negative count. With a fetch, the estimate is offset plus fetch.
 *
 * @param mq metadata query used to obtain the input row count
 * @return the estimated row count; never negative when the input row count
 *     is a number
 */
@Override
public double estimateRowCount(RelMetadataQuery mq) {
  final int off = offset != null ? RexLiteral.intValue(offset) : 0;

  if (fetch == null) {
    // An offset beyond the end of the input leaves zero rows, not a negative count.
    return Math.max(mq.getRowCount(getInput()) - off, 0D);
  }

  final int f = RexLiteral.intValue(fetch);
  // Widen before adding: offset + fetch as ints can overflow to a negative value.
  return (double) off + f;
}
 
Example 16
Source File: ElasticScanPrel.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Reports the row count of {@code input} as this node's own estimate.
 *
 * @param mq metadata query used to obtain the row count
 * @return the row count the metadata query reports for {@code input}
 */
@Override
public double estimateRowCount(RelMetadataQuery mq) {
  final double inputRows = mq.getRowCount(input);
  return inputRows;
}
 
Example 17
Source File: JoinPruleBase.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Decides whether the right input of {@code join} may be broadcast.
 *
 * <p>Broadcast is rejected unless the right row count is below the broadcast
 * threshold, the left input is not singleton-distributed, and the join is
 * INNER or LEFT. It is then rejected when rows times columns of the right
 * input exceeds the cell-count threshold, accepted outright when the right
 * row count is at or below the minimum broadcast threshold, rejected when no
 * executor width is available, and otherwise accepted only when the scaled
 * fan-out factor is smaller than the left row count.
 *
 * @param planner planner (not consulted by this method)
 * @param join join being planned
 * @param left left input of the join
 * @param right right input of the join, the broadcast candidate
 * @return {@code true} if a broadcast plan should be considered
 */
protected boolean checkBroadcastConditions(RelOptPlanner planner, JoinRel join, RelNode left, RelNode right) {
  final RelMetadataQuery mq = join.getCluster().getMetadataQuery();
  // Right node is the one that is being considered to be broadcasted..
  final double broadcastRows = mq.getRowCount(right);
  final int broadcastColumns = right.getRowType().getFieldCount();
  final double broadcastCells = broadcastRows * broadcastColumns;
  final double probeRows = mq.getRowCount(left);

  // Basic eligibility: small enough, left side not singleton, supported join type.
  if (!(broadcastRows < PrelUtil.getSettings(join.getCluster()).getBroadcastThreshold())
      || left.getTraitSet().getTrait(DistributionTraitDef.INSTANCE).equals(DistributionTrait.SINGLETON)
      || (join.getJoinType() != JoinRelType.INNER && join.getJoinType() != JoinRelType.LEFT)) {
    return false;
  }

  // DX-3862:  For broadcast joins, the cost should not just consider the traits and join type.  If the broadcast table is small enough,
  // we shouldn't need to worry too much and allow broadcast join and see what the planner picks.
  final PlannerSettings settings = PrelUtil.getSettings(join.getCluster());
  final double maxCells = settings.getOptions().getOption(PlannerSettings.BROADCAST_CELL_COUNT_THRESHOLD);
  if (broadcastCells > maxCells) {
    // DX-17913 : For cases when the table is too big due to large number of columns, we should not do the broadcast join.
    logger.debug("Won't do broadcast join if the size of the table is too big based of total number of cells (rows x columns)");
    return false;
  }
  if (broadcastRows <= settings.getOptions().getOption(PlannerSettings.BROADCAST_MIN_THRESHOLD)) {
    logger.debug("Enable broadcast plan? true (rightRowCount {} smaller than minimum broadcast threshold)", broadcastRows);
    return true;
  }

  final long widthPerNode = settings.getMaxWidthPerNode();
  if (widthPerNode <= 0) {
    logger.debug("No executors are available. Won't do broadcast join");
    return false;
  }

  // In this case, the broadcast table is big-ish.  So, we should check to see if it is reasonable to do broadcast.
  // The broadcasted table will be sent at most (numEndPoints * maxWidthPerNode) times, (rightRowCount) rows.  We add a
  // penalty to broadcast (broadcastFactor).
  final double penalty = settings.getBroadcastFactor();

  final int endPoints = settings.numEndPoints();
  final long widthPerQuery = settings.getOptions().getOption(ExecConstants.MAX_WIDTH_GLOBAL);
  final long sliceTarget = settings.getSliceTarget();
  final double fanOut = Doubles.min(probeRows * 1.0 / sliceTarget, endPoints * widthPerNode, widthPerQuery);
  final boolean broadcast = (fanOut * penalty < probeRows);
  logger.debug("Enable broadcast plan? {} minFactor {} (numEndPoints {}, maxWidthPerNode {}, rightRowCount {}, broadcastFactor {}, leftRowCount {}, sliceTarget {}, maxWidthPerQuery {})",
      broadcast, fanOut, endPoints, widthPerNode, broadcastRows, penalty, probeRows, sliceTarget, widthPerQuery);
  return broadcast;
}
 
Example 18
Source File: HepRelVertex.java    From calcite with Apache License 2.0 4 votes vote down vote up
/**
 * Reports the row count of {@code currentRel} as this vertex's own estimate.
 *
 * @param mq metadata query used to obtain the row count
 * @return the row count the metadata query reports for {@code currentRel}
 */
@Override public double estimateRowCount(RelMetadataQuery mq) {
  final double currentRows = mq.getRowCount(currentRel);
  return currentRows;
}
 
Example 19
Source File: ReplacementPointer.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Reports the row count of {@code equivalent} as this node's own estimate.
 *
 * @param mq metadata query used to obtain the row count
 * @return the row count the metadata query reports for {@code equivalent}
 */
@Override
public double estimateRowCount(RelMetadataQuery mq) {
  final double equivalentRows = mq.getRowCount(equivalent);
  return equivalentRows;
}
 
Example 20
Source File: DrillRelMdDistinctRowCount.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Estimates the number of rows which would be produced by a GROUP BY on the
 * set of columns indicated by groupKey, after applying the predicate's
 * selectivity.
 *
 * <p>With an empty group key the result is selectivity times the scan's row
 * count. Otherwise the per-column NDV statistics of the grouped columns are
 * combined as {@code s = product(1 - ndv / rowCount)} and the result is
 * {@code (1 - s) * selectivity * rowCount}. When table metadata cannot be
 * read, or no grouped column has an NDV statistic, the result falls back to
 * 10% of the scan's estimated row count.
 *
 * @param scan scan whose distinct row count is estimated
 * @param mq metadata query used for selectivity and row count
 * @param table table whose group scan supplies the column statistics
 * @param groupKey bit set of grouped column ordinals
 * @param type row type used to map ordinals to column names
 * @param predicate filter whose selectivity scales the estimate
 * @return the estimated number of distinct rows
 */
private Double getDistinctRowCountInternal(DrillScanRelBase scan, RelMetadataQuery mq, DrillTable table,
    ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
  /* If predicate is present, determine its selectivity to estimate filtered rows.
   * Thereafter, compute the number of distinct rows.
   */
  final double selectivity = mq.getSelectivity(scan, predicate);
  final double rowCount = mq.getRowCount(scan);

  if (groupKey.length() == 0) {
    return selectivity * rowCount;
  }

  TableMetadata tableMetadata;
  try {
    tableMetadata = table.getGroupScan().getTableMetadata();
  } catch (IOException e) {
    // Statistics cannot be obtained, use default behaviour
    return scan.estimateRowCount(mq) * 0.1;
  }

  // Product over the grouped columns of (1 - ndv / rowCount); stays 1.0 when no NDV is found.
  double s = 1.0;
  for (int i = 0; i < groupKey.length(); i++) {
    // Only columns that are part of the group key contribute.
    if (!groupKey.get(i)) {
      continue;
    }
    final String colName = type.getFieldNames().get(i);
    ColumnStatistics columnStatistics = tableMetadata != null ? tableMetadata.getColumnStatistics(SchemaPath.getSimplePath(colName)) : null;
    Double ndv = columnStatistics != null ? (Double) columnStatistics.getStatistic(ColumnStatisticsKind.NVD) : null;
    // Skip NDV, if not available
    if (ndv == null) {
      continue;
    }
    s *= 1 - ndv / rowCount;
  }

  if (s > 0 && s < 1.0) {
    return (1 - s) * selectivity * rowCount;
  } else if (s == 1.0) {
    // Could not get any NDV estimate from stats - probably stats not present for GBY cols. So Guess!
    return scan.estimateRowCount(mq) * 0.1;
  } else {
    /* rowCount maybe less than NDV(different source), sanity check OR NDV not used at all */
    return selectivity * rowCount;
  }
}