Java Code Examples for org.apache.calcite.rel.metadata.RelMetadataQuery#getDistinctRowCount()

The following examples show how to use org.apache.calcite.rel.metadata.RelMetadataQuery#getDistinctRowCount(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: DrillRelMdDistinctRowCount.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Estimates the distinct row count of {@code rel} for the columns in {@code groupKey}.
 *
 * <p>The estimate is chosen by the kind of node: scans, single-input nodes, joins and
 * {@link RelSubset}s each have a dedicated path. Every case that is not resolved by one of
 * those paths is delegated to the superclass implementation.
 *
 * @param rel relational expression being estimated
 * @param mq metadata query used for nested lookups
 * @param groupKey columns whose distinct count is requested
 * @param predicate predicate handed on to the underlying estimators
 * @return the distinct row count produced by the matching path
 */
@Override
public Double getDistinctRowCount(RelNode rel, RelMetadataQuery mq, ImmutableBitSet groupKey, RexNode predicate) {
  // Scans: applies to both Drill Logical and Physical Rels.
  if (rel instanceof DrillScanRelBase) {
    if (!DrillRelOptUtil.guessRows(rel)) {
      DrillScanRelBase scan = (DrillScanRelBase) rel;
      DrillTable drillTable = Utilities.getDrillTable(rel.getTable());
      return getDistinctRowCountInternal(scan, mq, drillTable, groupKey, rel.getRowType(), predicate);
    }
    /* If we are not using statistics OR there is no table or metadata (stats) table associated with scan,
     * estimate the distinct row count. Consistent with the estimation of Aggregate row count in
     * RelMdRowCount: distinctRowCount = rowCount * 10%.
     */
    if (rel instanceof DrillScanRel) {
      // The existing Drill behavior is to only use this estimation for DrillScanRel and not ScanPrel.
      // TODO: We may potentially do it for ScanPrel (outside the scope of statistics)
      return rel.estimateRowCount(mq) * 0.1;
    }
    // Any other scan without statistics falls back to the default estimate.
    return super.getDistinctRowCount(rel, mq, groupKey, predicate);
  }

  // Single-input nodes: reuse the estimate of the input.
  if (rel instanceof SingleRel && !DrillRelOptUtil.guessRows(rel)) {
    if (rel instanceof Window) {
      int inputFieldCount = ((Window) rel).getInput().getRowType().getFieldCount();
      // For window aggregates delegate ndv to parent: a key at or beyond the input's field
      // count is not a column of the input.
      boolean keyBeyondInput = false;
      for (int keyOrdinal : groupKey) {
        if (keyOrdinal >= inputFieldCount) {
          keyBeyondInput = true;
          break;
        }
      }
      if (keyBeyondInput) {
        return super.getDistinctRowCount(rel, mq, groupKey, predicate);
      }
    }
    return mq.getDistinctRowCount(((SingleRel) rel).getInput(), groupKey, predicate);
  }

  // Joins: assume ndv is unaffected by the join.
  if (rel instanceof DrillJoinRelBase && !DrillRelOptUtil.guessRows(rel)) {
    return getDistinctRowCountInternal(((DrillJoinRelBase) rel), mq, groupKey, predicate);
  }

  // Subsets: prefer the best member, then the original one.
  if (rel instanceof RelSubset && !DrillRelOptUtil.guessRows(rel)) {
    RelSubset subset = (RelSubset) rel;
    RelNode best = subset.getBest();
    if (best != null) {
      return mq.getDistinctRowCount(best, groupKey, predicate);
    }
    RelNode original = subset.getOriginal();
    if (original != null) {
      return mq.getDistinctRowCount(original, groupKey, predicate);
    }
  }

  return super.getDistinctRowCount(rel, mq, groupKey, predicate);
}
 
Example 2
Source File: DrillJoinRelBase.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Estimates the number of rows produced by this join.
 *
 * <ul>
 *   <li>If the join condition is always true, the estimate is the product of both input
 *       estimates scaled by {@code joinRowFactor}.</li>
 *   <li>If statistics are usable, the join is an inner join and it is a simple equi-join,
 *       the estimate is {@code (leftRows * rightRows) / max(leftNdv, rightNdv)}.</li>
 *   <li>Otherwise the estimate is {@code joinRowFactor * max(leftRows, rightRows)}.</li>
 * </ul>
 *
 * <p>The metadata lookups return boxed values that the statistics path already treats as
 * possibly {@code null}; the fallback path applies the same caution and substitutes the
 * input's own {@code estimateRowCount} when a row count is missing. A non-positive NDV
 * denominator also routes to the fallback instead of yielding an infinite or NaN estimate.
 *
 * @param mq metadata query used for row count and distinct row count lookups
 * @return estimated row count of the join
 */
@Override
public double estimateRowCount(RelMetadataQuery mq) {
  if (this.condition.isAlwaysTrue()) {
    return joinRowFactor * this.getLeft().estimateRowCount(mq) * this.getRight().estimateRowCount(mq);
  }

  int[] joinFields = new int[2];

  LogicalJoin jr = LogicalJoin.create(this.getLeft(), this.getRight(), this.getCondition(),
          this.getVariablesSet(), this.getJoinType());

  if (!DrillRelOptUtil.guessRows(this)         //Statistics present for left and right side of the join
      && jr.getJoinType() == JoinRelType.INNER
      && DrillRelOptUtil.analyzeSimpleEquiJoin((Join)jr, joinFields)) {
    ImmutableBitSet leq = ImmutableBitSet.of(joinFields[0]);
    ImmutableBitSet req = ImmutableBitSet.of(joinFields[1]);

    Double ldrc = mq.getDistinctRowCount(this.getLeft(), leq, null);
    Double rdrc = mq.getDistinctRowCount(this.getRight(), req, null);

    Double lrc = mq.getRowCount(this.getLeft());
    Double rrc = mq.getRowCount(this.getRight());

    if (ldrc != null && rdrc != null && lrc != null && rrc != null) {
      double maxNdv = Math.max(ldrc, rdrc);
      // A zero (or NaN) denominator would produce Infinity/NaN; use the fallback instead.
      if (maxNdv > 0) {
        // Join cardinality = (lrc * rrc) / Math.max(ldrc, rdrc). Avoid overflow by dividing earlier
        return (lrc / maxNdv) * rrc;
      }
    }
  }

  // Fallback: the row counts may be null, so never unbox them blindly.
  Double leftRowCount = mq.getRowCount(this.getLeft());
  Double rightRowCount = mq.getRowCount(this.getRight());
  double leftRows = leftRowCount != null ? leftRowCount : this.getLeft().estimateRowCount(mq);
  double rightRows = rightRowCount != null ? rightRowCount : this.getRight().estimateRowCount(mq);
  return joinRowFactor * Math.max(leftRows, rightRows);
}
 
Example 3
Source File: RelMetadataTest.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the distinct row count over the DEPTNO column of a filtered EMP query is
 * reported as {@code null}.
 */
@Test void testDistinctRowCountTable() {
  // no unique key information is available so return null
  final RelNode rel = convertSql("select * from emp where deptno = 10");
  final int deptnoOrdinal = rel.getRowType().getFieldNames().indexOf("DEPTNO");
  final ImmutableBitSet groupKey = ImmutableBitSet.of(deptnoOrdinal);
  final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
  assertThat(mq.getDistinctRowCount(rel, groupKey, null), nullValue());
}
 
Example 4
Source File: RelMetadataTest.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the distinct row count for an empty group key on a filtered EMP query is
 * exactly one.
 */
@Test void testDistinctRowCountTableEmptyKey() {
  final RelNode rel = convertSql("select * from emp where deptno = 10");
  final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
  // empty key
  final ImmutableBitSet emptyKey = ImmutableBitSet.of();
  assertThat(mq.getDistinctRowCount(rel, emptyKey, null), is(1D));
}
 
Example 5
Source File: LoptOptimizeJoinRule.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the cardinality of the join columns from a particular factor,
 * when that factor is joined with another join tree.
 *
 * <p>Join keys are collected first from the supplied filters and then from
 * the outer join condition of the factor. If no key is found, the result is
 * {@code null}.
 *
 * @param mq metadata query used to look up the distinct row count
 * @param multiJoin join factors being optimized
 * @param semiJoinOpt optimal semijoins chosen for each factor
 * @param joinTree the join tree that the factor is being joined with
 * @param filters possible join filters to select from
 * @param factor the factor being added
 *
 * @return computed cardinality, or {@code null} if no join keys were found
 */
private Double computeJoinCardinality(
    RelMetadataQuery mq,
    LoptMultiJoin multiJoin,
    LoptSemiJoinOptimizer semiJoinOpt,
    LoptJoinTree joinTree,
    List<RexNode> filters,
    int factor) {
  // factors already in the join tree plus the one being added
  final ImmutableBitSet childFactors =
      ImmutableBitSet.builder()
          .addAll(joinTree.getTreeOrder())
          .set(factor)
          .build();

  final int factorStart = multiJoin.getJoinStart(factor);
  final int nFields = multiJoin.getNumFieldsInJoinFactor(factor);
  final ImmutableBitSet.Builder joinKeys = ImmutableBitSet.builder();

  // first pass: the inner join filters that reference only the factors in
  // either the join tree or the factor that will be added
  setFactorJoinKeys(multiJoin, filters, childFactors, factorStart, nFields, joinKeys);

  // second pass: the outer join filters where the factor being added is
  // the null generating factor in the outer join
  final List<RexNode> outerJoinFilters =
      RelOptUtil.conjunctions(multiJoin.getOuterJoinCond(factor));
  setFactorJoinKeys(multiJoin, outerJoinFilters, childFactors, factorStart, nFields, joinKeys);

  // if the join tree doesn't contain all the necessary factors in any of
  // the join filters, joinKeys stays empty and there is nothing to estimate
  if (joinKeys.isEmpty()) {
    return null;
  }
  return mq.getDistinctRowCount(semiJoinOpt.getChosenSemiJoin(factor),
      joinKeys.build(), null);
}
 
Example 6
Source File: DrillRelMdDistinctRowCount.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Estimates the number of distinct values of {@code groupKey} produced by a join, assuming
 * the NDV is unaffected by the join when the key columns come from one side only.
 *
 * <p>The group key is split into a left and a right part, and the predicate (if any) is
 * classified into filters that can be pushed to either side. Each side that contributes key
 * columns is asked for its own NDV; the larger of the available values is used, or 1 when
 * neither side yields one. The result is passed through {@code RelMdUtil.numDistinctVals}
 * together with the join's row count.
 *
 * @param joinRel join being estimated
 * @param mq metadata query used for nested lookups
 * @param groupKey columns of the join output whose distinct count is requested
 * @param predicate filter to split between the join inputs; may be {@code null}
 * @return the value returned by {@code RelMdUtil.numDistinctVals}, or the superclass
 *     estimate when row counts are being guessed
 */
private Double getDistinctRowCountInternal(DrillJoinRelBase joinRel, RelMetadataQuery mq, ImmutableBitSet groupKey,
     RexNode predicate) {
  if (DrillRelOptUtil.guessRows(joinRel)) {
    return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
  }

  // Assume NDV is unaffected by the join when groupKey comes from one side of the join
  // Alleviates NDV over-estimates
  final ImmutableBitSet.Builder leftMask = ImmutableBitSet.builder();
  final ImmutableBitSet.Builder rightMask = ImmutableBitSet.builder();
  final JoinRelType joinType = joinRel.getJoinType();
  final RelNode left = joinRel.getInputs().get(0);
  final RelNode right = joinRel.getInputs().get(1);
  RelMdUtil.setLeftRightBitmaps(groupKey, leftMask, rightMask, left.getRowType().getFieldCount());

  // Identify predicates which can be pushed onto the left and right sides of the join
  RexNode leftPred = null;
  RexNode rightPred = null;
  if (predicate != null) {
    final List<RexNode> joinFilters = new ArrayList<>();
    final List<RexNode> leftFilters = new ArrayList<>();
    final List<RexNode> rightFilters = new ArrayList<>();
    RelOptUtil.classifyFilters(
        joinRel,
        RelOptUtil.conjunctions(predicate),
        joinType,
        joinType == JoinRelType.INNER,
        !joinType.generatesNullsOnLeft(),
        !joinType.generatesNullsOnRight(),
        joinFilters,
        leftFilters,
        rightFilters);
    final RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
    leftPred = RexUtil.composeConjunction(rexBuilder, leftFilters, true);
    rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters, true);
  }

  // Get NDV estimates for the left and right side predicates, if applicable
  final ImmutableBitSet leftKeys = leftMask.build();
  final ImmutableBitSet rightKeys = rightMask.build();
  Double leftNdv = null;
  if (leftKeys.length() > 0) {
    leftNdv = mq.getDistinctRowCount(left, leftKeys, leftPred);
  }
  Double rightNdv = null;
  if (rightKeys.length() > 0) {
    rightNdv = mq.getDistinctRowCount(right, rightKeys, rightPred);
  }

  final double ndv;
  if (leftNdv != null && rightNdv != null) {
    // Use max of NDVs from both sides of the join
    ndv = Math.max(leftNdv, rightNdv);
  } else if (rightNdv != null) {
    ndv = rightNdv;
  } else if (leftNdv != null) {
    ndv = leftNdv;
  } else {
    // Neither side produced an estimate
    ndv = 1;
  }
  return RelMdUtil.numDistinctVals(ndv, mq.getRowCount(joinRel));
}
 
Example 7
Source File: RuntimeFilterVisitor.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a candidate RuntimeFilter for a HashJoinPrel, leaving some bloom filter parameters of the
 * generated RuntimeFilter to be set later.
 *
 * <p>For every join key pair whose left field can be traced to a scan (with no blocking node in
 * between), a bloom filter definition sized from the field's NDV is created. The scan is also
 * recorded in {@code toAddRuntimeFilter}, and the last such scan is mapped to the join in
 * {@code probeSideScan2hj}.
 *
 * @param hashJoinPrel hash join to derive the runtime filter from
 * @return null or a partial information RuntimeFilterDef
 */
private RuntimeFilterDef generateRuntimeFilter(HashJoinPrel hashJoinPrel) {
  final JoinRelType joinRelType = hashJoinPrel.getJoinType();
  final JoinInfo joinInfo = hashJoinPrel.analyzeCondition();
  // Only equi-joins of type INNER or RIGHT are eligible.
  if (!joinInfo.isEqui() || (joinRelType != JoinRelType.INNER && joinRelType != JoinRelType.RIGHT)) {
    return null;
  }
  //TODO check whether to enable RuntimeFilter according to the NDV percent,
  // e.g. return null when leftNDV / rightNDV exceeds a threshold such as 0.5.

  final RelNode left = hashJoinPrel.getLeft();
  final RelNode right = hashJoinPrel.getRight();
  if (findRightExchangePrel(right) == null) {
    //Does not support the single fragment mode ,that is the right build side
    //can only be BroadcastExchangePrel or HashToRandomExchangePrel
    return null;
  }

  final List<String> leftFields = left.getRowType().getFieldNames();
  final List<String> rightFields = right.getRowType().getFieldNames();
  final List<Integer> leftKeys = hashJoinPrel.getLeftKeys();
  final List<Integer> rightKeys = hashJoinPrel.getRightKeys();
  final RelMetadataQuery metadataQuery = left.getCluster().getMetadataQuery();

  final List<BloomFilterDef> bloomFilterDefs = new ArrayList<>();
  //the possible left scan node of the left join key
  ScanPrel probeSideScanPrel = null;

  for (int keyPos = 0; keyPos < leftKeys.size(); keyPos++) {
    final String leftFieldName = leftFields.get(leftKeys.get(keyPos));
    final String rightFieldName = rightFields.get(rightKeys.get(keyPos));

    //This also avoids the left field of the join condition with a function call.
    final ScanPrel scanPrel = findLeftScanPrel(leftFieldName, left);
    if (scanPrel == null) {
      continue;
    }
    if (containBlockNode((Prel) left, scanPrel)) {
      continue;
    }

    //Collect NDV from the Metadata
    final int index = scanPrel.getRowType().getField(leftFieldName, true, true).getIndex();
    Double ndv = metadataQuery.getDistinctRowCount(scanPrel, ImmutableBitSet.of(index), null);
    if (ndv == null) {
      //If NDV is not supplied, we use the row count to estimate the ndv.
      ndv = left.estimateRowCount(metadataQuery) * 0.1;
    }

    // Cap the bloom filter size at the configured maximum.
    final int bloomFilterSizeInBytes =
        Math.min(BloomFilter.optimalNumOfBytes(ndv.longValue(), fpp), bloomFilterMaxSizeInBytesDef);

    //left the local parameter to be set later.
    final BloomFilterDef bloomFilterDef =
        new BloomFilterDef(bloomFilterSizeInBytes, false, leftFieldName, rightFieldName);
    bloomFilterDef.setLeftNDV(ndv);
    bloomFilterDefs.add(bloomFilterDef);
    toAddRuntimeFilter.add(scanPrel);
    probeSideScanPrel = scanPrel;
  }

  if (bloomFilterDefs.isEmpty()) {
    return null;
  }
  //left sendToForeman parameter to be set later.
  final RuntimeFilterDef runtimeFilterDef = new RuntimeFilterDef(true, false, bloomFilterDefs, false, -1);
  probeSideScan2hj.put(probeSideScanPrel, hashJoinPrel);
  return runtimeFilterDef;
}
 
Example 8
Source File: RelMdDistinctRowCount.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves the distinct row count of a {@link HepRelVertex} by forwarding the request to the
 * relational expression the vertex currently holds.
 *
 * @param vertex planner vertex being estimated
 * @param mq metadata query that performs the forwarded lookup
 * @param groupKey columns whose distinct count is requested
 * @param predicate predicate forwarded unchanged
 * @return the distinct row count reported for the vertex's current expression
 */
public Double getDistinctRowCount(
    HepRelVertex vertex,
    RelMetadataQuery mq,
    ImmutableBitSet groupKey,
    RexNode predicate) {
  return mq.getDistinctRowCount(
      vertex.getCurrentRel(),
      groupKey,
      predicate);
}
 
Example 9
Source File: RelMdDistinctRowCount.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves the distinct row count of a {@link ReplacementPointer} by forwarding the request to
 * the sub-tree the pointer refers to.
 *
 * @param pointer replacement pointer being estimated
 * @param mq metadata query that performs the forwarded lookup
 * @param groupKey columns whose distinct count is requested
 * @param predicate predicate forwarded unchanged
 * @return the distinct row count reported for the pointer's sub-tree
 */
public Double getDistinctRowCount(
    ReplacementPointer pointer,
    RelMetadataQuery mq,
    ImmutableBitSet groupKey,
    RexNode predicate) {
  return mq.getDistinctRowCount(
      pointer.getSubTree(),
      groupKey,
      predicate);
}
 
Example 10
Source File: LoptOptimizeJoinRule.java    From calcite with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the cardinality of the join columns from a particular factor,
 * when that factor is joined with another join tree.
 *
 * <p>Join keys are gathered in two passes over {@code setFactorJoinKeys}:
 * once for the supplied filters and once for the factor's outer join
 * condition. With no keys gathered the method yields {@code null}.
 *
 * @param mq metadata query used to look up the distinct row count
 * @param multiJoin join factors being optimized
 * @param semiJoinOpt optimal semijoins chosen for each factor
 * @param joinTree the join tree that the factor is being joined with
 * @param filters possible join filters to select from
 * @param factor the factor being added
 *
 * @return computed cardinality, or {@code null} when no join keys apply
 */
private Double computeJoinCardinality(
    RelMetadataQuery mq,
    LoptMultiJoin multiJoin,
    LoptSemiJoinOptimizer semiJoinOpt,
    LoptJoinTree joinTree,
    List<RexNode> filters,
    int factor) {
  final ImmutableBitSet.Builder factorSetBuilder = ImmutableBitSet.builder();
  factorSetBuilder.addAll(joinTree.getTreeOrder());
  factorSetBuilder.set(factor);
  final ImmutableBitSet childFactors = factorSetBuilder.build();

  final int firstField = multiJoin.getJoinStart(factor);
  final int fieldCount = multiJoin.getNumFieldsInJoinFactor(factor);
  final ImmutableBitSet.Builder joinKeys = ImmutableBitSet.builder();

  // Pass 1: inner join filters, picking out the ones that reference only
  // the factors in either the join tree or the factor that will be added.
  setFactorJoinKeys(
      multiJoin, filters, childFactors, firstField, fieldCount, joinKeys);

  // Pass 2: outer join filters where the factor being added is the null
  // generating factor in the outer join.
  final List<RexNode> outerFilters =
      RelOptUtil.conjunctions(multiJoin.getOuterJoinCond(factor));
  setFactorJoinKeys(
      multiJoin, outerFilters, childFactors, firstField, fieldCount, joinKeys);

  // If the join tree doesn't contain all the necessary factors in any of
  // the join filters, joinKeys is empty and null is returned.
  return joinKeys.isEmpty()
      ? null
      : mq.getDistinctRowCount(
          semiJoinOpt.getChosenSemiJoin(factor), joinKeys.build(), null);
}