From Peeyush Gupta <[email protected]>:
Peeyush Gupta has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20366?usp=email )
Change subject: [ASTERIXDB-3641] Sampling query taking long time to run
......................................................................
[ASTERIXDB-3641] Sampling query taking long time to run
- user model changes: no
- storage format changes: no
- interface changes: no
Ext-ref: MB-68268
Change-Id: I8ad01e32fafdb68aab738cf49070646a4c1ddfe9
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
1 file changed, 46 insertions(+), 5 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/66/20366/1
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 0d0558e..78cf365 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -177,11 +177,13 @@
boolean unnestOp1 =
joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx1 - 1));
boolean unnestOp2 =
joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx2 - 1));
boolean unnestOp = unnestOp1 || unnestOp2;
- Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails)
index1.getIndexDetails();
- Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails)
index2.getIndexDetails();
- if (((idxDetails1.getSourceCardinality() <
idxDetails1.getSampleCardinalityTarget())
- || (idxDetails2.getSourceCardinality() <
idxDetails2.getSampleCardinalityTarget())
- || exprUsedVars.size() > 2) && !unnestOp) { //* if there are more than 2 variables, it is not a simple join like r.a op s.a
+ ILogicalOperator leafInput1 = joinEnum.leafInputs.get(idx1 - 1);
+ ILogicalOperator leafInput2 = joinEnum.leafInputs.get(idx2 - 1);
+ LogicalVariable var1 = exprUsedVars.get(0);
+ LogicalVariable var2 = exprUsedVars.get(1);
+ // If there are more than 2 variables, it is not a simple join like r.a op s.a
+ if (!unnestOp && (exprUsedVars.size() > 2
+ || isJoinSelFromSamplesApplicable(leafInput1, leafInput2,
index1, index2, var1, var2))) {
double sels =
findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
joinEnum.leafInputs.get(idx2 - 1), index1, index2,
joinExpr, jOp);
if (sels == 0.0) {
@@ -195,6 +197,45 @@
}
}
+ private boolean isJoinSelFromSamplesApplicable(ILogicalOperator
leafInput1, ILogicalOperator leafInput2,
+ Index index1, Index index2, LogicalVariable var1, LogicalVariable
var2) throws AlgebricksException {
+ Index.SampleIndexDetails details1 = (Index.SampleIndexDetails)
index1.getIndexDetails();
+ Index.SampleIndexDetails details2 = (Index.SampleIndexDetails)
index2.getIndexDetails();
+ if (details1.getSourceCardinality() >=
details1.getSampleCardinalityTarget()
+ && details2.getSourceCardinality() >=
details2.getSampleCardinalityTarget()) {
+ return false;
+ }
+ double numDistinct1 = computeNumDistinct(leafInput1, var1, index1);
+ if (numDistinct1 < 0) {
+ return false;
+ }
+ double avgNumRowsPerValue1 = details1.getSourceCardinality() /
numDistinct1;
+ double numDistinct2 = computeNumDistinct(leafInput2, var2, index2);
+ if (numDistinct2 < 0) {
+ return false;
+ }
+ double avgNumRowsPerValue2 = details2.getSourceCardinality() /
numDistinct2;
+ return avgNumRowsPerValue1 * avgNumRowsPerValue2 *
Math.min(numDistinct1, numDistinct2) <= Math
+ .max(Math.max(details1.getSourceCardinality(),
details2.getSourceCardinality()), 750000);
+ }
+
+ private double computeNumDistinct(ILogicalOperator leafInput,
LogicalVariable var, Index index)
+ throws AlgebricksException {
+ List<List<IAObject>> result = runSamplingQueryDistinct(this.optCtx,
leafInput, var, index);
+ if (result == null) {
+ return -1; // Negative value indicates failure
+ }
+ double numDistincts = findPredicateCardinality(result, true);
+ Index.SampleIndexDetails details = (Index.SampleIndexDetails)
index.getIndexDetails();
+ if (numDistincts == 0) {
+ numDistincts = details.getSourceCardinality(); // All values are nulls
+ }
+ if (numDistincts == 0) {
+ numDistincts = 1; // Sample is empty
+ }
+ return numDistincts;
+ }
+
private double naiveJoinSelectivity(List<LogicalVariable> exprUsedVars,
double card1, double card2, int idx1,
int idx2, boolean unnestOp1, boolean unnestOp2) throws
AlgebricksException {
ILogicalOperator leafInput;
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20366?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: I8ad01e32fafdb68aab738cf49070646a4c1ddfe9
Gerrit-Change-Number: 20366
Gerrit-PatchSet: 1
Gerrit-Owner: Peeyush Gupta <[email protected]>