From Peeyush Gupta <[email protected]>:
Peeyush Gupta has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20279 )
Change subject: [ASTERIXDB-3641] Sampling query taking long time to run
......................................................................
[ASTERIXDB-3641] Sampling query taking long time to run
- user model changes: no
- storage format changes: no
- interface changes: no
Ext-ref: MB-68268
Change-Id: Iad1a6f9339fd844fb43c04b8f56e811e4337fc0e
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
1 file changed, 54 insertions(+), 7 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/79/20279/1
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 49a0aa8..da488b2 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -186,13 +186,12 @@
if
(!(joinExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.EQ))) {
return 0.5; // this may not be accurate obviously!
} // we can do all relops here and other joins such as interval
joins and spatial joins, the compile time might increase a lot
-
- Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails)
index1.getIndexDetails();
- Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails)
index2.getIndexDetails();
- if ((idxDetails1.getSourceCardinality() <
idxDetails1.getSampleCardinalityTarget())
- || (idxDetails2.getSourceCardinality() <
idxDetails2.getSampleCardinalityTarget())) {
- double sel =
findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
- joinEnum.leafInputs.get(idx2 - 1), index1, index2,
joinExpr, jOp);
+ ILogicalOperator leafInput1 = joinEnum.leafInputs.get(idx1 - 1);
+ ILogicalOperator leafInput2 = joinEnum.leafInputs.get(idx2 - 1);
+ LogicalVariable var1 = exprUsedVars.get(0);
+ LogicalVariable var2 = exprUsedVars.get(1);
+ if (isJoinSelFromSamplesApplicable(leafInput1, leafInput2, index1,
index2, var1, var2)) {
+ double sel = findJoinSelFromSamples(leafInput1, leafInput2,
index1, index2, joinExpr, jOp);
if (sel > 0.0) { // if sel is 0.0 we call naiveJoinSelectivity
return sel;
}
@@ -203,6 +202,40 @@
}
}
+ private boolean isJoinSelFromSamplesApplicable(ILogicalOperator
leafInput1, ILogicalOperator leafInput2,
+ Index index1, Index index2, LogicalVariable var1, LogicalVariable
var2) throws AlgebricksException {
+ Index.SampleIndexDetails details1 = (Index.SampleIndexDetails)
index1.getIndexDetails();
+ Index.SampleIndexDetails details2 = (Index.SampleIndexDetails)
index2.getIndexDetails();
+ if (details1.getSourceCardinality() >=
details1.getSampleCardinalityTarget()
+ && details2.getSourceCardinality() >=
details2.getSampleCardinalityTarget()) {
+ return false;
+ }
+ double avgNumRowsPerValue1 = computeAvgNumRowsPerValue(leafInput1,
var1, index1);
+ if (avgNumRowsPerValue1 < 0) {
+ return false;
+ }
+ double avgNumRowsPerValue2 = computeAvgNumRowsPerValue(leafInput2,
var2, index2);
+ if (avgNumRowsPerValue2 < 0) {
+ return false;
+ }
+ return avgNumRowsPerValue1 * avgNumRowsPerValue2 <
Math.max(details1.getSourceCardinality(),
+ details2.getSourceCardinality());
+ }
+
+ private double computeAvgNumRowsPerValue(ILogicalOperator leafInput,
LogicalVariable var, Index index)
+ throws AlgebricksException {
+ List<List<IAObject>> result = runSamplingQueryDistinct(this.optCtx,
leafInput, var, index);
+ if (result == null) {
+ return -1; // Negative value indicates failure
+ }
+ double numDistincts = findPredicateCardinality(result, true);
+ if (numDistincts == 0) {
+ numDistincts = 1; // just in case
+ }
+ Index.SampleIndexDetails details = (Index.SampleIndexDetails)
index.getIndexDetails();
+ return details.getSourceCardinality() / numDistincts;
+ }
+
private double naiveJoinSelectivity(List<LogicalVariable> exprUsedVars,
double card1, double card2, int idx1,
int idx2) throws AlgebricksException {
ILogicalOperator leafInput;
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20279
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: ionic
Gerrit-Change-Id: Iad1a6f9339fd844fb43c04b8f56e811e4337fc0e
Gerrit-Change-Number: 20279
Gerrit-PatchSet: 1
Gerrit-Owner: Peeyush Gupta <[email protected]>
Gerrit-MessageType: newchange