>From Peeyush Gupta <[email protected]>: Peeyush Gupta has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20279 )
Change subject: [ASTERIXDB-3641] Sampling query taking long time to run ...................................................................... [ASTERIXDB-3641] Sampling query taking long time to run - user model changes: no - storage format changes: no - interface changes: no Ext-ref: MB-68268 Change-Id: Iad1a6f9339fd844fb43c04b8f56e811e4337fc0e Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20279 Integration-Tests: Jenkins <[email protected]> Reviewed-by: Preetham Poluparthi <[email protected]> Tested-by: Peeyush Gupta <[email protected]> --- M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java 1 file changed, 63 insertions(+), 7 deletions(-) Approvals: Preetham Poluparthi: Looks good to me, approved Peeyush Gupta: Verified Jenkins: Verified diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java index 49a0aa8..d29f907 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java @@ -186,13 +186,12 @@ if (!(joinExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.EQ))) { return 0.5; // this may not be accurate obviously! } // we can do all relops here and other joins such as interval joins and spatial joins, the compile time might increase a lot - - Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) index1.getIndexDetails(); - Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) index2.getIndexDetails(); - if ((idxDetails1.getSourceCardinality() < idxDetails1.getSampleCardinalityTarget()) - || (idxDetails2.getSourceCardinality() < idxDetails2.getSampleCardinalityTarget())) { - double sel = findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1), - joinEnum.leafInputs.get(idx2 - 1), index1, index2, joinExpr, jOp); + ILogicalOperator leafInput1 = joinEnum.leafInputs.get(idx1 - 1); + ILogicalOperator leafInput2 = joinEnum.leafInputs.get(idx2 - 1); + LogicalVariable var1 = exprUsedVars.get(0); + LogicalVariable var2 = exprUsedVars.get(1); + if (isJoinSelFromSamplesApplicable(leafInput1, leafInput2, index1, index2, var1, var2)) { + double sel = findJoinSelFromSamples(leafInput1, leafInput2, index1, index2, joinExpr, jOp); if (sel > 0.0) { // if sel is 0.0 we call naiveJoinSelectivity return sel; } @@ -203,6 +202,45 @@ } } + private boolean isJoinSelFromSamplesApplicable(ILogicalOperator leafInput1, ILogicalOperator leafInput2, + Index index1, Index index2, LogicalVariable var1, LogicalVariable var2) throws AlgebricksException { + Index.SampleIndexDetails details1 = (Index.SampleIndexDetails) index1.getIndexDetails(); + Index.SampleIndexDetails details2 = (Index.SampleIndexDetails) index2.getIndexDetails(); + if (details1.getSourceCardinality() >= details1.getSampleCardinalityTarget() + && details2.getSourceCardinality() >= details2.getSampleCardinalityTarget()) { + return false; + } + double numDistinct1 = computeNumDistinct(leafInput1, var1, index1); + if (numDistinct1 < 0) { + return false; + } + double avgNumRowsPerValue1 = details1.getSourceCardinality() / numDistinct1; + double numDistinct2 = computeNumDistinct(leafInput2, var2, index2); + if (numDistinct2 < 0) { + return false; + } + double avgNumRowsPerValue2 = details2.getSourceCardinality() / numDistinct2; + return avgNumRowsPerValue1 * avgNumRowsPerValue2 * Math.min(numDistinct1, numDistinct2) <= Math + .max(Math.max(details1.getSourceCardinality(), details2.getSourceCardinality()), 750000); + } + + private double computeNumDistinct(ILogicalOperator leafInput, LogicalVariable var, Index index) + throws AlgebricksException { + List<List<IAObject>> result = runSamplingQueryDistinct(this.optCtx, leafInput, var, index); + if (result == null) { + return -1; // Negative value indicates failure + } + double numDistincts = findPredicateCardinality(result, true); + Index.SampleIndexDetails details = (Index.SampleIndexDetails) index.getIndexDetails(); + if (numDistincts == 0) { + numDistincts = details.getSourceCardinality(); // All values are nulls + } + if (numDistincts == 0) { + numDistincts = 1; // Sample is empty + } + return numDistincts; + } + private double naiveJoinSelectivity(List<LogicalVariable> exprUsedVars, double card1, double card2, int idx1, int idx2) throws AlgebricksException { ILogicalOperator leafInput; -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20279 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: ionic Gerrit-Change-Id: Iad1a6f9339fd844fb43c04b8f56e811e4337fc0e Gerrit-Change-Number: 20279 Gerrit-PatchSet: 5 Gerrit-Owner: Peeyush Gupta <[email protected]> Gerrit-Reviewer: Ali Alsuliman <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Ian Maxon <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Peeyush Gupta <[email protected]> Gerrit-Reviewer: Preetham Poluparthi <[email protected]> Gerrit-Reviewer: [email protected] Gerrit-MessageType: merged
