>From Janhavi Tripurwar <[email protected]>:
Janhavi Tripurwar has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20441?usp=email )
Change subject: wip sampling pipeline
......................................................................
wip sampling pipeline
Change-Id: I51f99463d24596e74b5d435f718236de9016f2e8
---
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
2 files changed, 112 insertions(+), 0 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/41/20441/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index 16518c2..a4dff25 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -54,6 +54,7 @@
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.common.utils.Quadruple;
import org.apache.hyracks.algebricks.common.utils.Triple;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
@@ -80,10 +81,12 @@
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.LimitOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import
org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter;
+import org.apache.hyracks.algebricks.core.algebra.properties.OrderColumn;
import
org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
import
org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
import org.apache.hyracks.api.exceptions.Warning;
@@ -1044,9 +1047,31 @@
this.rootOrderByOp.getAnnotations().put(OperatorAnnotations.OP_OUTPUT_CARDINALITY,
grpInputCard);
}
}
+ maybeAttachStaticRangeMap();
return jnNumber;
}
+ /**
+  * Computes split points for the root ORDER operator at compile time and records them on the
+  * operator under {@link OperatorAnnotations#USE_STATIC_RANGE}.
+  *
+  * Nothing is attached when there is no root ORDER operator, when the operator already carries
+  * a {@code USE_STATIC_RANGE} annotation, when parallel sort is disabled in the physical
+  * optimization config, or when the sampling helper returns {@code null}.
+  *
+  * @throws AlgebricksException if the sampling helper fails
+  */
+ private void maybeAttachStaticRangeMap() throws AlgebricksException {
+     // instanceof rejects null as well, and guards the downcast below against a
+     // ClassCastException should the root operator ever be something other than an ORDER.
+     if (!(rootOrderByOp instanceof OrderOperator)) {
+         return;
+     }
+     OrderOperator ord = (OrderOperator) rootOrderByOp;
+     if (ord.getAnnotations().containsKey(OperatorAnnotations.USE_STATIC_RANGE)
+             || !optCtx.getPhysicalOptimizationConfig().getSortParallel()) {
+         return;
+     }
+
+     // Clone the sort keys so the sampling plan never shares expression instances with the real plan.
+     List<Mutable<ILogicalExpression>> sortExprs = new ArrayList<>(ord.getOrderExpressions().size());
+     for (Pair<OrderOperator.IOrder, Mutable<ILogicalExpression>> p : ord.getOrderExpressions()) {
+         sortExprs.add(new MutableObject<>(p.second.getValue().cloneExpression()));
+     }
+
+     // Compute the split vector at compile time
+     List<List<IAObject>> rm = stats.splitHelperFunction(optCtx, ord.getInputs().get(0).getValue(), sortExprs);
+     if (rm != null) {
+         // NOTE(review): the raw query result is stored as the annotation value; confirm that
+         // the consumers of USE_STATIC_RANGE accept a List<List<IAObject>> (they presumably
+         // expect a range-map object, in which case a conversion step is still missing).
+         ord.getAnnotations().put(OperatorAnnotations.USE_STATIC_RANGE, rm);
+     }
+ }
+
private int initializeBaseLevelJoinNodes() throws AlgebricksException {
// join nodes have been allocated in the JoinEnum
// add a dummy Plan Node; we do not want planNode at position 0 to be
a valid plan
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 78cf365..4d21644 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -74,6 +74,7 @@
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import org.apache.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
+import org.apache.hyracks.algebricks.core.algebra.properties.OrderColumn;
import
org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil;
import org.apache.hyracks.api.exceptions.ErrorCode;
@@ -912,6 +913,92 @@
return helperFunction(newCtx, newAggOp);
}
+ /**
+  * Evaluates the given sort-key expressions over a copy of the ORDER operator's input, with the
+  * data-source scan in that copy redirected to the sample data source, and returns the rows
+  * produced by the sampling query.
+  *
+  * @param ctx        optimization context of the enclosing query; it is cloned, not modified
+  * @param orderInput operator feeding the ORDER operator; it is copied, not modified
+  * @param sortExprs  sort-key expressions; each one is cloned into a new ASSIGN above the copy
+  * @return the result of the sampling query, or {@code null} when no data-source scan could be
+  *         redirected to its sample (the key query is then not run at all)
+  * @throws AlgebricksException if building, typing or running the sampling plan fails
+  */
+ protected List<List<IAObject>> splitHelperFunction(IOptimizationContext ctx, ILogicalOperator orderInput,
+         List<Mutable<ILogicalExpression>> sortExprs) throws AlgebricksException {
+     IOptimizationContext newCtx = ctx.getOptimizationContextFactory().cloneOptimizationContext(ctx);
+
+     // Copy the subtree feeding ORDER so the real plan is left untouched.
+     ILogicalOperator newRoot = OperatorManipulationUtil.bottomUpCopyOperators(orderInput);
+
+     // Without a sample scan the key query would read the unsampled plan; bail out instead.
+     if (!redirectScanToSample(newRoot)) {
+         return null;
+     }
+
+     // Single ASSIGN that binds one fresh variable per sort key.
+     List<LogicalVariable> outVars = new ArrayList<>(sortExprs.size());
+     List<Mutable<ILogicalExpression>> assignExprs = new ArrayList<>(sortExprs.size());
+     for (Mutable<ILogicalExpression> e : sortExprs) {
+         outVars.add(newCtx.newVar());
+         assignExprs.add(new MutableObject<>(e.getValue().cloneExpression()));
+     }
+     AssignOperator assign = new AssignOperator(outVars, assignExprs);
+     assign.getInputs().add(new MutableObject<>(newRoot));
+
+     // Typing from the top operator once is enough; a separate pass over newRoot
+     // before the ASSIGN is added would only repeat the same work.
+     Mutable<ILogicalOperator> topRef = new MutableObject<>(assign);
+     OperatorPropertiesUtil.typeOpRec(topRef, newCtx);
+
+     return AnalysisUtil.runQuery(topRef, outVars, newCtx, IRuleSetFactory.RuleSetKind.SAMPLING);
+ }
+
+ /**
+  * Points the data-source scan located via {@code joinEnum.findDataSourceScanOperatorParent}
+  * at the sample data source. The located operator may be the scan itself or an operator whose
+  * first input is the scan; both shapes are handled.
+  *
+  * @param root root of the (already copied) subtree to search
+  * @return {@code true} if a scan was found and redirected, {@code false} otherwise
+  */
+ private boolean redirectScanToSample(ILogicalOperator root) throws AlgebricksException {
+     ILogicalOperator parent = joinEnum.findDataSourceScanOperatorParent(root);
+     if (parent == null) {
+         return false;
+     }
+     ILogicalOperator candidate = parent;
+     if (parent.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) {
+         if (parent.getInputs().isEmpty()) {
+             return false;
+         }
+         candidate = parent.getInputs().get(0).getValue();
+     }
+     if (candidate.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) {
+         return false;
+     }
+     DataSourceScanOperator scan = (DataSourceScanOperator) candidate;
+     scan.setDataSource(joinEnum.getSampleDataSource(scan)); // scan on sample source
+     return true;
+ }
+
// This one gets the cardinality and also projection sizes
protected List<List<IAObject>>
runSamplingQueryProjection(IOptimizationContext ctx, ILogicalOperator logOp,
int dataset, LogicalVariable primaryKey) throws
AlgebricksException {
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20441?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: I51f99463d24596e74b5d435f718236de9016f2e8
Gerrit-Change-Number: 20441
Gerrit-PatchSet: 1
Gerrit-Owner: Janhavi Tripurwar <[email protected]>