morningman commented on a change in pull request #3150: Support non-correlated subquery in having clause URL: https://github.com/apache/incubator-doris/pull/3150#discussion_r396200869
########## File path: fe/src/main/java/org/apache/doris/analysis/StmtRewriter.java ########## @@ -97,8 +104,142 @@ private static void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer) } rewriteWhereClauseSubqueries(stmt, analyzer); } + // Rewrite all subquery in the having clause + if (stmt.getHavingPred() != null && stmt.getHavingPred().getSubquery() != null) { + stmt = rewriteHavingClauseSubqueries(stmt, analyzer); + } stmt.sqlString_ = null; if (LOG.isDebugEnabled()) LOG.debug("rewritten stmt: " + stmt.toSql()); + return stmt; + } + + /** + * Rewrite having subquery. + * Step1: rewrite having subquery to where subquery + * Step2: rewrite where subquery + * <p> + * For example: + * select cs_item_sk, sum(cs_sales_price) from catalog_sales a group by cs_item_sk + * having sum(cs_sales_price) > + * (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk); + * <p> + * Step1: rewrite having subquery to where subquery + * Outer query is changed to inline view in rewritten query + * Inline view of outer query: + * from (select cs_item_sk, sum(cs_sales_price) sum_cs_sales_price from catalog_sales group by cs_item_sk) a + * Rewritten subquery of expr: + * where a.sum_cs_sales_price > + * (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk) + * Rewritten query: + * select cs_item_sk, a.sum_cs_sales_price from + * (select cs_item_sk, sum(cs_sales_price) sum_cs_sales_price from catalog_sales group by cs_item_sk) a + * where a.sum_cs_sales_price > + * (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk) + * <p> + * Step2: rewrite where subquery + * Inline view of subquery: + * from (select b.cs_item_sk, min(cs_sales_price) from catalog_sales b group by cs_item_sk) c + * Rewritten correlated predicate: + * where c.cs_item_sk = a.cs_item_sk and a.sum_cs_sales_price > c.min(cs_sales_price) + * + * @param stmt + * @param analyzer + */ + private static SelectStmt rewriteHavingClauseSubqueries(SelectStmt stmt, Analyzer analyzer) throws AnalysisException { + // extract having predicate + Expr havingPredicate = stmt.getHavingPred(); + Preconditions.checkState(havingPredicate != null); + Preconditions.checkState(havingPredicate.getSubquery() != null); + // extract limit + long limit = stmt.getLimit(); + // extract order by element + ArrayList<OrderByElement> orderByElements = stmt.getOrderByElements(); + // extract result of stmt + List<Expr> leftExprList = stmt.getResultExprs(); + // extract table alias generator + TableAliasGenerator tableAliasGenerator = stmt.getTableAliasGenerator(); + + /* + * The outer query is changed to inline view without having predicate + * For example: + * Query: select cs_item_sk, sum(cs_sales_price) from catalog_sales a group by cs_item_sk having ...; + * Inline view: + * from (select cs_item_sk $ColumnA, sum(cs_sales_price) $ColumnB from catalog_sales a group by cs_item_sk) $TableA + */ + SelectStmt inlineViewQuery = (SelectStmt) stmt.clone(); + inlineViewQuery.reset(); + // the having, order by and limit should be move to outer query + inlineViewQuery.removeHavingClause(); + inlineViewQuery.removeOrderByElements(); + inlineViewQuery.removeLimitElement(); + // add a new alias for all of columns in subquery + List<String> colAliasOfInlineView = Lists.newArrayList(); + for (int i = 0; i < inlineViewQuery.getSelectList().getItems().size(); ++i) { + colAliasOfInlineView.add(inlineViewQuery.getColumnAliasGenerator().getNextAlias()); + } + InlineViewRef inlineViewRef = new InlineViewRef(tableAliasGenerator.getNextAlias(), inlineViewQuery, + colAliasOfInlineView); + try { + inlineViewRef.analyze(analyzer); + } catch (UserException e) { + throw new AnalysisException(e.getMessage()); + } + LOG.debug("Outer query is changed to " + inlineViewRef.tableRefToSql()); + + /* + * Columns which belong to outer query can substitute for output columns of inline view + * For example: + * Having predicate: sum(cs_sales_price) > + * (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk); + * Order by: sum(cs_sales_price), a.cs_item_sk + * Columns which belong to outer query: sum(cs_sales_price), a.cs_item_sk + * SMap: <cs_item_sk $ColumnA> <sum(cs_sales_price) $ColumnB> + * After substitute: $ColumnB > + * (select min(cs_sales_price) from catalog_sales b where $ColumnA = b.cs_item_sk) + * Order by: $ColumnB, $ColumnA + */ + /* + * Prepare select list of new query. + * Generate a new select item for each original columns in select list + */ + ExprSubstitutionMap smap = new ExprSubstitutionMap(); + SelectList newSelectList = new SelectList(); + for (int i = 0; i < inlineViewQuery.getSelectList().getItems().size(); i++) { + Expr leftExpr = leftExprList.get(i); + Expr rightExpr = new SlotRef(inlineViewRef.getAliasAsName(), colAliasOfInlineView.get(i)); + rightExpr.analyze(analyzer); + smap.put(leftExpr, rightExpr); + // construct outer query select list + SelectListItem selectListItem = new SelectListItem(rightExpr, stmt.getColLabels().get(i)); + newSelectList.addItem(selectListItem); + } + havingPredicate.reset(); + Expr newWherePredicate = havingPredicate.substitute(smap, analyzer,false); + LOG.debug("Having predicate is changed to " + newWherePredicate.toSql()); + ArrayList<OrderByElement> newOrderByElements = null; + if (orderByElements != null) { + newOrderByElements = OrderByElement.substitute(orderByElements, smap, analyzer); + LOG.debug("Order by is changed to " + Joiner.on(",").join(newOrderByElements)); + } + + // construct rewritten query + List<TableRef> newTableRefList = Lists.newArrayList(); + newTableRefList.add(inlineViewRef); + FromClause newFromClause = new FromClause(newTableRefList); + SelectStmt result = new SelectStmt(newSelectList, newFromClause, newWherePredicate, null, null, + newOrderByElements, new LimitElement(limit)); + result.setTableAliasGenerator(tableAliasGenerator); + try { + result.analyze(analyzer); + } catch (UserException e) { + throw new AnalysisException(e.getMessage()); + } + LOG.info("New stmt {} is constructed after rewritten subquery of having clause.", result.toSql()); + + // rewrite where subquery + result = rewriteSelectStatement(result, analyzer); + LOG.info("The final stmt is " + result.toSql()); Review comment: ```suggestion LOG.debug("The final stmt is " + result.toSql()); ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org