EmmyMiao87 commented on a change in pull request #3150: Support non-correlated subquery in having clause URL: https://github.com/apache/incubator-doris/pull/3150#discussion_r397577884
########## File path: fe/src/main/java/org/apache/doris/analysis/StmtRewriter.java ########## @@ -43,62 +44,260 @@ * Rewrite the statement of an analysis result. The unanalyzed rewritten * statement is returned. */ - public static void rewrite(Analyzer analyzer, StatementBase parsedStmt) + public static StatementBase rewrite(Analyzer analyzer, StatementBase parsedStmt) throws AnalysisException { if (parsedStmt instanceof QueryStmt) { QueryStmt analyzedStmt = (QueryStmt) parsedStmt; Preconditions.checkNotNull(analyzedStmt.analyzer); - rewriteQueryStatement(analyzedStmt, analyzer); + return rewriteQueryStatement(analyzedStmt, analyzer); } else if (parsedStmt instanceof InsertStmt) { final InsertStmt insertStmt = (InsertStmt)parsedStmt; final QueryStmt analyzedStmt = (QueryStmt)insertStmt.getQueryStmt(); Preconditions.checkNotNull(analyzedStmt.analyzer); - rewriteQueryStatement(analyzedStmt, analyzer); + QueryStmt rewrittenQueryStmt = rewriteQueryStatement(analyzedStmt, analyzer); + insertStmt.setQueryStmt(rewrittenQueryStmt); } else { throw new AnalysisException("Unsupported statement containing subqueries: " + parsedStmt.toSql()); } + return parsedStmt; } /** * Calls the appropriate rewrite method based on the specific type of query stmt. See * rewriteSelectStatement() and rewriteUnionStatement() documentation. */ - public static void rewriteQueryStatement(QueryStmt stmt, Analyzer analyzer) + public static QueryStmt rewriteQueryStatement(QueryStmt stmt, Analyzer analyzer) throws AnalysisException { Preconditions.checkNotNull(stmt); if (stmt instanceof SelectStmt) { - rewriteSelectStatement((SelectStmt) stmt, analyzer); + return rewriteSelectStatement((SelectStmt) stmt, analyzer); } else if (stmt instanceof SetOperationStmt) { rewriteUnionStatement((SetOperationStmt) stmt, analyzer); } else { throw new AnalysisException("Subqueries not supported for " + stmt.getClass().getSimpleName() + " statements"); } + return stmt; } - private static void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer) + private static SelectStmt rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer) throws AnalysisException { + SelectStmt result = stmt; // Rewrite all the subqueries in the FROM clause. - for (TableRef tblRef: stmt.fromClause_) { + for (TableRef tblRef: result.fromClause_) { if (!(tblRef instanceof InlineViewRef)) continue; InlineViewRef inlineViewRef = (InlineViewRef)tblRef; - rewriteQueryStatement(inlineViewRef.getViewStmt(), inlineViewRef.getAnalyzer()); + QueryStmt rewrittenQueryStmt = rewriteQueryStatement(inlineViewRef.getViewStmt(), + inlineViewRef.getAnalyzer()); + inlineViewRef.setViewStmt(rewrittenQueryStmt); } // Rewrite all the subqueries in the WHERE clause. - if (stmt.hasWhereClause()) { + if (result.hasWhereClause()) { // Push negation to leaf operands. - stmt.whereClause = Expr.pushNegationToOperands(stmt.whereClause); + result.whereClause = Expr.pushNegationToOperands(result.whereClause); // Check if we can rewrite the subqueries in the WHERE clause. OR predicates with // subqueries are not supported. - if (hasSubqueryInDisjunction(stmt.whereClause)) { + if (hasSubqueryInDisjunction(result.whereClause)) { throw new AnalysisException("Subqueries in OR predicates are not supported: " - + stmt.whereClause.toSql()); + + result.whereClause.toSql()); } - rewriteWhereClauseSubqueries(stmt, analyzer); + rewriteWhereClauseSubqueries(result, analyzer); } - stmt.sqlString_ = null; - if (LOG.isDebugEnabled()) LOG.debug("rewritten stmt: " + stmt.toSql()); + // Rewrite all subquery in the having clause + if (result.getHavingPred() != null && result.getHavingPred().getSubquery() != null) { + result = rewriteHavingClauseSubqueries(result, analyzer); + } + result.sqlString_ = null; + if (LOG.isDebugEnabled()) LOG.debug("rewritten stmt: " + result.toSql()); + return result; + } + + /** + * Rewrite having subquery. + * Step1: rewrite having subquery to where subquery + * Step2: rewrite where subquery + * <p> + * For example: + * select cs_item_sk, sum(cs_sales_price) from catalog_sales a group by cs_item_sk + * having sum(cs_sales_price) > + * (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk); + * <p> + * Step1: rewrite having subquery to where subquery + * Outer query is changed to inline view in rewritten query + * Inline view of outer query: + * from (select cs_item_sk, sum(cs_sales_price) sum_cs_sales_price from catalog_sales group by cs_item_sk) a + * Rewritten subquery of expr: + * where a.sum_cs_sales_price > + * (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk) + * Rewritten query: + * select cs_item_sk, a.sum_cs_sales_price from + * (select cs_item_sk, sum(cs_sales_price) sum_cs_sales_price from catalog_sales group by cs_item_sk) a + * where a.sum_cs_sales_price > + * (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = b.cs_item_sk) + * <p> + * Step2: rewrite where subquery + * Inline view of subquery: + * from (select b.cs_item_sk, min(cs_sales_price) from catalog_sales b group by cs_item_sk) c + * Rewritten correlated predicate: + * where c.cs_item_sk = a.cs_item_sk and a.sum_cs_sales_price > c.min(cs_sales_price) + * The final stmt: + * select a.cs_item_sk, a.sum_cs_sales_price from + * (select cs_item_sk, sum(cs_sales_price) sum_cs_sales_price from catalog_sales group by cs_item_sk) a + * join + * (select b.cs_item_sk, min(b.cs_sales_price) min_cs_sales_price from catalog_sales b group by b.cs_item_sk) c + * where c.cs_item_sk = a.cs_item_sk and a.sum_cs_sales_price > c.min_cs_sales_price; + * + * @param stmt + * @param analyzer + */ + private static SelectStmt rewriteHavingClauseSubqueries(SelectStmt stmt, Analyzer analyzer) throws AnalysisException { + // prepare parameters + SelectList selectList = stmt.getSelectList(); + List<String> columnLables = stmt.getColLabels(); + Expr havingClause = stmt.getHavingClauseAfterAnaylzed(); + List<FunctionCallExpr> aggregateExprs = stmt.getAggInfo().getAggregateExprs(); + Preconditions.checkState(havingClause != null); + Preconditions.checkState(havingClause.getSubquery() != null); + List<OrderByElement> orderByElements = stmt.getOrderByElementsAfterAnalyzed(); + long limit = stmt.getLimit(); + TableAliasGenerator tableAliasGenerator = stmt.getTableAliasGenerator(); + + /* + * The outer query is changed to inline view without having predicate + * For example: + * Query: select cs_item_sk, sum(cs_sales_price) from catalog_sales a group by cs_item_sk having ...; + * Inline view: + * from (select cs_item_sk $ColumnA, sum(cs_sales_price) $ColumnB from catalog_sales a group by cs_item_sk) $TableA Review comment: I am afraid of missing understanding. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org