EmmyMiao87 opened a new issue #2848: Support subquery in having clause
URL: https://github.com/apache/incubator-doris/issues/2848
 
 
   **Is your feature request related to a problem? Please describe.**
   Doris does not support subquery in having clause.
   
   For example:
   
   ```
   select a, sum(b) from t 
   group by a
   having sum(b) =(subquery)
   ```
   TPC-DS 23,24,44 query
   
   ```
   Query 23:
   WITH frequent_ss_items AS
   (
   
   SELECT substr(i_item_desc,1,30) itemdesc,
   i_item_sk item_sk,
   d_date solddate,
   count(*) cnt
   FROM store_sales ,date_dim ,item
   WHERE ss_sold_date_sk = d_date_sk
   AND ss_item_sk = i_item_sk
   AND d_year IN (1999,1999+1,1999+2,1999+3)
   GROUP BY substr(i_item_desc,1,30),i_item_sk,d_date
   HAVING count(*) >4
   
   ), max_store_sales AS
   (
   
   SELECT max(csales) tpcds_cmax
   FROM
   (
   
   SELECT c_customer_sk,
   sum(ss_quantity*ss_sales_price) csales
   FROM store_sales ,customer ,date_dim
   WHERE ss_customer_sk = c_customer_sk
   AND ss_sold_date_sk = d_date_sk
   AND d_year IN (1999,1999+1,1999+2,1999+3)
   GROUP BY c_customer_sk
   
   ) c
   
   ), best_ss_customer AS
   (
   
   SELECT c_customer_sk,
   sum(ss_quantity*ss_sales_price) ssales
   FROM store_sales ,customer
   WHERE ss_customer_sk = c_customer_sk
   GROUP BY c_customer_sk
   HAVING sum(ss_quantity*ss_sales_price) > (95/100.0) *
   (
   
   SELECT *
   FROM max_store_sales
   
   )
   
   )
   SELECT sum(sales)
   FROM
   (
   
   SELECT cs_quantity*cs_list_price sales
   FROM catalog_sales ,date_dim
   WHERE d_year = 1999
   AND d_moy = 1
   AND cs_sold_date_sk = d_date_sk
   AND cs_item_sk IN
   (
   
   SELECT item_sk
   FROM frequent_ss_items
   
   )
   AND cs_bill_customer_sk IN
   (
   
   SELECT c_customer_sk
   FROM best_ss_customer
   
   )
   UNION
   all SELECT ws_quantity*ws_list_price sales
   FROM web_sales ,date_dim
   WHERE d_year = 1999
   AND d_moy = 1
   AND ws_sold_date_sk = d_date_sk
   AND ws_item_sk IN
   (
   
   SELECT item_sk
   FROM frequent_ss_items
   
   )
   AND ws_bill_customer_sk IN
   (
   
   SELECT c_customer_sk
   FROM best_ss_customer
   
   )
   
   ) a LIMIT 100;
   WITH frequent_ss_items AS
   (
   
   SELECT substr(i_item_desc,1,30) itemdesc,
   i_item_sk item_sk,
   d_date solddate,
   count(*) cnt
   FROM store_sales ,date_dim ,item
   WHERE ss_sold_date_sk = d_date_sk
   AND ss_item_sk = i_item_sk
   AND d_year IN (1999,1999 + 1,1999 + 2,1999 + 3)
   GROUP BY substr(i_item_desc,1,30),i_item_sk,d_date
   HAVING count(*) >4
   
   ), max_store_sales AS
   (
   
   SELECT max(csales) tpcds_cmax
   FROM
   (
   
   SELECT c_customer_sk,
   sum(ss_quantity*ss_sales_price) csales
   FROM store_sales ,customer ,date_dim
   WHERE ss_customer_sk = c_customer_sk
   AND ss_sold_date_sk = d_date_sk
   AND d_year IN (1999,1999+1,1999+2,1999+3)
   GROUP BY c_customer_sk
   
   ) d
   
   ), best_ss_customer AS
   (
   
   SELECT c_customer_sk,
   sum(ss_quantity*ss_sales_price) ssales
   FROM store_sales ,customer
   WHERE ss_customer_sk = c_customer_sk
   GROUP BY c_customer_sk
   HAVING sum(ss_quantity*ss_sales_price) > (95/100.0) *
   (
   
   SELECT *
   FROM max_store_sales
   
   )
   
   )
   SELECT c_last_name,
   c_first_name,
   sales
   FROM
   (
   
   SELECT c_last_name,
   c_first_name,
   sum(cs_quantity*cs_list_price) sales
   FROM catalog_sales ,customer ,date_dim
   WHERE d_year = 1999
   AND d_moy = 1
   AND cs_sold_date_sk = d_date_sk
   AND cs_item_sk IN
   (
   
   SELECT item_sk
   FROM frequent_ss_items
   
   )
   AND cs_bill_customer_sk IN
   (
   
   SELECT c_customer_sk
   FROM best_ss_customer
   
   )
   AND cs_bill_customer_sk = c_customer_sk
   GROUP BY c_last_name,c_first_name
   UNION
   all SELECT c_last_name,
   c_first_name,
   sum(ws_quantity*ws_list_price) sales
   FROM web_sales ,customer ,date_dim
   WHERE d_year = 1999
   AND d_moy = 1
   AND ws_sold_date_sk = d_date_sk
   AND ws_item_sk IN
   (
   
   SELECT item_sk
   FROM frequent_ss_items
   
   )
   AND ws_bill_customer_sk IN
   (
   
   SELECT c_customer_sk
   FROM best_ss_customer
   
   )
   AND ws_bill_customer_sk = c_customer_sk
   GROUP BY c_last_name,c_first_name
   
   ) b
   ORDER BY c_last_name,c_first_name,sales
   
   LIMIT 100;
   ```
   
   ```
   Query 24
   WITH ssales AS
   (
   
         SELECT c_last_name ,
               c_first_name ,
               s_store_name ,
               ca_state ,
               s_state ,
               i_color ,
               i_current_price ,
               i_manager_id ,
               i_units ,
               i_size ,
               sum(ss_sales_price) netpaid
         FROM store_sales ,store_returns ,store ,item ,customer 
,customer_address
         WHERE ss_ticket_number = sr_ticket_number
               AND ss_item_sk = sr_item_sk
               AND ss_customer_sk = c_customer_sk
               AND ss_item_sk = i_item_sk
               AND ss_store_sk = s_store_sk
               AND c_birth_country = upper(ca_country)
               AND s_zip = ca_zip
               AND s_market_id=7
         GROUP BY c_last_name ,c_first_name ,s_store_name ,ca_state ,s_state 
,i_color ,i_current_price ,i_manager_id ,i_units ,i_size
   
   )
   SELECT c_last_name ,
         c_first_name ,
         s_store_name ,
         sum(netpaid) paid
   FROM ssales
   WHERE i_color = 'orchid'
   GROUP BY c_last_name ,c_first_name ,s_store_name
   HAVING sum(netpaid) >
   (
   
         SELECT 0.05*avg(netpaid)
         FROM ssales
   
   ) ;
   WITH ssales AS
   (
   
         SELECT c_last_name ,
               c_first_name ,
               s_store_name ,
               ca_state ,
               s_state ,
               i_color ,
               i_current_price ,
               i_manager_id ,
               i_units ,
               i_size ,
               sum(ss_sales_price) netpaid
         FROM store_sales ,store_returns ,store ,item ,customer 
,customer_address
         WHERE ss_ticket_number = sr_ticket_number
               AND ss_item_sk = sr_item_sk
               AND ss_customer_sk = c_customer_sk
               AND ss_item_sk = i_item_sk
               AND ss_store_sk = s_store_sk
               AND c_birth_country = upper(ca_country)
               AND s_zip = ca_zip
               AND s_market_id = 7
         GROUP BY c_last_name ,c_first_name ,s_store_name ,ca_state ,s_state 
,i_color ,i_current_price ,i_manager_id ,i_units ,i_size
   
   )
   SELECT c_last_name ,
         c_first_name ,
         s_store_name ,
         sum(netpaid) paid
   FROM ssales
   WHERE i_color = 'chiffon'
   GROUP BY c_last_name ,c_first_name ,s_store_name
   HAVING sum(netpaid) >
   (
   
         SELECT 0.05*avg(netpaid)
         FROM ssales
   
   ) ;
   ```
   
   ```
   Query 44
   SELECT asceding.rnk, i1.i_product_name AS best_performing, i2.i_product_name 
AS worst_performing
   FROM
   
   (
         SELECT *
         FROM
   
         (
               SELECT item_sk, rank() OVER (ORDER BY rank_col ASC) AS rnk
               FROM
   
               (
                     SELECT ss_item_sk AS item_sk, AVG(ss_net_profit) AS 
rank_col
                    FROM store_sales ss1
                     WHERE ss_store_sk = 410
                     GROUP BY ss_item_sk
                     HAVING AVG(ss_net_profit) > 0.9 *
   
                     (
                           SELECT AVG(ss_net_profit) AS rank_col
                           FROM store_sales
                           WHERE ss_store_sk = 410
                                 AND ss_hdemo_sk IS NULL
                           GROUP BY ss_store_sk
                     )
               ) V1
         ) V11
         WHERE rnk < 11
   ) asceding,
   
   (
         SELECT *
         FROM (
               SELECT item_sk, rank() OVER (ORDER BY rank_col DESC) AS rnk
               FROM
   
               (
                     SELECT ss_item_sk AS item_sk, AVG(ss_net_profit) AS 
rank_col
                     FROM store_sales ss1
                     WHERE ss_store_sk = 410
                     GROUP BY ss_item_sk
                     HAVING AVG(ss_net_profit) > 0.9 *
   
                     (
                           SELECT AVG(ss_net_profit) AS rank_col
                           FROM store_sales
                           WHERE ss_store_sk = 410
                                 AND ss_hdemo_sk IS NULL
                           GROUP BY ss_store_sk
                     )
               ) V2
         ) V21
         WHERE rnk < 11
   ) descending, item i1, item i2
   WHERE (asceding.rnk = descending.rnk
         AND i1.i_item_sk = asceding.item_sk
         AND i2.i_item_sk = descending.item_sk)
   ORDER BY asceding.rnk
   LIMIT 100;
   ```
   
   **Describe the solution you'd like**
   The subquery in having could be supported.
   For example:
   select a , sum(b) from t group by a having sum(b) = subquery1
     
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to