ankitsultana commented on code in PR #16204: URL: https://github.com/apache/pinot/pull/16204#discussion_r2167821007
########## pinot-query-planner/src/test/resources/queries/PhysicalOptimizerPlans.json: ########## @@ -1,4 +1,154 @@ { + "physical_opt_chained_subqueries": { + "queries": [ + { + "description": "Sub-query with group-by with no agg calls and a limit", + "sql": "SET usePhysicalOptimizer=true; EXPLAIN PLAN FOR WITH tmp AS (SELECT DISTINCT col1, col2, ts FROM a LIMIT 100) SELECT col1, col2, RANK() OVER (PARTITION BY col2 ORDER BY ts DESC) AS rnk FROM tmp ORDER BY col2, rnk DESC", + "output": [ + "Execution Plan", + "\nPhysicalExchange(exchangeStrategy=[SINGLETON_EXCHANGE])", + "\n PhysicalSort(sort0=[$1], sort1=[$2], dir0=[ASC], dir1=[DESC])", + "\n PhysicalProject(col1=[$0], col2=[$1], $2=[$3])", + "\n PhysicalWindow(window#0=[window(partition {1} order by [2 DESC] aggs [RANK()])])", + "\n PhysicalSort(sort0=[$2], dir0=[DESC])", + "\n PhysicalSort(fetch=[100])", + "\n PhysicalExchange(exchangeStrategy=[SINGLETON_EXCHANGE])", + "\n PhysicalSort(fetch=[100])", + "\n PhysicalAggregate(group=[{0, 1, 2}], aggType=[FINAL], limit=[100])", + "\n PhysicalExchange(exchangeStrategy=[PARTITIONING_EXCHANGE], distKeys=[[0, 1, 2]])", + "\n PhysicalAggregate(group=[{0, 1, 7}], aggType=[LEAF], limit=[100])", + "\n PhysicalTableScan(table=[[default, a]])", + "\n" + ] + } + ] + }, + "physical_opt_group_by_hint_options": { + "queries": [ + { + "description": "", + "sql": "SET usePhysicalOptimizer=true; EXPLAIN PLAN FOR SELECT /*+ aggOptions(is_leaf_return_final_result='true') */ col1, COUNT(DISTINCT col2) AS cnt FROM a WHERE col3 >= 0 GROUP BY col1 ORDER BY cnt DESC LIMIT 10", + "output": [ + "Execution Plan", + "\nPhysicalExchange(exchangeStrategy=[SINGLETON_EXCHANGE])", + "\n PhysicalSort(sort0=[$1], dir0=[DESC], fetch=[10])", + "\n PhysicalExchange(exchangeStrategy=[SINGLETON_EXCHANGE])", + "\n PhysicalSort(sort0=[$1], dir0=[DESC], fetch=[10])", + "\n PhysicalAggregate(group=[{0}], agg#0=[DISTINCTCOUNT($1)], aggType=[FINAL], leafReturnFinalResult=[true])", + "\n PhysicalExchange(exchangeStrategy=[PARTITIONING_EXCHANGE], distKeys=[[0]])", + "\n PhysicalAggregate(group=[{0}], agg#0=[DISTINCTCOUNT($1)], aggType=[LEAF], leafReturnFinalResult=[true])", + "\n PhysicalFilter(condition=[>=($2, 0)])", + "\n PhysicalTableScan(table=[[default, a]])", + "\n" + ] + }, + { + "description": "", + "sql": "SET usePhysicalOptimizer=true; EXPLAIN PLAN FOR SELECT /*+ aggOptions(is_skip_leaf_stage_group_by='true') */ col1, MAX(ts) FROM a GROUP BY col1", + "output": [ + "Execution Plan", + "\nPhysicalExchange(exchangeStrategy=[SINGLETON_EXCHANGE])", + "\n PhysicalAggregate(group=[{0}], agg#0=[MAX($7)], aggType=[DIRECT])", + "\n PhysicalExchange(exchangeStrategy=[PARTITIONING_EXCHANGE], distKeys=[[0]])", + "\n PhysicalTableScan(table=[[default, a]])", + "\n" + ] + }, + { + "description": "Select aggregates with filters and select alias. The group by aggregate hint should be a no-op.", + "sql": "SET usePhysicalOptimizer=true; EXPLAIN PLAN FOR SELECT /*+ aggOptions(is_skip_leaf_stage_group_by='true') */ AVG(a.col3) as avg, COUNT(*) as count FROM a WHERE a.col3 >= 0 AND a.col2 = 'pink floyd'", + "output": [ + "Execution Plan", + "\nPhysicalExchange(exchangeStrategy=[SINGLETON_EXCHANGE])", + "\n PhysicalProject(avg=[/(CAST(CASE(=($1, 0), null:BIGINT, $0)):DOUBLE, $1)], count=[$1])", + "\n PhysicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($1)], aggType=[FINAL])", + "\n PhysicalExchange(exchangeStrategy=[SINGLETON_EXCHANGE])", + "\n PhysicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()], aggType=[LEAF])", + "\n PhysicalFilter(condition=[AND(>=($2, 0), =($1, _UTF-8'pink floyd'))])", + "\n PhysicalTableScan(table=[[default, a]])", + "\n" + ] + }, + { + "description": "Select aggregates with filters and select alias. The group by aggregate hint should be a no-op.", + "sql": "SET usePhysicalOptimizer=true; EXPLAIN PLAN FOR SELECT /*+ aggOptions(is_skip_leaf_stage_group_by='true') */ SUM(a.col3) as sum, COUNT(*) as count FROM a WHERE a.col3 >= 0 AND a.col2 = 'pink floyd'", + "output": [ + "Execution Plan", + "\nPhysicalExchange(exchangeStrategy=[SINGLETON_EXCHANGE])", + "\n PhysicalProject(sum=[CASE(=($1, 0), null:BIGINT, $0)], count=[$1])", + "\n PhysicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($1)], aggType=[FINAL])", + "\n PhysicalExchange(exchangeStrategy=[SINGLETON_EXCHANGE])", + "\n PhysicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()], aggType=[LEAF])", + "\n PhysicalFilter(condition=[AND(>=($2, 0), =($1, _UTF-8'pink floyd'))])", + "\n PhysicalTableScan(table=[[default, a]])", + "\n" + ] + }, + { + "description": "Example of query that avoids exchanges for aggregates", + "sql": "SET usePhysicalOptimizer=true; EXPLAIN PLAN FOR with teamOne as (select col2, percentile(col3, 50) as sum_of_runs from a group by col2), teamTwo as (select col2, percentile(col3, 50) as sum_of_runs from a group by col2), all as (select col2, sum_of_runs from teamOne union all select col2, sum_of_runs from teamTwo) select col2, percentile(sum_of_runs, 50) from all group by col2", Review Comment: Query taken from `AggregatePlans.json`: https://github.com/apache/pinot/blob/472c53d49cc644d0231e11d7ee0e1bf26e76917d/pinot-query-planner/src/test/resources/queries/AggregatePlans.json#L138-L159 The plan with v2 optimizer is better in the multiple ways: 1. We automatically can detect that the group-by column, `col2`, is the partitioning column. So the leaf stages execute the aggregate directly instead of splitting them. 2. We are automatically able to skip the exchange required for the final aggregation above the union. In other words, what the existing optimizer is not able to achieve even with hints, we can achieve without any hints. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org