Jackie-Jiang commented on code in PR #11234:
URL: https://github.com/apache/pinot/pull/11234#discussion_r1281506557


##########
pinot-query-runtime/src/test/resources/queries/QueryHints.json:
##########
@@ -51,37 +51,41 @@
       }
     },
     "queries": [
+      {
+        "description": "Group by partition column",
+        "sql": "SELECT {tbl1}.num, COUNT(*) FROM {tbl1} /*+ 
tableOptions(partition_key='num', partition_size='4') */ GROUP BY {tbl1}.num"
+      },
       {
         "description": "Colocated JOIN with partition column",
-        "sql": "SELECT /*+ joinOptions(is_colocated_by_join_keys='true') */ 
{tbl1}.num, {tbl1}.name, {tbl2}.num, {tbl2}.val FROM {tbl1} JOIN {tbl2} ON 
{tbl1}.num = {tbl2}.num"
+        "sql": "SELECT {tbl1}.num, {tbl1}.name, {tbl2}.num, {tbl2}.val FROM 
{tbl1} /*+ tableOptions(partition_key='num', partition_size='4') */ JOIN {tbl2} 
/*+ tableOptions(partition_key='num', partition_size='4') */ ON {tbl1}.num = 
{tbl2}.num"
       },
       {
         "description": "Colocated JOIN with partition column and group by 
partition column",
-        "sql": "SELECT /*+ joinOptions(is_colocated_by_join_keys='true'), 
aggOptions(is_partitioned_by_group_by_keys='true') */ {tbl1}.num, {tbl1}.name, 
SUM({tbl2}.num) FROM {tbl1} JOIN {tbl2} ON {tbl1}.num = {tbl2}.num GROUP BY 
{tbl1}.num, {tbl1}.name"
+        "sql": "SELECT /*+ aggOptions(is_partitioned_by_group_by_keys='true') 
*/ {tbl1}.num, {tbl1}.name, SUM({tbl2}.num) FROM {tbl1} /*+ 
tableOptions(partition_key='num', partition_size='4') */ JOIN {tbl2} /*+ 
tableOptions(partition_key='num', partition_size='4') */ ON {tbl1}.num = 
{tbl2}.num GROUP BY {tbl1}.num, {tbl1}.name"
       },
       {
         "description": "Colocated JOIN with partition column and group by 
non-partitioned column",
-        "sql": "SELECT /*+ joinOptions(is_colocated_by_join_keys='true'), 
aggOptions(is_partitioned_by_group_by_keys='false') */ {tbl1}.name, 
SUM({tbl2}.num) FROM {tbl1} JOIN {tbl2} ON {tbl1}.num = {tbl2}.num GROUP BY 
{tbl1}.name"
+        "sql": "SELECT {tbl1}.name, SUM({tbl2}.num) FROM {tbl1} /*+ 
tableOptions(partition_key='num', partition_size='4') */ JOIN {tbl2} /*+ 
tableOptions(partition_key='num', partition_size='4') */ ON {tbl1}.num = 
{tbl2}.num GROUP BY {tbl1}.name"
       },
       {
         "description": "Colocated, Dynamic broadcast SEMI-JOIN with partition 
column",
-        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast', 
is_colocated_by_join_keys='true') */ {tbl1}.num, {tbl1}.name FROM {tbl1} WHERE 
{tbl1}.num IN (SELECT {tbl2}.num FROM {tbl2} WHERE {tbl2}.val IN ('xxx', 
'yyy'))"
+        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast') */ 
{tbl1}.num, {tbl1}.name FROM {tbl1} /*+ tableOptions(partition_key='num', 
partition_size='4') */ WHERE {tbl1}.num IN (SELECT {tbl2}.num FROM {tbl2} /*+ 
tableOptions(partition_key='num', partition_size='4') */ WHERE {tbl2}.val IN 
('xxx', 'yyy'))"
       },
       {
         "description": "Colocated, Dynamic broadcast SEMI-JOIN with partition 
column and group by partition column",
-        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast', 
is_colocated_by_join_keys='true'), 
aggOptions(is_partitioned_by_group_by_keys='true') */ {tbl1}.num, 
COUNT({tbl1}.name) FROM {tbl1} WHERE {tbl1}.num IN (SELECT {tbl2}.num FROM 
{tbl2} WHERE {tbl2}.val IN ('xxx', 'yyy')) GROUP BY {tbl1}.num, {tbl1}.name"
+        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast'), 
aggOptions(is_partitioned_by_group_by_keys='true') */ {tbl1}.num, 
COUNT({tbl1}.name) FROM {tbl1} /*+ tableOptions(partition_key='num', 
partition_size='4') */ WHERE {tbl1}.num IN (SELECT {tbl2}.num FROM {tbl2} /*+ 
tableOptions(partition_key='num', partition_size='4') */ WHERE {tbl2}.val IN 
('xxx', 'yyy')) GROUP BY {tbl1}.num, {tbl1}.name"
       },
       {
         "description": "Colocated, Dynamic broadcast SEMI-JOIN with partition 
column and group by non-partitioned column",
-        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast', 
is_colocated_by_join_keys='true') */ {tbl1}.name, COUNT(*) FROM {tbl1} WHERE 
{tbl1}.num IN (SELECT {tbl2}.num FROM {tbl2} WHERE {tbl2}.val IN ('xxx', 
'yyy')) GROUP BY {tbl1}.name"
+        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast') */ 
{tbl1}.name, COUNT(*) FROM {tbl1} /*+ tableOptions(partition_key='num', 
partition_size='4') */ WHERE {tbl1}.num IN (SELECT {tbl2}.num FROM {tbl2} /*+ 
tableOptions(partition_key='num', partition_size='4') */ WHERE {tbl2}.val IN 
('xxx', 'yyy')) GROUP BY {tbl1}.name"
       },
       {
         "description": "Dynamic broadcast SEMI-JOIN with empty right table 
result",
-        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast') */ 
{tbl1}.name, COUNT(*) FROM {tbl1} WHERE {tbl1}.num IN (SELECT {tbl2}.num FROM 
{tbl2} WHERE {tbl2}.val = 'non-exist') GROUP BY {tbl1}.name"
+        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast') */ 
{tbl1}.name, COUNT(*) FROM {tbl1} /*+ tableOptions(partition_key='num', 
partition_size='4') */ WHERE {tbl1}.num IN (SELECT {tbl2}.num FROM {tbl2} /*+ 
tableOptions(partition_key='num', partition_size='4') */ WHERE {tbl2}.val = 
'non-exist') GROUP BY {tbl1}.name"
       },
       {
         "description": "Colocated, Dynamic broadcast SEMI-JOIN with partially 
empty right table result for some servers",
-        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast', 
is_colocated_by_join_keys='true') */ {tbl1}.name, COUNT(*) FROM {tbl1} WHERE 
{tbl1}.num IN (SELECT {tbl2}.num FROM {tbl2} WHERE {tbl2}.val = 'z') GROUP BY 
{tbl1}.name"
+        "sql": "SELECT /*+ joinOptions(join_strategy='dynamic_broadcast') */ 
{tbl1}.name, COUNT(*) FROM {tbl1} /*+ tableOptions(partition_key='num', 
partition_size='4') */ WHERE {tbl1}.num IN (SELECT {tbl2}.num FROM {tbl2} /*+ 
tableOptions(partition_key='num', partition_size='4') */ WHERE {tbl2}.val = 
'z') GROUP BY {tbl1}.name"

Review Comment:
   Currently `stageParallelism` is ignored. Can be added when we add the 
support to further split on single partition data



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to