Copilot commented on code in PR #60260: URL: https://github.com/apache/doris/pull/60260#discussion_r2730784026
########## regression-test/suites/nereids_function_p0/gen_function/unnest_order_by_list_test.groovy: ########## @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("unnest_order_by_list_test", "unnest") { + + String prefix_str = "unnest_order_by_list_" + def tb_name1 = prefix_str + "table1" + def tb_name2 = prefix_str + "table2" + + sql """drop table if exists ${tb_name1}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name1} ( + user_id INT, + user_name VARCHAR(20), + scores ARRAY<INT>, + login_dates BITMAP + ) + DUPLICATE KEY(user_id) + DISTRIBUTED BY HASH(user_id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO ${tb_name1} VALUES + (1, 'Alice', [90, 70, 85], bitmap_from_string("10, 20")), + (2, 'Bob', [60, 80], bitmap_from_string("5, 15")), + (3, 'Charlie', [95], bitmap_from_string("30"));""" + + // Test ordering the result set by the unnested value. + qt_order_by_unnested_value """SELECT user_name, s.val FROM ${tb_name1}, UNNEST(scores) AS s(val) ORDER BY s.val DESC;""" + + // Test ordering by both the position (ordinality) and the value from UNNEST. + qt_order_by_ordinality_and_value """SELECT user_name, s.val, s.pos+1 FROM ${tb_name1}, UNNEST(scores) WITH ORDINALITY AS s(pos, val) ORDER BY s.pos ASC, s.val DESC;""" + + // Test ordering by the value unnested from a bitmap. + qt_order_by_unnested_bitmap """SELECT user_name, b.day FROM ${tb_name1}, UNNEST(login_dates) AS b(day) ORDER BY b.day ASC;""" + + // Test ordering by a regular column and the position from UNNEST. + qt_order_by_column_and_ordinality """ + SELECT user_name, s.val, s.pos + FROM ${tb_name1}, UNNEST(scores) WITH ORDINALITY AS s(pos, val) + ORDER BY user_name ASC, s.pos ASC;""" + + // Test ordering by an expression that involves the unnested value. + qt_order_by_expression_on_unnested_value """ + SELECT user_name, s.val + FROM ${tb_name1}, UNNEST(scores) AS s(val) + ORDER BY ABS(s.val - 80) ASC;""" + + // Test ordering by an aggregate column and the unnested column after a GROUP BY operation. + qt_order_by_aggregate_and_unnested_value """SELECT s.val, COUNT(*) as freq FROM ${tb_name1}, UNNEST(scores) AS s(val) GROUP BY s.val ORDER BY freq DESC, s.val ASC;""" + + // Test using UNNEST in the SELECT list and ordering by its resulting alias. + qt_order_by_unnest_alias_in_select """SELECT user_name, UNNEST(scores) AS score_val FROM ${tb_name1} ORDER BY score_val DESC;""" + + // Test ordering by the UNNEST function call directly in the ORDER BY clause. + qt_order_by_unnest_function_call """SELECT user_name FROM ${tb_name1} ORDER BY UNNEST(scores);""" + + sql """drop table if exists ${tb_name2}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name2} ( + user_id INT, + subject VARCHAR(20), + history_scores ARRAY<INT> + ) + DUPLICATE KEY(user_id) + DISTRIBUTED BY HASH(user_id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO ${tb_name2} VALUES + (1, 'Math', [80, 95, 70]), + (1, 'English', [90, 85]), + (2, 'Math', [60, 75]);""" + + // Test using the unnested value within the ORDER BY clause of a window function. + qt_window_function_order_by_unnested_value """ + SELECT + user_id, + subject, + s.val, + RANK() OVER (PARTITION BY user_id, subject ORDER BY s.val DESC) as score_rank + FROM ${tb_name2}, UNNEST(history_scores) AS s(val);""" + + // Test an ORDER BY clause on a query that involves WHERE and UNNEST. + qt_order_by_after_where_and_unnest """ + SELECT + user_id, + s.val, + s.pos + FROM ${tb_name2}, UNNEST(history_scores) WITH ORDINALITY AS s(val, pos) Review Comment: `UNNEST(... ) WITH ORDINALITY AS s(val, pos)` swaps the meaning of `val`/`pos` compared to the other ORDER BY tests in this file (which use `AS s(pos, val)`). This makes the query hard to read and can lead to accidental misuse of the ordinality vs element columns; use a consistent alias order and update the selected/ordered columns accordingly. ```suggestion FROM ${tb_name2}, UNNEST(history_scores) WITH ORDINALITY AS s(pos, val) ``` ########## regression-test/suites/nereids_function_p0/gen_function/unnest_map_border_test.groovy: ########## @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("unnest_map_border_test", "unnest") { + + // normal array + // Test the basic unnesting of a map into key-value pairs. + qt_unnest_simple_map """ + SELECT + user_id, + k, + v + FROM ( + SELECT + 101 AS user_id, + MAP('category', 'electronics', 'rank', 'A', 'region', 'East') AS attr_map + ) t, + UNNEST(attr_map) AS v(k, v) Review Comment: `UNNEST(attr_map) AS v(k, v)` reuses `v` as both the table alias and a column alias, which is ambiguous to readers and easy to misreference in follow-up edits. Use distinct names for the table alias and the value column (e.g., alias the table as `m` and the value column as `val`). ```suggestion UNNEST(attr_map) AS m(k, v) ``` ########## regression-test/suites/nereids_function_p0/gen_function/unnest_join_list_test.groovy: ########## @@ -0,0 +1,276 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("unnest_join_list_test", "unnest") { + + String prefix_str = "unnest_join_list_" + def tb_name1 = prefix_str + "table1" + def tb_name2 = prefix_str + "table2" + def tb_name3 = prefix_str + "table3" + def tb_name4 = prefix_str + "table4" + def tb_name5 = prefix_str + "table5" + def tb_name6 = prefix_str + "table6" + + sql """drop table if exists ${tb_name1}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name1} ( + u_id INT, + u_name VARCHAR(50), + u_tags ARRAY<STRING>, + u_logins BITMAP, + u_ext MAP<STRING, STRING> + ) + DUPLICATE KEY(u_id) + DISTRIBUTED BY HASH(u_id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO ${tb_name1} VALUES + (1, 'Alice', ['work', 'gym'], bitmap_from_string("101, 102"), {'city': 'shanghai'}), + (2, 'Bob', ['game'], bitmap_empty(), {'city': 'beijing'}), + (3, 'Charlie', [], to_bitmap(103), {}), + (101, 'Alice', ['work', 'gym'], bitmap_from_string("101, 102"), {'city': 'shanghai'}), + (102, 'Bob', ['game'], bitmap_empty(), {'city': 'beijing'}), + (103, 'Charlie', [], to_bitmap(103), {});""" + + sql """drop table if exists ${tb_name2}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name2} ( + tag_name VARCHAR(50), + tag_group VARCHAR(20) + ) + DISTRIBUTED BY HASH(tag_name) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """INSERT INTO ${tb_name2} VALUES ('work', 'Business'), ('gym', 'Health'), ('game', 'Entertain');""" + + sql """drop table if exists ${tb_name3}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name3} ( + category_name VARCHAR(20), + manager_name VARCHAR(50), + office_location VARCHAR(50) + ) + DISTRIBUTED BY HASH(category_name) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO ${tb_name3} VALUES + ('Business', 'David', 'Tower A'), + ('Health', 'Eve', 'Tower B'), + ('Entertain', 'Frank', 'Tower C');""" + + // lateral join + // Test a lateral join using `JOIN UNNEST` with an ON true condition. + qt_lateral_join_unnest """SELECT u.u_name, t.tag, t.pos FROM ${tb_name1} u JOIN UNNEST(u.u_tags) WITH ORDINALITY AS t(pos, tag) ON true ORDER BY u.u_name, t.pos;""" + + // on filter + // Test a lateral join with a filter on the unnested table in the ON clause. + qt_lateral_join_unnest_with_on_filter """SELECT u.u_name, t.tag FROM ${tb_name1} u JOIN UNNEST(u.u_tags) WITH ORDINALITY AS t(pos, tag) ON t.pos = 1 ORDER BY u.u_name, t.tag;""" + + // more table join + // Test a chain of joins following an UNNEST operation. + qt_chain_join_after_unnest """SELECT u.u_name, t.tag, d.tag_group FROM ${tb_name1} u JOIN UNNEST(u.u_tags) WITH ORDINALITY AS t(pos, tag) ON true JOIN ${tb_name2} d ON t.tag = d.tag_name ORDER BY u.u_name, t.tag, d.tag_group;""" + + // bitmap + // Test unnesting a bitmap and then joining the results back to the same table. + qt_unnest_bitmap_and_join_back """ + SELECT u.u_name AS owner, b.login_id, u2.u_name AS visitor + FROM ${tb_name1} u + JOIN UNNEST(u.u_logins) AS b(login_id) ON true + JOIN ${tb_name1} u2 ON b.login_id = u2.u_id + ORDER BY owner, login_id, visitor;""" + + test { + // Test that using UNNEST directly within a JOIN condition is not allowed and fails as expected. + sql """SELECT u.u_name AS owner, b.login_id, u2.u_name AS visitor + FROM ${tb_name1} u + JOIN UNNEST(u.u_logins) AS b(login_id) ON true + JOIN ${tb_name1} u2 ON b.login_id = unnest(u2.u_logins); + """ + exception "table generating function is not allowed in LOGICAL_PROJECT" + } + + // map + // Test joining with an unnested map and applying a filter in the ON clause. + qt_join_unnest_map_with_filter """ + SELECT u.u_name, m.entry + FROM ${tb_name1} u + JOIN UNNEST(u.u_ext) AS m(entry) ON m.entry LIKE '%city%' + ORDER BY u.u_name, m.entry;""" + + // left join + // Test a LEFT JOIN with UNNEST, preserving left-side rows for empty arrays. + qt_left_join_unnest_array """ + SELECT u.u_name, t.tag, t.pos + FROM ${tb_name1} u + LEFT JOIN UNNEST(u.u_tags) WITH ORDINALITY AS t(tag, pos) ON true + ORDER BY u.u_name, t.pos;""" + Review Comment: This file mixes `WITH ORDINALITY AS t(pos, tag)` and `WITH ORDINALITY AS t(tag, pos)`. Given the expected outputs, `t.tag` ends up being the ordinality in the latter form, which is confusing and inconsistent with surrounding queries. Please standardize the alias order (and referenced columns) so `tag` consistently refers to the unnested element and `pos/ord` to the ordinality. ########## regression-test/suites/nereids_function_p0/gen_function/unnest_group_by_list_test.groovy: ########## @@ -0,0 +1,184 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("unnest_group_by_list_test", "unnest") { + + String prefix_str = "unnest_group_by_list_" + def tb_name1 = prefix_str + "table1" + def tb_name2 = prefix_str + "table2" + + sql """drop table if exists ${tb_name1}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name1} ( + user_id INT, + dept_name VARCHAR(20), + tags ARRAY<STRING>, + scores BITMAP, + kv_data MAP<STRING, STRING> + ) + DUPLICATE KEY(user_id) + DISTRIBUTED BY HASH(user_id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO ${tb_name1} VALUES + (1, 'Sales', ['high', 'potential'], bitmap_from_string("80, 90"), {'level': 'A'}), + (2, 'Sales', ['potential', 'new'], bitmap_from_string("70, 80"), {'level': 'B'}), + (3, 'Tech', ['high', 'expert'], bitmap_from_string("90, 95"), {'level': 'A'}), + (4, 'Tech', ['expert'], bitmap_from_string("95"), {'level': 'A'});""" + + // array + // Test grouping by an unnested array column and counting the occurrences of each element. + qt_group_by_unnested_array """SELECT t.tag, COUNT(*) FROM ${tb_name1}, UNNEST(tags) AS t(tag) GROUP BY t.tag ORDER BY t.tag;""" + + // bitmap + // Test grouping by an unnested bitmap column and counting distinct users for each value. + qt_group_by_unnested_bitmap """SELECT b.score, COUNT(DISTINCT user_id) FROM ${tb_name1}, UNNEST(scores) AS b(score) GROUP BY b.score ORDER BY b.score;""" + + // map + // Test grouping by the values of an unnested map and counting their occurrences. + qt_group_by_unnested_map """SELECT m.item, COUNT(*) FROM ${tb_name1}, UNNEST(kv_data) AS m(item) GROUP BY m.item ORDER BY m.item;""" + + // Test grouping by both a regular column and an unnested column. + qt_group_by_column_and_unnested_column """ + SELECT + dept_name col1, + t.tag col2, + COUNT(*) as tag_count + FROM ${tb_name1}, UNNEST(tags) AS t(tag) + GROUP BY dept_name, t.tag + ORDER BY col1, col2, tag_count DESC;""" + + // with ORDINALITY + // Test grouping on an unnested column after filtering by the element's position from ORDINALITY. + qt_group_by_unnested_array_with_ordinality_filter """ + SELECT + t.tag, + COUNT(*) + FROM ${tb_name1}, UNNEST(tags) WITH ORDINALITY AS t(pos, tag) + WHERE t.pos = 1 + GROUP BY t.tag + ORDER BY t.tag;""" + + // Test GROUP BY ROLLUP with a regular column and an unnested column. + qt_rollup_with_unnested_column """ + SELECT + dept_name, + t.tag, + COUNT(*) + FROM ${tb_name1}, UNNEST(tags) AS t(tag) + GROUP BY ROLLUP(dept_name, t.tag) + ORDER BY dept_name, t.tag;""" + + // Test grouping by a regular column and an UNNEST function call directly. + qt_group_by_unnest_function """SELECT dept_name FROM ${tb_name1} GROUP BY dept_name, UNNEST(tags) ORDER BY dept_name;""" + + test { + // Test that using an aggregate function on an UNNEST result in the SELECT list and grouping by it is invalid. + sql """SELECT SUM(UNNEST(scores)) as col1 FROM ${tb_name1} group by col1;""" + exception "GROUP BY expression must not contain aggregate functions" + } + + sql """drop table if exists ${tb_name2}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name2} ( + user_id INT, + dept_name VARCHAR(20), + position VARCHAR(20), + skills ARRAY<STRING> + ) + DUPLICATE KEY(user_id) + DISTRIBUTED BY HASH(user_id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO ${tb_name2} VALUES + (1, 'R&D', 'Manager', ['Java', 'Go']), + (2, 'R&D', 'Dev', ['Java', 'SQL']), + (3, 'Sales', 'Manager', ['SQL']), + (4, 'Sales', 'Trainee', ['Excel', 'SQL']);""" + + // rollup + // Test GROUP BY ROLLUP with a column and an unnested column from a CROSS JOIN. + qt_rollup_with_cross_join_unnest """ + SELECT + dept_name, + s.skill, + COUNT(DISTINCT user_id) as user_count + FROM ${tb_name2} + CROSS JOIN UNNEST(skills) WITH ORDINALITY AS s(skill, pos) + GROUP BY ROLLUP(dept_name, s.skill) + ORDER BY dept_name, s.skill;""" Review Comment: `WITH ORDINALITY AS s(skill, pos)` makes the column naming misleading (in current expected outputs, `s.skill` is the ordinality values 0/1/…). This is very hard to reason about in GROUPING SETS/CUBE queries—please swap/rename the aliases so `skill` refers to the unnested element and `pos` (or `ord`) refers to the ordinality. ########## regression-test/suites/nereids_function_p0/gen_function/unnest_from_list_test.groovy: ########## @@ -0,0 +1,215 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("unnest_from_list_test", "unnest") { + + String prefix_str = "unnest_from_list_" + def tb_name1 = prefix_str + "table1" + def tb_name2 = prefix_str + "table2" + def tb_name3 = prefix_str + "table3" + + sql """drop table if exists ${tb_name1}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name1} ( + user_id INT, + user_name VARCHAR(50), + tags ARRAY<STRING>, + tags_2 ARRAY<array<STRING>>, + login_days BITMAP, + ext_info MAP<STRING, STRING>, + u_id INT, + u_name VARCHAR(50), + user_tags ARRAY<STRING>, + user_tags_2 ARRAY<STRING> + ) + DUPLICATE KEY(user_id) + DISTRIBUTED BY HASH(user_id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO ${tb_name1} VALUES + (1, 'Alice', ['tech', 'music'], [['tech', 'music'], ['tech', 'music']], to_bitmap(1), {'source': 'web', 'level': 'gold'}, 1, 'Alice', ['Java', 'SQL', 'str3'], ['Java', 'SQL']), + (2, 'Bob', ['sport'], [['sport'], ['sport']], bitmap_from_string('1,2'), {'source': 'app'}, 1, 'Alice', ['Java', 'SQL', 'str3'], ['Java', 'SQL']), + (3, 'Charlie', [], [[]], bitmap_empty(), {}, 1, 'Alice', ['Java', 'SQL', 'str3'], ['Java', 'SQL']);""" + + sql """drop table if exists ${tb_name2}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name2} ( + tag_name VARCHAR(50), + tag_group VARCHAR(50), + d_id INT, + dept_name VARCHAR(50), + dept_tags ARRAY<STRING>, + dept_tags_2 ARRAY<STRING> + ) + DISTRIBUTED BY HASH(tag_name) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """INSERT INTO ${tb_name2} VALUES + ('tech', 'Work', 1, 'R&D', ['Microservice', 'Docker'], ['Microservice', 'Docker']), + ('music', 'Life', 1, 'R&D', ['Microservice', 'Docker'], ['Microservice', 'Docker']), + ('sport', 'Life', 1, 'R&D', ['Microservice', 'Docker'], ['Microservice', 'Docker']);""" + + // Test an implicit CROSS JOIN with UNNEST on an array column. + qt_implicit_cross_join_unnest_array """SELECT u.user_name, t.tag, t.pos FROM ${tb_name1} u, UNNEST(u.tags) WITH ORDINALITY AS t(tag, pos) ORDER BY u.user_name, t.pos;""" + + // Test an implicit CROSS JOIN with UNNEST on a bitmap column. + qt_implicit_cross_join_unnest_bitmap """SELECT u.user_name, b.day FROM ${tb_name1} u, UNNEST(u.login_days) AS b(day) ORDER BY u.user_name, b.day;""" + + // Cross Join + // Test an explicit CROSS JOIN with UNNEST, followed by a subsequent JOIN. + qt_explicit_cross_join_unnest_then_join """ + SELECT + u.user_name, + expl_tags.tag_name, + tc.tag_group + FROM ${tb_name1} u + CROSS JOIN UNNEST(u.tags) AS expl_tags(tag_name) + JOIN ${tb_name2} tc ON expl_tags.tag_name = tc.tag_name + ORDER BY u.user_name, expl_tags.tag_name, tc.tag_group;""" + + // const virtual table + // Test UNNEST on a constant array to create a virtual table. + qt_unnest_constant_array """SELECT * FROM UNNEST(['a', 'b', 'c']) WITH ORDINALITY AS t(val, pos) ORDER BY pos;""" + + // more unnest + // Test multiple UNNEST clauses in the same FROM clause for an array and a map. + qt_multiple_unnest_in_from """ + SELECT + u.user_name, + t.tag, + m.info_item + FROM ${tb_name1} u, + UNNEST(u.tags) AS t(tag), + UNNEST(u.ext_info) AS m(info_item) + ORDER BY u.user_name, t.tag, m.info_item;""" + + // subquery + // Test using UNNEST within a subquery. + qt_unnest_in_subquery """ + SELECT tmp.user_name, tmp.tag + FROM ( + SELECT u.user_name, t.tag + FROM ${tb_name1} u, UNNEST(u.tags) AS t(tag) + ) tmp + ORDER BY tmp.user_name, tmp.tag;""" + + + // join + // Test a LEFT JOIN with UNNEST. + qt_left_join_unnest """SELECT u.user_name, t.tag FROM ${tb_name1} u LEFT JOIN UNNEST(u.tags) WITH ORDINALITY AS t(tag, pos) ON true ORDER BY u.user_name, t.pos;""" + + // Test an explicit CROSS JOIN with UNNEST. + qt_explicit_cross_join_unnest """SELECT u.user_name, t.tag FROM ${tb_name1} u CROSS JOIN UNNEST(u.tags) AS t(tag) ORDER BY u.user_name, t.tag;""" + + test { + // Test that FULL JOIN with UNNEST is not supported and fails as expected. + sql """SELECT u.user_name, t.tag FROM ${tb_name1} u FULL JOIN UNNEST(u.tags) AS t(tag) ON false;""" // fail, don't support + exception "The combining JOIN type must be INNER, LEFT or CROSS for UNNEST" + } + + // Test chained UNNEST on a nested array, where the second UNNEST depends on the first. + qt_chained_unnest_on_nested_array """SELECT u.user_name, t1.sub_arr, t2.item FROM ${tb_name1} u, UNNEST(u.tags_2) AS t1(sub_arr), UNNEST(t1.sub_arr) AS t2(item) ORDER BY u.user_name, t2.item;""" + + // more table dependency + // Test unnesting multiple arrays from two implicitly joined tables. + qt_multi_table_multi_array_unnest_implicit """SELECT + a.u_name, + b.dept_name, + t.combined_tag, + t.combined_tag_2, + t.combined_tag_3, + t.combined_tag_4 + FROM (${tb_name1} a, ${tb_name2} b) + CROSS JOIN UNNEST(a.user_tags, b.dept_tags, a.user_tags_2, b.dept_tags_2) AS t(combined_tag, combined_tag_2, combined_tag_3, combined_tag_4) + WHERE a.u_id = b.d_id + ORDER BY a.u_name, b.dept_name, t.combined_tag, t.combined_tag_2, t.combined_tag_3, t.combined_tag_4; + """ + + // Test unnesting multiple arrays from two tables listed in the FROM clause. + qt_multi_table_multi_array_unnest_from """SELECT + a.u_name, + b.dept_name, + t.combined_tag, + t.combined_tag_2, + t.combined_tag_3, + t.combined_tag_4 + FROM ${tb_name1} a, ${tb_name2} b, UNNEST(a.user_tags, b.dept_tags, a.user_tags_2, b.dept_tags_2) AS t(combined_tag, combined_tag_2, combined_tag_3, combined_tag_4) + WHERE a.u_id = b.d_id + ORDER BY a.u_name, b.dept_name, t.combined_tag, t.combined_tag_2, t.combined_tag_3, t.combined_tag_4; + """ + + // Test unnesting multiple arrays from two tables using explicit JOIN and CROSS JOIN syntax. + qt_multi_table_multi_array_unnest_explicit """SELECT + a.u_name, + b.dept_name, + t.combined_tag, + t.combined_tag_2, + t.combined_tag_3, + t.combined_tag_4 + FROM ${tb_name1} a + JOIN ${tb_name2} b ON a.u_id = b.d_id + CROSS JOIN UNNEST(a.user_tags, b.dept_tags, a.user_tags_2, b.dept_tags_2) AS t(combined_tag, combined_tag_2, combined_tag_3, combined_tag_4) + ORDER BY a.u_name, b.dept_name, t.combined_tag, t.combined_tag_2, t.combined_tag_3, t.combined_tag_4; + """ + + sql """drop table if exists ${tb_name3}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name3} ( + case_id INT, + case_desc VARCHAR(100), + big_bitmap BITMAP, + long_array ARRAY<STRING> + ) + DUPLICATE KEY(case_id) + DISTRIBUTED BY HASH(case_id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO ${tb_name3} + SELECT + 1, + '1M_ID_Bitmap', + BITMAP_UNION(to_bitmap(number)), + [] + FROM numbers("number" = "1000000");""" Review Comment: This test builds a 1,000,000-row bitmap via `numbers("number" = "1000000")` inside a p0 suite. That can significantly increase regression runtime and resource usage. Consider reducing the scale (e.g., 100k) or moving/marking this as a heavier/nonConcurrent test tier if large-scale coverage is required. ```suggestion '100K_ID_Bitmap', BITMAP_UNION(to_bitmap(number)), [] FROM numbers("number" = "100000");""" ``` ########## regression-test/suites/nereids_function_p0/gen_function/unnest_select_list_test.groovy: ########## @@ -0,0 +1,206 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("unnest_select_list_test", "unnest") { + + String prefix_str = "unnest_select_list_" + def tb_name1 = prefix_str + "table1" + + sql """drop table if exists ${tb_name1}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name1} ( + id INT, + user_name VARCHAR(50), + scores ARRAY<INT>, + user_ids BITMAP, + properties MAP<STRING, STRING> + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """ + INSERT INTO ${tb_name1} VALUES + (1, 'Alice', [90, 85, 95], to_bitmap(101), {'age': '25', 'city': 'Beijing'}), + (2, 'Bob', [70, 80], bitmap_from_string('201,202'), {'level': 'VIP', 'rank': 'A'}), + (3, 'EmptyCase', [], bitmap_empty(), {}), + (4, 'NullCase', NULL, bitmap_empty(), NULL);""" + + // Test using UNNEST on an array in the SELECT list. + qt_select_unnest_array """SELECT id, user_name, UNNEST(scores) as score FROM ${tb_name1} ORDER BY id, user_name, score;""" + + // array + test { + // Test that using WITH ORDINALITY for UNNEST in the SELECT list is not supported. + sql """ + SELECT + id, + user_name, + UNNEST(scores) WITH ORDINALITY AS (score_pos, score_val) + FROM ${tb_name1};""" + exception "mismatched input" + } + + // Test UNNEST on a constant array in the SELECT list. + qt_select_unnest_constant_array """SELECT UNNEST(['a', 'b', 'c']) as col1 ORDER BY col1;""" + + test { + // Test that WITH ORDINALITY is not supported for UNNEST on a constant array in the SELECT list. + sql """SELECT UNNEST(['a', 'b', 'c']) WITH ORDINALITY AS (val, pos);""" + exception "mismatched input" + } + + // Test UNNEST on a modified array (result of a function) in the SELECT list. + qt_select_unnest_on_modified_array """ + SELECT + id, + UNNEST(ARRAY_SORT(scores)) AS sorted_score + FROM ${tb_name1} WHERE id = 1 + ORDER BY id, sorted_score;""" + + test { + // Test that nested UNNEST calls in the SELECT list are not supported. + sql """SELECT UNNEST(t.v) FROM (SELECT UNNEST(ARRAY(ARRAY(1,2))) AS v) t;""" + exception "Could not find function unnest" + } + + + // bitmap + // Test using UNNEST on a bitmap in the SELECT list. + qt_select_unnest_bitmap """ + SELECT + id, + user_name, + UNNEST(user_ids) AS exploded_id + FROM ${tb_name1} + ORDER BY id, user_name, exploded_id;""" + + // Test UNNEST on a constant bitmap in the SELECT list. + qt_select_unnest_constant_bitmap """SELECT UNNEST(bitmap_from_string('10,20,30')) AS uid ORDER BY uid;""" + + // Test aggregating the results of an UNNEST in a subquery's SELECT list. + qt_aggregate_on_select_unnest_in_subquery """ + SELECT SUM(uid) FROM ( + SELECT UNNEST(user_ids) AS uid FROM ${tb_name1} + ) t;""" + + // Test UNNEST on the result of a bitmap function in the SELECT list. + qt_select_unnest_on_bitmap_function_result """SELECT UNNEST(BITMAP_AND(user_ids, to_bitmap(201))) AS intersection FROM ${tb_name1} ORDER BY intersection;""" + // Test using UNNEST directly inside a COUNT aggregate function. + qt_count_of_unnest """SELECT COUNT(UNNEST(scores)) FROM ${tb_name1};""" + + // map + // Test using UNNEST on a map in the SELECT list. + qt_select_unnest_map """ + SELECT + id, + UNNEST(properties) AS map_entry + FROM ${tb_name1} + ORDER BY id, map_entry;""" + + // Test UNNEST on a constant map in the SELECT list. + qt_select_unnest_constant_map """SELECT UNNEST(MAP('k1', 'v1', 'k2', 'v2')) AS entry ORDER BY entry;""" + + // Test filtering the results of an UNNEST in a subquery's SELECT list. + qt_filter_on_select_unnest_in_subquery """ + SELECT * FROM ( + SELECT id, UNNEST(properties) AS entry FROM ${tb_name1} + ) t + WHERE entry LIKE '%VIP%' + ORDER BY t.id, t.entry;""" + + // null + // Test UNNEST on a NULL array column in the SELECT list. + qt_select_unnest_null_array """SELECT UNNEST(scores) as score FROM ${tb_name1} WHERE id = 4 ORDER BY score NULLS FIRST;""" + + // cross join + test { + // Test that multiple UNNEST functions with different types in the SELECT list are invalid. + sql """SELECT UNNEST(scores), UNNEST(user_ids) FROM ${tb_name1};""" + exception "multiple UNNEST functions in same place must have ARRAY argument type" + } + + // nested + test { + // Test that nested UNNEST on a constant array in the SELECT list is invalid. + sql """SELECT UNNEST(UNNEST(ARRAY(ARRAY(1,2), ARRAY(3,4))));""" + exception "Could not find function unnest" + } + + // agg + sql """SELECT COUNT(UNNEST(scores)) FROM ${tb_name1};""" + Review Comment: This `sql` statement executes a query but does not assert anything, and it duplicates the earlier `qt_count_of_unnest` coverage. Consider removing it, or converting it into a `qt_*` so regressions are actually checked. ```suggestion ``` ########## regression-test/suites/nereids_function_p0/gen_function/unnest_where_list_test.groovy: ########## @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("unnest_where_list_test", "unnest") { + + String prefix_str = "unnest_where_list_" + def tb_name1 = prefix_str + "table1" + def tb_name2 = prefix_str + "table2" + + sql """drop table if exists ${tb_name1}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name1} ( + user_id INT, + user_name VARCHAR(50), + user_type VARCHAR(20), + tags ARRAY<STRING>, + login_days BITMAP, + ext_props MAP<STRING, STRING> + ) + DUPLICATE KEY(user_id) + DISTRIBUTED BY HASH(user_id) BUCKETS 1 + PROPERTIES ("replication_num" = "1");""" + + sql """INSERT INTO ${tb_name1} VALUES + (1, 'Alice', 'VIP', ['tech', 'music', 'sport'], bitmap_from_string("1, 2, 10"), {'level': 'gold'}), + (2, 'Bob', 'Normal', ['teach', 'math'], bitmap_from_string("15, 20"), {'level': 'silver'}), + (3, 'Charlie', 'Normal', [], bitmap_empty(), {}), + (4, 'David', 'VIP', NULL, bitmap_empty(), NULL);""" + + sql """drop table if exists ${tb_name2}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tb_name2} ( + ref_id INT, + ref_tag STRING + ) DISTRIBUTED BY HASH(ref_id) BUCKETS 1 PROPERTIES ("replication_num" = "1");""" + + sql """INSERT INTO ${tb_name2} VALUES (1, 'tech'), (2, 'math');""" + + // subquery nested + test { + // Test a correlated EXISTS subquery with UNNEST in the WHERE clause, which is expected to fail. + sql """SELECT user_name FROM ${tb_name1} u WHERE EXISTS (SELECT 1 FROM UNNEST(u.tags) AS t(tag) WHERE t.tag = 'tech');""" + exception "Input slot(s) not in child's output" + } + + test { + // Test a correlated IN subquery with UNNEST in the WHERE clause, which is expected to fail. + sql """SELECT user_name FROM ${tb_name1} u WHERE 'math' IN (SELECT tag FROM UNNEST(u.tags) AS t(tag));""" + exception "detailMessage" Review Comment: The failure assertion is too generic: `exception "detailMessage"` can match many unrelated errors and makes this test fragile across versions. Please assert a stable, specific substring of the actual error thrown for this query (similar to the previous test’s `Input slot(s) not in child's output`). ```suggestion exception "Input slot(s) not in child's output" ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
