This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 4ca0c0face [fix](join) fix wrong result of right join (#18365) 4ca0c0face is described below commit 4ca0c0facecf532b2cc032a22bc0c99760d718d4 Author: TengJianPing <18241664+jackte...@users.noreply.github.com> AuthorDate: Thu Apr 6 10:55:58 2023 +0800 [fix](join) fix wrong result of right join (#18365) When processing data in hash table for right join and full outer join, if the output data rows of one hash bucket excceeds batch size, the logic when continue processing this bucket is wrong, it should differentiate between different join types. --- .../vec/exec/join/process_hash_table_probe_impl.h | 34 +++-- .../functions_test/test_right_join.out | 13 ++ .../functions_test/test_right_join.groovy | 158 +++++++++++++++++++++ 3 files changed, 196 insertions(+), 9 deletions(-) diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h index fa23105c1e..ba49134c03 100644 --- a/be/src/vec/exec/join/process_hash_table_probe_impl.h +++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h @@ -1058,8 +1058,24 @@ Status ProcessHashTableProbe<JoinOpType>::process_data_in_hashtable(HashTableTyp }; if (visited_iter.ok()) { - for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { - insert_from_hash_table(visited_iter->block_offset, visited_iter->row_num); + if constexpr (std::is_same_v<Mapped, RowRefListWithFlag>) { + for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { + insert_from_hash_table(visited_iter->block_offset, visited_iter->row_num); + } + } else { + for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { + if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { + if (visited_iter->visited) { + insert_from_hash_table(visited_iter->block_offset, + visited_iter->row_num); + } + } else { + if (!visited_iter->visited) { + insert_from_hash_table(visited_iter->block_offset, + visited_iter->row_num); + } + } + } } if (!visited_iter.ok()) { ++iter; @@ -1070,16 +1086,16 @@ Status ProcessHashTableProbe<JoinOpType>::process_data_in_hashtable(HashTableTyp auto& mapped = iter->get_second(); if constexpr (std::is_same_v<Mapped, RowRefListWithFlag>) { if (mapped.visited) { - visited_iter = mapped.begin(); - for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { - if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { + if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { + visited_iter = mapped.begin(); + for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { insert_from_hash_table(visited_iter->block_offset, visited_iter->row_num); } - } - if (visited_iter.ok()) { - // block_size >= _batch_size, quit for loop - break; + if (visited_iter.ok()) { + // block_size >= _batch_size, quit for loop + break; + } } } else { if constexpr (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) { diff --git a/regression-test/data/tpcds_sf1_p1/functions_test/test_right_join.out b/regression-test/data/tpcds_sf1_p1/functions_test/test_right_join.out new file mode 100644 index 0000000000..94939a0607 --- /dev/null +++ b/regression-test/data/tpcds_sf1_p1/functions_test/test_right_join.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql_right_outer_join_with_other_conjuncts -- +27796 + +-- !sql_full_join_with_other_conjuncts -- +27796 + +-- !sql_right_semi_join_with_other_conjuncts -- +8153 + +-- !sql_right_anti_join_with_other_conjuncts -- +19643 + diff --git a/regression-test/suites/tpcds_sf1_p1/functions_test/test_right_join.groovy b/regression-test/suites/tpcds_sf1_p1/functions_test/test_right_join.groovy new file mode 100644 index 0000000000..32683de5f9 --- /dev/null +++ b/regression-test/suites/tpcds_sf1_p1/functions_test/test_right_join.groovy @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_right_join") { + sql """set enable_nereids_planner=false""" + sql """set parallel_fragment_exec_instance_num=1""" + sql "use regression_test_tpcds_sf1_p1" + + qt_sql_right_outer_join_with_other_conjuncts """ + WITH customer_total_return AS ( + SELECT + ca_state ctr_state, + sum(cr_return_amt_inc_tax) ctr_total_return + FROM + catalog_returns, + date_dim, + customer_address + WHERE + (cr_returned_date_sk = d_date_sk) + AND (d_year = 2000) + AND (cr_returning_addr_sk = ca_address_sk) + GROUP BY + cr_returning_customer_sk, + ca_state + ), + ctr2 as ( + SELECT + (avg(ctr_total_return)) ctr_total_return, + ctr_state + FROM + customer_total_return + group by + ctr_state + ) + SELECT + count(ctr1.ctr_state) + FROM + ctr2 right + join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state + and ctr1.ctr_total_return > ctr2.ctr_total_return; + """ + + qt_sql_full_join_with_other_conjuncts """ + WITH customer_total_return AS ( + SELECT + ca_state ctr_state, + sum(cr_return_amt_inc_tax) ctr_total_return + FROM + catalog_returns, + date_dim, + customer_address + WHERE + (cr_returned_date_sk = d_date_sk) + AND (d_year = 2000) + AND (cr_returning_addr_sk = ca_address_sk) + GROUP BY + cr_returning_customer_sk, + ca_state + ), + ctr2 as ( + SELECT + (avg(ctr_total_return)) ctr_total_return, + ctr_state + FROM + customer_total_return + group by + ctr_state + ) + SELECT + count(ctr1.ctr_state) + FROM + ctr2 full + join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state + and ctr1.ctr_total_return > ctr2.ctr_total_return; + """ + + qt_sql_right_semi_join_with_other_conjuncts """ + WITH customer_total_return AS ( + SELECT + ca_state ctr_state, + sum(cr_return_amt_inc_tax) ctr_total_return + FROM + catalog_returns, + date_dim, + customer_address + WHERE + (cr_returned_date_sk = d_date_sk) + AND (d_year = 2000) + AND (cr_returning_addr_sk = ca_address_sk) + GROUP BY + cr_returning_customer_sk, + ca_state + ), + ctr2 as ( + SELECT + (avg(ctr_total_return)) ctr_total_return, + ctr_state + FROM + customer_total_return + group by + ctr_state + ) + SELECT + count(ctr1.ctr_state) + FROM + ctr2 right semi + join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state + and ctr1.ctr_total_return > ctr2.ctr_total_return; + """ + + qt_sql_right_anti_join_with_other_conjuncts """ + WITH customer_total_return AS ( + SELECT + ca_state ctr_state, + sum(cr_return_amt_inc_tax) ctr_total_return + FROM + catalog_returns, + date_dim, + customer_address + WHERE + (cr_returned_date_sk = d_date_sk) + AND (d_year = 2000) + AND (cr_returning_addr_sk = ca_address_sk) + GROUP BY + cr_returning_customer_sk, + ca_state + ), + ctr2 as ( + SELECT + (avg(ctr_total_return)) ctr_total_return, + ctr_state + FROM + customer_total_return + group by + ctr_state + ) + SELECT + count(ctr1.ctr_state) + FROM + ctr2 right anti + join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state + and ctr1.ctr_total_return > ctr2.ctr_total_return; + """ +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org