This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4ca0c0face [fix](join) fix wrong result of right join (#18365)
4ca0c0face is described below

commit 4ca0c0facecf532b2cc032a22bc0c99760d718d4
Author: TengJianPing <18241664+jackte...@users.noreply.github.com>
AuthorDate: Thu Apr 6 10:55:58 2023 +0800

    [fix](join) fix wrong result of right join (#18365)
    
    When processing data in hash table for right join and full outer join, if 
the output data rows of one hash bucket excceeds batch size, the logic when 
continue processing this bucket is wrong, it should differentiate between 
different join types.
---
 .../vec/exec/join/process_hash_table_probe_impl.h  |  34 +++--
 .../functions_test/test_right_join.out             |  13 ++
 .../functions_test/test_right_join.groovy          | 158 +++++++++++++++++++++
 3 files changed, 196 insertions(+), 9 deletions(-)

diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h 
b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index fa23105c1e..ba49134c03 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -1058,8 +1058,24 @@ Status 
ProcessHashTableProbe<JoinOpType>::process_data_in_hashtable(HashTableTyp
         };
 
         if (visited_iter.ok()) {
-            for (; visited_iter.ok() && block_size < _batch_size; 
++visited_iter) {
-                insert_from_hash_table(visited_iter->block_offset, 
visited_iter->row_num);
+            if constexpr (std::is_same_v<Mapped, RowRefListWithFlag>) {
+                for (; visited_iter.ok() && block_size < _batch_size; 
++visited_iter) {
+                    insert_from_hash_table(visited_iter->block_offset, 
visited_iter->row_num);
+                }
+            } else {
+                for (; visited_iter.ok() && block_size < _batch_size; 
++visited_iter) {
+                    if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
+                        if (visited_iter->visited) {
+                            insert_from_hash_table(visited_iter->block_offset,
+                                                   visited_iter->row_num);
+                        }
+                    } else {
+                        if (!visited_iter->visited) {
+                            insert_from_hash_table(visited_iter->block_offset,
+                                                   visited_iter->row_num);
+                        }
+                    }
+                }
             }
             if (!visited_iter.ok()) {
                 ++iter;
@@ -1070,16 +1086,16 @@ Status 
ProcessHashTableProbe<JoinOpType>::process_data_in_hashtable(HashTableTyp
             auto& mapped = iter->get_second();
             if constexpr (std::is_same_v<Mapped, RowRefListWithFlag>) {
                 if (mapped.visited) {
-                    visited_iter = mapped.begin();
-                    for (; visited_iter.ok() && block_size < _batch_size; 
++visited_iter) {
-                        if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
+                    if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
+                        visited_iter = mapped.begin();
+                        for (; visited_iter.ok() && block_size < _batch_size; 
++visited_iter) {
                             insert_from_hash_table(visited_iter->block_offset,
                                                    visited_iter->row_num);
                         }
-                    }
-                    if (visited_iter.ok()) {
-                        // block_size >= _batch_size, quit for loop
-                        break;
+                        if (visited_iter.ok()) {
+                            // block_size >= _batch_size, quit for loop
+                            break;
+                        }
                     }
                 } else {
                     if constexpr (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) {
diff --git 
a/regression-test/data/tpcds_sf1_p1/functions_test/test_right_join.out 
b/regression-test/data/tpcds_sf1_p1/functions_test/test_right_join.out
new file mode 100644
index 0000000000..94939a0607
--- /dev/null
+++ b/regression-test/data/tpcds_sf1_p1/functions_test/test_right_join.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql_right_outer_join_with_other_conjuncts --
+27796
+
+-- !sql_full_join_with_other_conjuncts --
+27796
+
+-- !sql_right_semi_join_with_other_conjuncts --
+8153
+
+-- !sql_right_anti_join_with_other_conjuncts --
+19643
+
diff --git 
a/regression-test/suites/tpcds_sf1_p1/functions_test/test_right_join.groovy 
b/regression-test/suites/tpcds_sf1_p1/functions_test/test_right_join.groovy
new file mode 100644
index 0000000000..32683de5f9
--- /dev/null
+++ b/regression-test/suites/tpcds_sf1_p1/functions_test/test_right_join.groovy
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_right_join") {
+    sql """set enable_nereids_planner=false"""
+    sql """set parallel_fragment_exec_instance_num=1"""
+    sql "use regression_test_tpcds_sf1_p1"
+
+    qt_sql_right_outer_join_with_other_conjuncts """
+      WITH customer_total_return AS (
+          SELECT
+              ca_state ctr_state,
+              sum(cr_return_amt_inc_tax) ctr_total_return
+          FROM
+              catalog_returns,
+              date_dim,
+              customer_address
+          WHERE
+              (cr_returned_date_sk = d_date_sk)
+              AND (d_year = 2000)
+              AND (cr_returning_addr_sk = ca_address_sk)
+          GROUP BY
+              cr_returning_customer_sk,
+              ca_state
+      ),
+      ctr2 as (
+          SELECT
+              (avg(ctr_total_return)) ctr_total_return,
+              ctr_state
+          FROM
+              customer_total_return
+          group by
+              ctr_state
+      )
+      SELECT
+          count(ctr1.ctr_state)
+      FROM
+          ctr2 right
+          join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state
+          and ctr1.ctr_total_return > ctr2.ctr_total_return;
+    """
+
+    qt_sql_full_join_with_other_conjuncts """
+      WITH customer_total_return AS (
+          SELECT
+              ca_state ctr_state,
+              sum(cr_return_amt_inc_tax) ctr_total_return
+          FROM
+              catalog_returns,
+              date_dim,
+              customer_address
+          WHERE
+              (cr_returned_date_sk = d_date_sk)
+              AND (d_year = 2000)
+              AND (cr_returning_addr_sk = ca_address_sk)
+          GROUP BY
+              cr_returning_customer_sk,
+              ca_state
+      ),
+      ctr2 as (
+          SELECT
+              (avg(ctr_total_return)) ctr_total_return,
+              ctr_state
+          FROM
+              customer_total_return
+          group by
+              ctr_state
+      )
+      SELECT
+          count(ctr1.ctr_state)
+      FROM
+          ctr2 full
+          join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state
+          and ctr1.ctr_total_return > ctr2.ctr_total_return;
+    """
+
+    qt_sql_right_semi_join_with_other_conjuncts """
+      WITH customer_total_return AS (
+          SELECT
+              ca_state ctr_state,
+              sum(cr_return_amt_inc_tax) ctr_total_return
+          FROM
+              catalog_returns,
+              date_dim,
+              customer_address
+          WHERE
+              (cr_returned_date_sk = d_date_sk)
+              AND (d_year = 2000)
+              AND (cr_returning_addr_sk = ca_address_sk)
+          GROUP BY
+              cr_returning_customer_sk,
+              ca_state
+      ),
+      ctr2 as (
+          SELECT
+              (avg(ctr_total_return)) ctr_total_return,
+              ctr_state
+          FROM
+              customer_total_return
+          group by
+              ctr_state
+      )
+      SELECT
+          count(ctr1.ctr_state)
+      FROM
+          ctr2 right semi
+          join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state
+          and ctr1.ctr_total_return > ctr2.ctr_total_return;
+    """
+
+    qt_sql_right_anti_join_with_other_conjuncts """
+      WITH customer_total_return AS (
+          SELECT
+              ca_state ctr_state,
+              sum(cr_return_amt_inc_tax) ctr_total_return
+          FROM
+              catalog_returns,
+              date_dim,
+              customer_address
+          WHERE
+              (cr_returned_date_sk = d_date_sk)
+              AND (d_year = 2000)
+              AND (cr_returning_addr_sk = ca_address_sk)
+          GROUP BY
+              cr_returning_customer_sk,
+              ca_state
+      ),
+      ctr2 as (
+          SELECT
+              (avg(ctr_total_return)) ctr_total_return,
+              ctr_state
+          FROM
+              customer_total_return
+          group by
+              ctr_state
+      )
+      SELECT
+          count(ctr1.ctr_state)
+      FROM
+          ctr2 right anti
+          join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state
+          and ctr1.ctr_total_return > ctr2.ctr_total_return;
+    """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to