This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d3ebda73c17 [fix](multi-catalog) Disable string dictionary filtering 
when predicate expression is not a slot (#42113)
d3ebda73c17 is described below

commit d3ebda73c1738de13efd7b6ff77fa98a2fc56d4a
Author: Socrates <suxiaogang...@icloud.com>
AuthorDate: Mon Oct 21 23:01:51 2024 +0800

    [fix](multi-catalog) Disable string dictionary filtering when predicate 
expression is not a slot (#42113)
    
    ## Proposed changes
    Follow-up to https://github.com/apache/doris/pull/35335/
    When a `case when ... then ... when ... then ... else` expression occurs,
    function_expr may not exist in the pushed-down predicate, but the
    handling of NULL values is still problematic.
    
    table data:
    ```text
    mysql> select o_orderpriority from test_string_dict_filter_orc;
    +-----------------+
    | o_orderpriority |
    +-----------------+
    | 5-LOW           |
    | 1-URGENT        |
    | 5-LOW           |
    | NULL            |
    | 5-LOW           |
    +-----------------+
    ```
    
    before:
    ```text
    mysql> select count(o_orderpriority) from ( select (case when 
o_orderpriority = 'x' then '1' when o_orderpriority = 'y' then '2' else '0' 
end) as o_orderpriority from test_string_dict_filter_orc ) as A where 
o_orderpriority = '0';
    +------------------------+
    | count(o_orderpriority) |
    +------------------------+
    |                      4 |
    +------------------------+
    ```
    
    after:
    ```text
    mysql> select count(o_orderpriority) from ( select (case when 
o_orderpriority = 'x' then '1' when o_orderpriority = 'y' then '2' else '0' 
end) as o_orderpriority from test_string_dict_filter_orc ) as A where 
o_orderpriority = '0';
    +------------------------+
    | count(o_orderpriority) |
    +------------------------+
    |                      5 |
    +------------------------+
    ```
---
 be/src/vec/exec/format/orc/vorc_reader.cpp                   |  4 ++--
 be/src/vec/exec/format/parquet/vparquet_group_reader.cpp     |  4 ++--
 .../data/external_table_p0/hive/test_string_dict_filter.out  | 12 ++++++++++++
 .../external_table_p0/hive/test_string_dict_filter.groovy    |  6 ++++++
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 761ddf3a5f9..6b6639f2feb 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -2022,9 +2022,9 @@ bool OrcReader::_can_filter_by_dict(int slot_id) {
         //  the implementation of NULL values because the dictionary itself 
does not contain
         //  NULL value encoding. As a result, many NULL-related functions or 
expressions
         //  cannot work properly, such as is null, is not null, coalesce, etc.
-        //  Here we first disable dictionary filtering when predicate contains 
functions.
+        //  Here we first disable dictionary filtering when predicate expr is 
not slot.
         //  Implementation of NULL value dictionary filtering will be carried 
out later.
-        if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
+        if (expr->node_type() != TExprNodeType::SLOT_REF) {
             return false;
         }
         for (auto& child : expr->children()) {
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 37e82774c39..b9259be936b 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -212,9 +212,9 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id,
         //  the implementation of NULL values because the dictionary itself 
does not contain
         //  NULL value encoding. As a result, many NULL-related functions or 
expressions
         //  cannot work properly, such as is null, is not null, coalesce, etc.
-        //  Here we first disable dictionary filtering when predicate contains 
functions.
+        //  Here we first disable dictionary filtering when predicate is not 
slot.
         //  Implementation of NULL value dictionary filtering will be carried 
out later.
-        if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
+        if (expr->node_type() != TExprNodeType::SLOT_REF) {
             return false;
         }
         for (auto& child : expr->children()) {
diff --git 
a/regression-test/data/external_table_p0/hive/test_string_dict_filter.out 
b/regression-test/data/external_table_p0/hive/test_string_dict_filter.out
index a14f225abe5..2a8cebd8723 100644
--- a/regression-test/data/external_table_p0/hive/test_string_dict_filter.out
+++ b/regression-test/data/external_table_p0/hive/test_string_dict_filter.out
@@ -56,6 +56,9 @@ null
 -- !q14 --
 null
 
+-- !q15 --
+5
+
 -- !q01 --
 3      123314  F       193846.25       1993-10-14      5-LOW   Clerk#000000955 
0       sly final accounts boost. carefully regular ideas cajole carefully. 
depos
 5      44485   F       144659.20       1994-07-30      5-LOW   Clerk#000000925 
0       quickly. bold deposits sleep slyly. packages use slyly
@@ -113,6 +116,9 @@ null
 -- !q14 --
 null
 
+-- !q15 --
+5
+
 -- !q01 --
 3      123314  F       193846.25       1993-10-14      5-LOW   Clerk#000000955 
0       sly final accounts boost. carefully regular ideas cajole carefully. 
depos
 5      44485   F       144659.20       1994-07-30      5-LOW   Clerk#000000925 
0       quickly. bold deposits sleep slyly. packages use slyly
@@ -170,6 +176,9 @@ null
 -- !q14 --
 null
 
+-- !q15 --
+5
+
 -- !q01 --
 3      123314  F       193846.25       1993-10-14      5-LOW   Clerk#000000955 
0       sly final accounts boost. carefully regular ideas cajole carefully. 
depos
 5      44485   F       144659.20       1994-07-30      5-LOW   Clerk#000000925 
0       quickly. bold deposits sleep slyly. packages use slyly
@@ -227,3 +236,6 @@ null
 -- !q14 --
 null
 
+-- !q15 --
+5
+
diff --git 
a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy 
b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy
index 82afc63042f..1929c813c55 100644
--- 
a/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy
+++ 
b/regression-test/suites/external_table_p0/hive/test_string_dict_filter.groovy
@@ -59,6 +59,9 @@ suite("test_string_dict_filter", 
"p0,external,hive,external_docker,external_dock
         qt_q14 """
         select * from ( select COALESCE(o_orderpriority, 'null') AS 
o_orderpriority from test_string_dict_filter_parquet ) as A where 
o_orderpriority = 'null';
         """
+        qt_q15 """
+        select count(o_orderpriority) from ( select (case when o_orderpriority 
= 'x' then '1' when o_orderpriority = 'y' then '2' else '0' end) as 
o_orderpriority from test_string_dict_filter_parquet ) as A where 
o_orderpriority = '0';
+        """
     }
     def q_orc = {
         qt_q01 """
@@ -103,6 +106,9 @@ suite("test_string_dict_filter", 
"p0,external,hive,external_docker,external_dock
         qt_q14 """
         select * from ( select COALESCE(o_orderpriority, 'null') AS 
o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = 
'null';
         """
+        qt_q15 """
+        select count(o_orderpriority) from ( select (case when o_orderpriority 
= 'x' then '1' when o_orderpriority = 'y' then '2' else '0' end) as 
o_orderpriority from test_string_dict_filter_orc ) as A where o_orderpriority = 
'0';
+        """
     }
     String enabled = context.config.otherConfigs.get("enableHiveTest")
     if (enabled == null || !enabled.equalsIgnoreCase("true")) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to