This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-4.0-preview
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 4842cb41dda71d345ee7c991dd384407cfc8731d
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Fri Apr 26 19:46:49 2024 +0800

    [fix](ES catalog)Make col != '' behavior consistent with SQL (#34151)
    
    In SQL syntax, `col != ''` is equivalent to `col.length() > 0`.
    This means the column must exist in the ES document's fields and its content
    must not be empty.
    This PR adds a special translation for this binary predicate so that the ES
    query behaves consistently with SQL.
    
    ---------
    
    Co-authored-by: Luennng <luen...@gmail.com>
---
 .../java/org/apache/doris/datasource/es/QueryBuilders.java     | 10 ++++++++++
 .../apache/doris/external/elasticsearch/QueryBuildersTest.java |  9 +++++++++
 regression-test/data/external_table_p0/es/test_es_query.out    |  9 +--------
 .../suites/external_table_p0/es/test_es_query.groovy           |  2 +-
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/es/QueryBuilders.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/es/QueryBuilders.java
index 241f9e4ba43..3a54e012a32 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/es/QueryBuilders.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/es/QueryBuilders.java
@@ -40,6 +40,7 @@ import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import lombok.Builder;
 import lombok.Data;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.joda.time.format.DateTimeFormat;
@@ -137,6 +138,15 @@ public final class QueryBuilders {
             case EQ_FOR_NULL:
                 return QueryBuilders.termQuery(column, value);
             case NE:
+                // col != '' means col.length() > 0 in SQL syntax.
+                // The `NULL` value should not present in results.
+                // It equals
+                // 
'{"bool":{"must":{"bool":{"must_not":{"term":{"col":""}},"must":{"exists":{"field":"col"}}}}}}'
+                // in Elasticsearch
+                if (value instanceof String && StringUtils.isEmpty((String) 
value)) {
+                    return 
QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(column, value))
+                        .must(QueryBuilders.existsQuery(column));
+                }
                 return 
QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(column, value));
             case GE:
                 return QueryBuilders.rangeQuery(column).gte(value);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java
index 9829c1705ce..3cf9261b932 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java
@@ -86,6 +86,15 @@ public class QueryBuildersTest {
         
Assertions.assertEquals("{\"term\":{\"k2\":\"2023-02-19T22:00:00.000+08:00\"}}",
                 QueryBuilders.toEsDsl(dateTimeEqExpr, new ArrayList<>(), new 
HashMap<>(),
                         
BuilderOptions.builder().needCompatDateFields(Lists.newArrayList("k2")).build()).toJson());
+        SlotRef k3 = new SlotRef(null, "k3");
+        Expr stringLiteral = new StringLiteral("");
+        Expr stringNeExpr = new BinaryPredicate(Operator.NE, k3, 
stringLiteral);
+        
Assertions.assertEquals("{\"bool\":{\"must\":{\"exists\":{\"field\":\"k3\"}},\"must_not\":{\"term\":{\"k3\":\"\"}}}}",
+                QueryBuilders.toEsDsl(stringNeExpr).toJson());
+        stringLiteral = new StringLiteral("message");
+        stringNeExpr = new BinaryPredicate(Operator.NE, k3, stringLiteral);
+        
Assertions.assertEquals("{\"bool\":{\"must_not\":{\"term\":{\"k3\":\"message\"}}}}",
+                QueryBuilders.toEsDsl(stringNeExpr).toJson());
     }
 
     @Test
diff --git a/regression-test/data/external_table_p0/es/test_es_query.out 
b/regression-test/data/external_table_p0/es/test_es_query.out
index 78556a3cd46..2e98ee6a174 100644
--- a/regression-test/data/external_table_p0/es/test_es_query.out
+++ b/regression-test/data/external_table_p0/es/test_es_query.out
@@ -12,8 +12,6 @@
 2022-08-08     2022-08-11T12:10:10     2022-08-11T12:10:10     
2022-08-11T12:10:10     2022-08-11T11:10:10
 
 -- !sql04 --
-\N
-\N
 I'm not null or empty
 
 -- !sql05 --
@@ -91,7 +89,6 @@ true  1       128     32768   -1      0       1.0     1.0     
1.0     1.0     2020-01-01      2020-01-01T12:00        a       d       
192.168.0.
 2022-08-08T20:10:10
 
 -- !sql_6_16 --
-\N
 I'm not null or empty
 
 -- !sql_6_17 --
@@ -168,8 +165,6 @@ value1      value2
 2022-08-08T20:10:10
 
 -- !sql_7_19 --
-\N
-\N
 I'm not null or empty
 
 -- !sql_7_20 --
@@ -188,7 +183,7 @@ I'm not null or empty
 [1, 0, 1, 1]   [1, -2, -3, 4]  ["2020-01-01", "2020-01-02"]    ["2020-01-01 
12:00:00", "2020-01-02 13:01:01"]  [1, 2, 3, 4]    [1, 1.1, 1.2, 1.3]      [1, 
2, 3, 4]    [32768, 32769, -32769, -32770]  ["192.168.0.1", "127.0.0.1"]    
["a", "b", "c"] [-1, 0, 1, 2]   ["{"name":"Andy","age":18}", 
"{"name":"Tim","age":28}"] [1, 2, 3, 4]    [128, 129, -129, -130]  ["d", "e", 
"f"] [0, 1, 2, 3]    \N      I'm not null or empty   \N      string3 
2022-08-09T00:40:10     text3_4*5       5.0     2022-08-08T00:00        
2022-08-10T12:10:10     1660104610000   2022-08-10T [...]
 [1, 0, 1, 1]   [1, -2, -3, 4]  ["2020-01-01", "2020-01-02"]    ["2020-01-01 
12:00:00", "2020-01-02 13:01:01"]  [1, 2, 3, 4]    [1, 1.1, 1.2, 1.3]      [1, 
2, 3, 4]    [32768, 32769, -32769, -32770]  ["192.168.0.1", "127.0.0.1"]    
["a", "b", "c"] [-1, 0, 1, 2]   ["{"name":"Andy","age":18}", 
"{"name":"Tim","age":28}"] [1, 2, 3, 4]    [128, 129, -129, -130]  ["d", "e", 
"f"] [0, 1, 2, 3]    debug   \N      This string can be quite lengthy        
string1 2022-08-08T20:10:10     text#1  3.14    2022-08-08T00:00        
2022-08-08T12:10:10     1659931810000 [...]
 
--- !sql_7_19 --
+-- !sql_7_24 --
 value1 value2
 
 -- !sql_8_01 --
@@ -255,8 +250,6 @@ value1      value2
 2022-08-08T20:10:10
 
 -- !sql_8_17 --
-\N
-\N
 I'm not null or empty
 
 -- !sql_8_18 --
diff --git a/regression-test/suites/external_table_p0/es/test_es_query.groovy 
b/regression-test/suites/external_table_p0/es/test_es_query.groovy
index 9acf67891e5..f2af00d6fe6 100644
--- a/regression-test/suites/external_table_p0/es/test_es_query.groovy
+++ b/regression-test/suites/external_table_p0/es/test_es_query.groovy
@@ -242,7 +242,7 @@ suite("test_es_query", 
"p0,external,es,external_docker,external_docker_es") {
         }
         assertTrue(containeHide7)
 
-        order_qt_sql_7_19 """select * from test3_20231005"""
+        order_qt_sql_7_24 """select * from test3_20231005"""
 
         sql """switch test_es_query_es8"""
         order_qt_sql_8_01 """select * from test1 where test2='text#1'"""


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to