This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-4.0-preview in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4842cb41dda71d345ee7c991dd384407cfc8731d Author: qiye <jianliang5...@gmail.com> AuthorDate: Fri Apr 26 19:46:49 2024 +0800 [fix](ES catalog)Make col != '' behavior consistent with SQL (#34151) In SQL syntax, `col != ''` is equivalent to `col.length() > 0`: the column must exist in the ES document's fields and its content must not be empty. In this PR, we add a special translation for this binary predicate so that the query pushed down to ES behaves the same as the SQL predicate. --------- Co-authored-by: Luennng <luen...@gmail.com> --- .../java/org/apache/doris/datasource/es/QueryBuilders.java | 10 ++++++++++ .../apache/doris/external/elasticsearch/QueryBuildersTest.java | 9 +++++++++ regression-test/data/external_table_p0/es/test_es_query.out | 9 +-------- .../suites/external_table_p0/es/test_es_query.groovy | 2 +- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/es/QueryBuilders.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/es/QueryBuilders.java index 241f9e4ba43..3a54e012a32 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/es/QueryBuilders.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/es/QueryBuilders.java @@ -40,6 +40,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import lombok.Builder; import lombok.Data; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.joda.time.format.DateTimeFormat; @@ -137,6 +138,15 @@ public final class QueryBuilders { case EQ_FOR_NULL: return QueryBuilders.termQuery(column, value); case NE: + // col != '' means col.length() > 0 in SQL syntax. + // The `NULL` value should not present in results. 
+ // It equals + // '{"bool":{"must":{"bool":{"must_not":{"term":{"col":""}},"must":{"exists":{"field":"col"}}}}}}' + // in Elasticsearch + if (value instanceof String && StringUtils.isEmpty((String) value)) { + return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(column, value)) + .must(QueryBuilders.existsQuery(column)); + } return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(column, value)); case GE: return QueryBuilders.rangeQuery(column).gte(value); diff --git a/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java b/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java index 9829c1705ce..3cf9261b932 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java @@ -86,6 +86,15 @@ public class QueryBuildersTest { Assertions.assertEquals("{\"term\":{\"k2\":\"2023-02-19T22:00:00.000+08:00\"}}", QueryBuilders.toEsDsl(dateTimeEqExpr, new ArrayList<>(), new HashMap<>(), BuilderOptions.builder().needCompatDateFields(Lists.newArrayList("k2")).build()).toJson()); + SlotRef k3 = new SlotRef(null, "k3"); + Expr stringLiteral = new StringLiteral(""); + Expr stringNeExpr = new BinaryPredicate(Operator.NE, k3, stringLiteral); + Assertions.assertEquals("{\"bool\":{\"must\":{\"exists\":{\"field\":\"k3\"}},\"must_not\":{\"term\":{\"k3\":\"\"}}}}", + QueryBuilders.toEsDsl(stringNeExpr).toJson()); + stringLiteral = new StringLiteral("message"); + stringNeExpr = new BinaryPredicate(Operator.NE, k3, stringLiteral); + Assertions.assertEquals("{\"bool\":{\"must_not\":{\"term\":{\"k3\":\"message\"}}}}", + QueryBuilders.toEsDsl(stringNeExpr).toJson()); } @Test diff --git a/regression-test/data/external_table_p0/es/test_es_query.out b/regression-test/data/external_table_p0/es/test_es_query.out index 78556a3cd46..2e98ee6a174 100644 --- 
a/regression-test/data/external_table_p0/es/test_es_query.out +++ b/regression-test/data/external_table_p0/es/test_es_query.out @@ -12,8 +12,6 @@ 2022-08-08 2022-08-11T12:10:10 2022-08-11T12:10:10 2022-08-11T12:10:10 2022-08-11T11:10:10 -- !sql04 -- -\N -\N I'm not null or empty -- !sql05 -- @@ -91,7 +89,6 @@ true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01T12:00 a d 192.168.0. 2022-08-08T20:10:10 -- !sql_6_16 -- -\N I'm not null or empty -- !sql_6_17 -- @@ -168,8 +165,6 @@ value1 value2 2022-08-08T20:10:10 -- !sql_7_19 -- -\N -\N I'm not null or empty -- !sql_7_20 -- @@ -188,7 +183,7 @@ I'm not null or empty [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] ["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] \N I'm not null or empty \N string3 2022-08-09T00:40:10 text3_4*5 5.0 2022-08-08T00:00 2022-08-10T12:10:10 1660104610000 2022-08-10T [...] [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] ["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] debug \N This string can be quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 [...] 
--- !sql_7_19 -- +-- !sql_7_24 -- value1 value2 -- !sql_8_01 -- @@ -255,8 +250,6 @@ value1 value2 2022-08-08T20:10:10 -- !sql_8_17 -- -\N -\N I'm not null or empty -- !sql_8_18 -- diff --git a/regression-test/suites/external_table_p0/es/test_es_query.groovy b/regression-test/suites/external_table_p0/es/test_es_query.groovy index 9acf67891e5..f2af00d6fe6 100644 --- a/regression-test/suites/external_table_p0/es/test_es_query.groovy +++ b/regression-test/suites/external_table_p0/es/test_es_query.groovy @@ -242,7 +242,7 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { } assertTrue(containeHide7) - order_qt_sql_7_19 """select * from test3_20231005""" + order_qt_sql_7_24 """select * from test3_20231005""" sql """switch test_es_query_es8""" order_qt_sql_8_01 """select * from test1 where test2='text#1'""" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org