This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 6dddd4c499b [function](cast)Make string casting to integers more like 
MySQL's beh… (#41541)
6dddd4c499b is described below

commit 6dddd4c499b4abcdbd0ec0666dbb7d8c83640a96
Author: Mryange <59914473+mrya...@users.noreply.github.com>
AuthorDate: Fri Oct 11 09:32:00 2024 +0800

    [function](cast)Make string casting to integers more like MySQL's beh… 
(#41541)
    
    …avior (#38847)
    https://github.com/apache/doris/pull/38847
    ## Proposed changes
    
    There are two issues here. First, the results of casting are
    inconsistent between FE and BE .
    ```
    FE
    mysql [(none)]>select cast('3.000' as int);
    +----------------------+
    | cast('3.000' as INT) |
    +----------------------+
    |                    3 |
    +----------------------+
    
    mysql [(none)]>set debug_skip_fold_constant = true;
    
    BE
    mysql [(none)]>select cast('3.000' as int);
    +----------------------+
    | cast('3.000' as INT) |
    +----------------------+
    |                 NULL |
    +----------------------+
    ```
    The second issue is that casting on BE converts '3.0' to null. Here, the
    casting logic for FE and BE has been unified
    
    <!--Describe your changes.-->
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
    
    ---------
    
    Co-authored-by: Xinyi Zou <zouxiny...@gmail.com>
---
 be/src/util/parse_util.cpp                         |  7 +++++++
 be/src/util/string_parser.hpp                      | 20 ++++++++++++++++--
 be/test/util/parse_util_test.cpp                   |  2 +-
 .../data/datatype_p0/string/test_string_basic.out  |  6 ++++++
 .../load_p0/stream_load/test_stream_load_cast.out  |  9 +++++---
 .../cast_function/test_cast_function.out           |  6 +++---
 .../data/variant_p0/compaction/test_compaction.out | 16 +++++++--------
 regression-test/data/variant_p0/load.out           |  2 +-
 .../datatype_p0/string/test_string_basic.groovy    |  8 +++++++-
 .../stream_load/test_stream_load_cast.groovy       | 24 +++++++++++-----------
 10 files changed, 69 insertions(+), 31 deletions(-)

diff --git a/be/src/util/parse_util.cpp b/be/src/util/parse_util.cpp
index 8e810764e7b..e1bd796e974 100644
--- a/be/src/util/parse_util.cpp
+++ b/be/src/util/parse_util.cpp
@@ -98,6 +98,13 @@ int64_t ParseUtil::parse_mem_spec(const std::string& 
mem_spec_str, int64_t paren
         if (result != StringParser::PARSE_SUCCESS) {
             return -1;
         }
+
+        auto limit_val_double =
+                StringParser::string_to_float<double>(mem_spec_str.data(), 
number_str_len, &result);
+        if (result == StringParser::PARSE_SUCCESS && limit_val_double != 
limit_val) {
+            return -1; // mem_spec_str is double.
+        }
+
         bytes = limit_val;
     }
 
diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index 8bc6ecae914..a9cea7b8d29 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -243,6 +243,20 @@ private:
         return true;
     }
 
+    // For strings like "3.0", "3.123", and "3.", can parse them as 3.
+    static inline bool is_float_suffix(const char* __restrict s, int len) {
+        return (s[0] == '.' && is_all_digit(s + 1, len - 1));
+    }
+
+    static inline bool is_all_digit(const char* __restrict s, int len) {
+        for (int i = 0; i < len; ++i) {
+            if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
+                return false;
+            }
+        }
+        return true;
+    }
+
     // Returns the position of the first non-whitespace character in s.
     static inline int skip_leading_whitespace(const char* __restrict s, int 
len) {
         int i = 0;
@@ -306,7 +320,8 @@ T StringParser::string_to_int_internal(const char* 
__restrict s, int len, ParseR
             }
             val = val * 10 + digit;
         } else {
-            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
+            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
+                                         !is_float_suffix(s + i, len - i))))) {
                 // Reject the string because either the first char was not a 
digit,
                 // or the remaining chars are not all whitespace
                 *result = PARSE_FAILURE;
@@ -448,7 +463,8 @@ T StringParser::string_to_int_no_overflow(const char* 
__restrict s, int len, Par
             T digit = s[i] - '0';
             val = val * 10 + digit;
         } else {
-            if ((UNLIKELY(!is_all_whitespace(s + i, len - i)))) {
+            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
+                          !is_float_suffix(s + i, len - i)))) {
                 *result = PARSE_FAILURE;
                 return 0;
             }
diff --git a/be/test/util/parse_util_test.cpp b/be/test/util/parse_util_test.cpp
index e15aaa323cb..99016b45fca 100644
--- a/be/test/util/parse_util_test.cpp
+++ b/be/test/util/parse_util_test.cpp
@@ -95,7 +95,7 @@ TEST(TestParseMemSpec, Bad) {
     for (const auto& value : bad_values) {
         bool is_percent = false;
         int64_t bytes = ParseUtil::parse_mem_spec(value, -1, 
MemInfo::_s_physical_mem, &is_percent);
-        EXPECT_EQ(-1, bytes);
+        EXPECT_EQ(-1, bytes) << ", value: " << value;
     }
 }
 
diff --git a/regression-test/data/datatype_p0/string/test_string_basic.out 
b/regression-test/data/datatype_p0/string/test_string_basic.out
index 6270f4f5d89..af2c8f666e3 100644
--- a/regression-test/data/datatype_p0/string/test_string_basic.out
+++ b/regression-test/data/datatype_p0/string/test_string_basic.out
@@ -98,3 +98,9 @@
 -- !test --
 a
 
+-- !cast_string_to_int --
+3      3       0       0       3       \N      3
+
+-- !cast_string_to_int --
+3      3       0       0       3       \N      3
+
diff --git a/regression-test/data/load_p0/stream_load/test_stream_load_cast.out 
b/regression-test/data/load_p0/stream_load/test_stream_load_cast.out
index 23256d7aeb0..d2373568a6f 100644
--- a/regression-test/data/load_p0/stream_load/test_stream_load_cast.out
+++ b/regression-test/data/load_p0/stream_load/test_stream_load_cast.out
@@ -1,18 +1,21 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !sql1 --
-\N     \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N
+3      3       3       3       3       \N      \N      \N      \N      \N      
\N      \N
 
 -- !sql2 --
-\N     \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N
+3      3       3       3       3       \N      \N      \N      \N      \N      
\N      \N
+3      3       3       3       3       \N      \N      \N      \N      \N      
\N      \N
 
 -- !sql3 --
 \N     \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N
+3      3       3       3       3       \N      \N      \N      \N      \N      
\N      \N
 
 -- !sql4 --
-\N     \N      \N      \N      \N      3.12    3.12    3.1     2024-04-02      
2024-04-02      2024-04-02T17:00        2024-04-02T17:00
+3      3       3       3       3       3.12    3.12    3.1     2024-04-02      
2024-04-02      2024-04-02T17:00        2024-04-02T17:00
 
 -- !sql5 --
 \N     \N      \N      \N      \N      \N      \N      99999999.9      \N      
\N      \N      \N
+3      3       3       3       3       3.12    3.12    3.1     2024-04-02      
2024-04-02      2024-04-02T17:00        2024-04-02T17:00
 
 -- !sql6 --
 \N     \N      \N      \N      \N      \N      \N      99999999.9      \N      
\N      \N      \N
diff --git 
a/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out
 
b/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out
index 1eed1411842..6b34e73bd2e 100644
--- 
a/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out
+++ 
b/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out
@@ -30,11 +30,11 @@ true
 \N
 
 -- !sql_to_small --
-\N
+1212
 
 -- !sql_to_int --
-\N
+1212
 
 -- !sql_to_big --
-\N
+1212
 
diff --git a/regression-test/data/variant_p0/compaction/test_compaction.out 
b/regression-test/data/variant_p0/compaction/test_compaction.out
index 57a5c142fbb..0b905e3930f 100644
--- a/regression-test/data/variant_p0/compaction/test_compaction.out
+++ b/regression-test/data/variant_p0/compaction/test_compaction.out
@@ -58,8 +58,8 @@
 14     [null]
 17     [1]
 17     [1]
-18     [1, 2, null]
-18     [1, 2, null]
+18     [1, 2, 1]
+18     [1, 2, 1]
 
 -- !sql_3 --
 19     1       {"c":1}
@@ -146,8 +146,8 @@
 14     [null]
 17     [1]
 17     [1]
-18     [1, 2, null]
-18     [1, 2, null]
+18     [1, 2, 1]
+18     [1, 2, 1]
 
 -- !sql_33 --
 19     1       {"c":1}
@@ -206,7 +206,7 @@
 -- !sql_2 --
 14     [null]
 17     [1]
-18     [1, 2, null]
+18     [1, 2, 1]
 
 -- !sql_3 --
 19     1       {"c":1}
@@ -259,7 +259,7 @@
 -- !sql_22 --
 14     [null]
 17     [1]
-18     [1, 2, null]
+18     [1, 2, 1]
 
 -- !sql_33 --
 19     1       {"c":1}
@@ -310,7 +310,7 @@
 -- !sql_2 --
 14     [null]
 17     [1]
-18     [1, 2, null]
+18     [1, 2, 1]
 
 -- !sql_3 --
 19     1       {"c":1}
@@ -363,7 +363,7 @@
 -- !sql_22 --
 14     [null]
 17     [1]
-18     [1, 2, null]
+18     [1, 2, 1]
 
 -- !sql_33 --
 19     1       {"c":1}
diff --git a/regression-test/data/variant_p0/load.out 
b/regression-test/data/variant_p0/load.out
index f25d054894c..c40c04cd349 100644
--- a/regression-test/data/variant_p0/load.out
+++ b/regression-test/data/variant_p0/load.out
@@ -1,6 +1,6 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !sql1 --
-1      [1, 2, null]
+1      [1, 2, 1]
 1      [1]
 1      [1]
 1      [null]
diff --git a/regression-test/suites/datatype_p0/string/test_string_basic.groovy 
b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
index 36fbddede2d..c33b7669bec 100644
--- a/regression-test/suites/datatype_p0/string/test_string_basic.groovy
+++ b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
@@ -381,5 +381,11 @@ suite("test_string_basic") {
     }
     assertEquals(table_too_long, "fail")
     sql "drop table if exists varchar_table_too_long;"
-}
 
+    //  calculations on the BE.
+    sql """ set debug_skip_fold_constant = true;"""
+    qt_cast_string_to_int""" select cast('3.123' as int),cast('3.000' as int) 
, cast('0000.0000' as int) , cast('0000' as int),  cast('3.123' as int),  
cast('3.000 ' as int),  cast('3.' as int)"""
+    //  calculations on the FE.
+    sql """ set debug_skip_fold_constant = false;"""
+    qt_cast_string_to_int""" select cast('3.123' as int),cast('3.000' as int) 
, cast('0000.0000' as int) , cast('0000' as int),  cast('3.123' as int),  
cast('3.000 ' as int),  cast('3.' as int)"""
+}
\ No newline at end of file
diff --git 
a/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy 
b/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy
index cf2ef15333a..d173c0e76e1 100644
--- a/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy
@@ -62,7 +62,7 @@ suite("test_stream_load_cast", "p0") {
         }
     }
     sql "sync"
-    qt_sql1 "select * from ${tableName}"
+    qt_sql1 "select * from ${tableName} order by k0"
     sql "sync"
     sql "truncate table ${tableName}"
     sql "sync"
@@ -83,9 +83,9 @@ suite("test_stream_load_cast", "p0") {
             }
             log.info("Stream load result: ${result}".toString())
             def json = parseJson(result)
-            assertEquals("fail", json.Status.toLowerCase())
+            assertEquals("success", json.Status.toLowerCase())
             assertEquals(1, json.NumberTotalRows)
-            assertEquals(1, json.NumberFilteredRows)
+            assertEquals(0, json.NumberFilteredRows)
         }
     }
 
@@ -112,7 +112,7 @@ suite("test_stream_load_cast", "p0") {
         }
     }
     sql "sync"
-    qt_sql2 "select * from ${tableName}"
+    qt_sql2 "select * from ${tableName} order by k0"
     sql "sync"
     sql "truncate table ${tableName}"
     sql "sync"
@@ -134,9 +134,9 @@ suite("test_stream_load_cast", "p0") {
             }
             log.info("Stream load result: ${result}".toString())
             def json = parseJson(result)
-            assertEquals("fail", json.Status.toLowerCase())
+            assertEquals("success", json.Status.toLowerCase())
             assertEquals(1, json.NumberTotalRows)
-            assertEquals(1, json.NumberFilteredRows)
+            assertEquals(0, json.NumberFilteredRows)
         }
     }
 
@@ -162,7 +162,7 @@ suite("test_stream_load_cast", "p0") {
         }
     }
     sql "sync"
-    qt_sql3 "select * from ${tableName}"
+    qt_sql3 "select * from ${tableName}  order by k0"
     sql "sync"
     sql "truncate table ${tableName}"
     sql "sync"
@@ -210,7 +210,7 @@ suite("test_stream_load_cast", "p0") {
         }
     }
     sql "sync"
-    qt_sql4 "select * from ${tableName}"
+    qt_sql4 "select * from ${tableName}  order by k0"
     sql "sync"
     sql "truncate table ${tableName}"
     sql "sync"
@@ -231,9 +231,9 @@ suite("test_stream_load_cast", "p0") {
             }
             log.info("Stream load result: ${result}".toString())
             def json = parseJson(result)
-            assertEquals("fail", json.Status.toLowerCase())
+            assertEquals("success", json.Status.toLowerCase())
             assertEquals(1, json.NumberTotalRows)
-            assertEquals(1, json.NumberFilteredRows)
+            assertEquals(0, json.NumberFilteredRows)
         }
     }
 
@@ -259,7 +259,7 @@ suite("test_stream_load_cast", "p0") {
         }
     }
     sql "sync"
-    qt_sql5 "select * from ${tableName}"
+    qt_sql5 "select * from ${tableName}  order by k0"
     sql "sync"
     sql "truncate table ${tableName}"
     sql "sync"
@@ -311,7 +311,7 @@ suite("test_stream_load_cast", "p0") {
         }
     }
     sql "sync"
-    qt_sql6 "select * from ${tableName}"
+    qt_sql6 "select * from ${tableName}  order by k0"
     sql "sync"
     sql "truncate table ${tableName}"
     sql "sync"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to