This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 6dddd4c499b [function](cast)Make string casting to integers more like MySQL's beh… (#41541) 6dddd4c499b is described below commit 6dddd4c499b4abcdbd0ec0666dbb7d8c83640a96 Author: Mryange <59914473+mrya...@users.noreply.github.com> AuthorDate: Fri Oct 11 09:32:00 2024 +0800 [function](cast)Make string casting to integers more like MySQL's beh… (#41541) …avior (#38847) https://github.com/apache/doris/pull/38847 ## Proposed changes There are two issues here. First, the results of casting are inconsistent between FE and BE . ``` FE mysql [(none)]>select cast('3.000' as int); +----------------------+ | cast('3.000' as INT) | +----------------------+ | 3 | +----------------------+ mysql [(none)]>set debug_skip_fold_constant = true; BE mysql [(none)]>select cast('3.000' as int); +----------------------+ | cast('3.000' as INT) | +----------------------+ | NULL | +----------------------+ ``` The second issue is that casting on BE converts '3.0' to null. Here, the casting logic for FE and BE has been unified <!--Describe your changes.--> ## Proposed changes Issue Number: close #xxx <!--Describe your changes.--> --------- Co-authored-by: Xinyi Zou <zouxiny...@gmail.com> --- be/src/util/parse_util.cpp | 7 +++++++ be/src/util/string_parser.hpp | 20 ++++++++++++++++-- be/test/util/parse_util_test.cpp | 2 +- .../data/datatype_p0/string/test_string_basic.out | 6 ++++++ .../load_p0/stream_load/test_stream_load_cast.out | 9 +++++--- .../cast_function/test_cast_function.out | 6 +++--- .../data/variant_p0/compaction/test_compaction.out | 16 +++++++-------- regression-test/data/variant_p0/load.out | 2 +- .../datatype_p0/string/test_string_basic.groovy | 8 +++++++- .../stream_load/test_stream_load_cast.groovy | 24 +++++++++++----------- 10 files changed, 69 insertions(+), 31 deletions(-) diff --git a/be/src/util/parse_util.cpp b/be/src/util/parse_util.cpp index 8e810764e7b..e1bd796e974 100644 --- a/be/src/util/parse_util.cpp +++ b/be/src/util/parse_util.cpp @@ -98,6 +98,13 @@ int64_t ParseUtil::parse_mem_spec(const std::string& mem_spec_str, int64_t paren if (result != StringParser::PARSE_SUCCESS) { return -1; } + + auto limit_val_double = + StringParser::string_to_float<double>(mem_spec_str.data(), number_str_len, &result); + if (result == StringParser::PARSE_SUCCESS && limit_val_double != limit_val) { + return -1; // mem_spec_str is double. + } + bytes = limit_val; } diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp index 8bc6ecae914..a9cea7b8d29 100644 --- a/be/src/util/string_parser.hpp +++ b/be/src/util/string_parser.hpp @@ -243,6 +243,20 @@ private: return true; } + // For strings like "3.0", "3.123", and "3.", can parse them as 3. + static inline bool is_float_suffix(const char* __restrict s, int len) { + return (s[0] == '.' && is_all_digit(s + 1, len - 1)); + } + + static inline bool is_all_digit(const char* __restrict s, int len) { + for (int i = 0; i < len; ++i) { + if (!LIKELY(s[i] >= '0' && s[i] <= '9')) { + return false; + } + } + return true; + } + // Returns the position of the first non-whitespace character in s. static inline int skip_leading_whitespace(const char* __restrict s, int len) { int i = 0; @@ -306,7 +320,8 @@ T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseR } val = val * 10 + digit; } else { - if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { + if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && + !is_float_suffix(s + i, len - i))))) { // Reject the string because either the first char was not a digit, // or the remaining chars are not all whitespace *result = PARSE_FAILURE; @@ -448,7 +463,8 @@ T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, Par T digit = s[i] - '0'; val = val * 10 + digit; } else { - if ((UNLIKELY(!is_all_whitespace(s + i, len - i)))) { + if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && + !is_float_suffix(s + i, len - i)))) { *result = PARSE_FAILURE; return 0; } diff --git a/be/test/util/parse_util_test.cpp b/be/test/util/parse_util_test.cpp index e15aaa323cb..99016b45fca 100644 --- a/be/test/util/parse_util_test.cpp +++ b/be/test/util/parse_util_test.cpp @@ -95,7 +95,7 @@ TEST(TestParseMemSpec, Bad) { for (const auto& value : bad_values) { bool is_percent = false; int64_t bytes = ParseUtil::parse_mem_spec(value, -1, MemInfo::_s_physical_mem, &is_percent); - EXPECT_EQ(-1, bytes); + EXPECT_EQ(-1, bytes) << ", value: " << value; } } diff --git a/regression-test/data/datatype_p0/string/test_string_basic.out b/regression-test/data/datatype_p0/string/test_string_basic.out index 6270f4f5d89..af2c8f666e3 100644 --- a/regression-test/data/datatype_p0/string/test_string_basic.out +++ b/regression-test/data/datatype_p0/string/test_string_basic.out @@ -98,3 +98,9 @@ -- !test -- a +-- !cast_string_to_int -- +3 3 0 0 3 \N 3 + +-- !cast_string_to_int -- +3 3 0 0 3 \N 3 + diff --git a/regression-test/data/load_p0/stream_load/test_stream_load_cast.out b/regression-test/data/load_p0/stream_load/test_stream_load_cast.out index 23256d7aeb0..d2373568a6f 100644 --- a/regression-test/data/load_p0/stream_load/test_stream_load_cast.out +++ b/regression-test/data/load_p0/stream_load/test_stream_load_cast.out @@ -1,18 +1,21 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql1 -- -\N \N \N \N \N \N \N \N \N \N \N \N +3 3 3 3 3 \N \N \N \N \N \N \N -- !sql2 -- -\N \N \N \N \N \N \N \N \N \N \N \N +3 3 3 3 3 \N \N \N \N \N \N \N +3 3 3 3 3 \N \N \N \N \N \N \N -- !sql3 -- \N \N \N \N \N \N \N \N \N \N \N \N +3 3 3 3 3 \N \N \N \N \N \N \N -- !sql4 -- -\N \N \N \N \N 3.12 3.12 3.1 2024-04-02 2024-04-02 2024-04-02T17:00 2024-04-02T17:00 +3 3 3 3 3 3.12 3.12 3.1 2024-04-02 2024-04-02 2024-04-02T17:00 2024-04-02T17:00 -- !sql5 -- \N \N \N \N \N \N \N 99999999.9 \N \N \N \N +3 3 3 3 3 3.12 3.12 3.1 2024-04-02 2024-04-02 2024-04-02T17:00 2024-04-02T17:00 -- !sql6 -- \N \N \N \N \N \N \N 99999999.9 \N \N \N \N diff --git a/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out b/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out index 1eed1411842..6b34e73bd2e 100644 --- a/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out +++ b/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out @@ -30,11 +30,11 @@ true \N -- !sql_to_small -- -\N +1212 -- !sql_to_int -- -\N +1212 -- !sql_to_big -- -\N +1212 diff --git a/regression-test/data/variant_p0/compaction/test_compaction.out b/regression-test/data/variant_p0/compaction/test_compaction.out index 57a5c142fbb..0b905e3930f 100644 --- a/regression-test/data/variant_p0/compaction/test_compaction.out +++ b/regression-test/data/variant_p0/compaction/test_compaction.out @@ -58,8 +58,8 @@ 14 [null] 17 [1] 17 [1] -18 [1, 2, null] -18 [1, 2, null] +18 [1, 2, 1] +18 [1, 2, 1] -- !sql_3 -- 19 1 {"c":1} @@ -146,8 +146,8 @@ 14 [null] 17 [1] 17 [1] -18 [1, 2, null] -18 [1, 2, null] +18 [1, 2, 1] +18 [1, 2, 1] -- !sql_33 -- 19 1 {"c":1} @@ -206,7 +206,7 @@ -- !sql_2 -- 14 [null] 17 [1] -18 [1, 2, null] +18 [1, 2, 1] -- !sql_3 -- 19 1 {"c":1} @@ -259,7 +259,7 @@ -- !sql_22 -- 14 [null] 17 [1] -18 [1, 2, null] +18 [1, 2, 1] -- !sql_33 -- 19 1 {"c":1} @@ -310,7 +310,7 @@ -- !sql_2 -- 14 [null] 17 [1] -18 [1, 2, null] +18 [1, 2, 1] -- !sql_3 -- 19 1 {"c":1} @@ -363,7 +363,7 @@ -- !sql_22 -- 14 [null] 17 [1] -18 [1, 2, null] +18 [1, 2, 1] -- !sql_33 -- 19 1 {"c":1} diff --git a/regression-test/data/variant_p0/load.out b/regression-test/data/variant_p0/load.out index f25d054894c..c40c04cd349 100644 --- a/regression-test/data/variant_p0/load.out +++ b/regression-test/data/variant_p0/load.out @@ -1,6 +1,6 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql1 -- -1 [1, 2, null] +1 [1, 2, 1] 1 [1] 1 [1] 1 [null] diff --git a/regression-test/suites/datatype_p0/string/test_string_basic.groovy b/regression-test/suites/datatype_p0/string/test_string_basic.groovy index 36fbddede2d..c33b7669bec 100644 --- a/regression-test/suites/datatype_p0/string/test_string_basic.groovy +++ b/regression-test/suites/datatype_p0/string/test_string_basic.groovy @@ -381,5 +381,11 @@ suite("test_string_basic") { } assertEquals(table_too_long, "fail") sql "drop table if exists varchar_table_too_long;" -} + // calculations on the BE. + sql """ set debug_skip_fold_constant = true;""" + qt_cast_string_to_int""" select cast('3.123' as int),cast('3.000' as int) , cast('0000.0000' as int) , cast('0000' as int), cast('3.123' as int), cast('3.000 ' as int), cast('3.' as int)""" + // calculations on the FE. + sql """ set debug_skip_fold_constant = false;""" + qt_cast_string_to_int""" select cast('3.123' as int),cast('3.000' as int) , cast('0000.0000' as int) , cast('0000' as int), cast('3.123' as int), cast('3.000 ' as int), cast('3.' as int)""" +} \ No newline at end of file diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy index cf2ef15333a..d173c0e76e1 100644 --- a/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy +++ b/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy @@ -62,7 +62,7 @@ suite("test_stream_load_cast", "p0") { } } sql "sync" - qt_sql1 "select * from ${tableName}" + qt_sql1 "select * from ${tableName} order by k0" sql "sync" sql "truncate table ${tableName}" sql "sync" @@ -83,9 +83,9 @@ suite("test_stream_load_cast", "p0") { } log.info("Stream load result: ${result}".toString()) def json = parseJson(result) - assertEquals("fail", json.Status.toLowerCase()) + assertEquals("success", json.Status.toLowerCase()) assertEquals(1, json.NumberTotalRows) - assertEquals(1, json.NumberFilteredRows) + assertEquals(0, json.NumberFilteredRows) } } @@ -112,7 +112,7 @@ suite("test_stream_load_cast", "p0") { } } sql "sync" - qt_sql2 "select * from ${tableName}" + qt_sql2 "select * from ${tableName} order by k0" sql "sync" sql "truncate table ${tableName}" sql "sync" @@ -134,9 +134,9 @@ suite("test_stream_load_cast", "p0") { } log.info("Stream load result: ${result}".toString()) def json = parseJson(result) - assertEquals("fail", json.Status.toLowerCase()) + assertEquals("success", json.Status.toLowerCase()) assertEquals(1, json.NumberTotalRows) - assertEquals(1, json.NumberFilteredRows) + assertEquals(0, json.NumberFilteredRows) } } @@ -162,7 +162,7 @@ suite("test_stream_load_cast", "p0") { } } sql "sync" - qt_sql3 "select * from ${tableName}" + qt_sql3 "select * from ${tableName} order by k0" sql "sync" sql "truncate table ${tableName}" sql "sync" @@ -210,7 +210,7 @@ suite("test_stream_load_cast", "p0") { } } sql "sync" - qt_sql4 "select * from ${tableName}" + qt_sql4 "select * from ${tableName} order by k0" sql "sync" sql "truncate table ${tableName}" sql "sync" @@ -231,9 +231,9 @@ suite("test_stream_load_cast", "p0") { } log.info("Stream load result: ${result}".toString()) def json = parseJson(result) - assertEquals("fail", json.Status.toLowerCase()) + assertEquals("success", json.Status.toLowerCase()) assertEquals(1, json.NumberTotalRows) - assertEquals(1, json.NumberFilteredRows) + assertEquals(0, json.NumberFilteredRows) } } @@ -259,7 +259,7 @@ suite("test_stream_load_cast", "p0") { } } sql "sync" - qt_sql5 "select * from ${tableName}" + qt_sql5 "select * from ${tableName} order by k0" sql "sync" sql "truncate table ${tableName}" sql "sync" @@ -311,7 +311,7 @@ suite("test_stream_load_cast", "p0") { } } sql "sync" - qt_sql6 "select * from ${tableName}" + qt_sql6 "select * from ${tableName} order by k0" sql "sync" sql "truncate table ${tableName}" sql "sync" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org