(doris) branch branch-2.0 updated: [FIX](decimalv3)Fix decimalv3 with dot string #26072 (#26371)

kxiao Fri, 03 Nov 2023 01:26:13 -0700

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 474f516991b [FIX](decimalv3)Fix decimalv3 with dot string #26072 (#26371)
474f516991b is described below

commit 474f516991b242f7c7175966c456f0af4b2d34e1
Author: amory <[email protected]>
AuthorDate: Fri Nov 3 16:25:45 2023 +0800

    [FIX](decimalv3)Fix decimalv3 with dot string #26072 (#26371)
---
 be/src/util/string_parser.hpp                      | 43 +++++++++++++++-------
 regression-test/data/query_p0/cast/test_cast.out   | 15 ++++++++
 .../suites/query_p0/cast/test_cast.groovy          | 16 ++++++++
 3 files changed, 60 insertions(+), 14 deletions(-)

diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index f85faf2377e..d1726202e46 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -599,11 +599,9 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in
     }
 
     // Ignore leading zeros.
-    bool leading_zero = false;
     bool found_value = false;
     while (len > 0 && UNLIKELY(*s == '0')) {
         found_value = true;
-        leading_zero = true;
         ++s;
         --len;
     }
@@ -627,6 +625,7 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in
 
     int precision = 0;
     int max_digit = type_precision - type_scale;
+    int cur_digit = 0;
     bool found_exponent = false;
     int8_t exponent = 0;
     T value = 0;
@@ -682,31 +681,37 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in
         }
     } else {
         // decimalv3
+        bool has_round = false;
         for (int i = 0; i < len; ++i) {
             const char& c = s[i];
-            // keep a rounding precision to round the decimal value
-            if (LIKELY('0' <= c && c <= '9') &&
-                ((!leading_zero && LIKELY(type_precision >= precision)) ||
-                 (leading_zero && type_precision > precision))) {
+            if (LIKELY('0' <= c && c <= '9')) {
                 found_value = true;
                 // Ignore digits once the type's precision limit is reached. This avoids
                 // overflowing the underlying storage while handling a string like
                 // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
                 // an exponent will be made later.
-                ++precision;
-                scale += found_dot;
-                // decimalv3 should make sure the type_scale and type_precision
-                if (!found_dot && max_digit < (precision - scale)) {
-                    // parse_overflow should only happen when the digit part reached the max
+                if (LIKELY(type_precision > precision) && !has_round) {
+                    value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
+                    ++precision;
+                    scale += found_dot;
+                    cur_digit = precision - scale;
+                } else if (!found_dot && max_digit < (precision - scale)) {
                     *result = StringParser::PARSE_OVERFLOW;
                     value = is_negative ? vectorized::min_decimal_value<vectorized::Decimal<T>>(
                                                   type_precision)
                                         : vectorized::max_decimal_value<vectorized::Decimal<T>>(
                                                   type_precision);
                     return value;
+                } else if (found_dot && scale >= type_scale && !has_round) {
+                    // make rounding cases
+                    if (c > '4') {
+                        value += 1;
+                    }
+                    has_round = true;
+                    continue;
+                } else if (!found_dot) {
+                    ++cur_digit;
                 }
-                // keep a rounding precision to round the decimal value
-                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
                 DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
             } else if (c == '.' && LIKELY(!found_dot)) {
                 found_dot = 1;
@@ -721,7 +726,6 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in
                 }
                 break;
             } else {
-                // jump to here: should handle the wrong character of decimal
                 if (value == 0) {
                     *result = StringParser::PARSE_FAILURE;
                     return 0;
@@ -734,9 +738,20 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in
                     // the E/e character because we make right user-given type_precision
                     // not max number type_precision
                     if (!is_numeric_ascii(c)) {
+                        if (cur_digit > type_precision) {
+                            *result = StringParser::PARSE_OVERFLOW;
+                            value = is_negative
+                                            ? vectorized::min_decimal_value<vectorized::Decimal<T>>(
+                                                      type_precision)
+                                            : vectorized::max_decimal_value<vectorized::Decimal<T>>(
+                                                      type_precision);
+                            return value;
+                        }
                         return is_negative ? T(-value) : T(value);
                     }
                 }
+
+                return is_negative ? T(-value) : T(value);
             }
         }
     }
diff --git a/regression-test/data/query_p0/cast/test_cast.out b/regression-test/data/query_p0/cast/test_cast.out
index 89b29cc663d..b5d75ca4ee0 100644
--- a/regression-test/data/query_p0/cast/test_cast.out
+++ b/regression-test/data/query_p0/cast/test_cast.out
@@ -1,4 +1,19 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql_1 --
+123456789
+
+-- !sql_2 --
+999999999
+
+-- !sql_3 --
+123456790
+
+-- !sql_4 --
+926895541712428044
+
+-- !sql_5 --
+99999999999999999.9
+
 -- !sql_decimalv3 --
 0
 
diff --git a/regression-test/suites/query_p0/cast/test_cast.groovy b/regression-test/suites/query_p0/cast/test_cast.groovy
index c39f89b645e..a7ce1c41fa2 100644
--- a/regression-test/suites/query_p0/cast/test_cast.groovy
+++ b/regression-test/suites/query_p0/cast/test_cast.groovy
@@ -47,6 +47,22 @@ suite('test_cast') {
         result([[-9.9]])
     }
 
+    // round
+    //result([[123456789]])
+    qt_sql_1 " select cast('123456789.0' as DECIMALV3(9, 0)) "
+
+    // result([[999999999]])
+    qt_sql_2 " select cast('999999999.0' as DECIMALV3(9, 0)) "
+
+    // result([[123456790]])
+    qt_sql_3 " select cast('123456789.9' as DECIMALV3(9, 0)) "
+
+    // result([[926895541712428044]])
+    qt_sql_4 " select cast('926895541712428044.1' as DECIMALV3(18,0)); "
+
+    // result([[99999999999999999.9]])
+    qt_sql_5 " select cast('926895541712428044.1' as DECIMAL(18,1)); "
+
     // leading-zeros
     qt_sql_decimalv3 """select CAST('0.29401599228723063' AS DECIMALV3)"""
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch branch-2.0 updated: [FIX](decimalv3)Fix decimalv3 with dot string #26072 (#26371)

Reply via email to