This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new e84ae4f30b1 [fix](decimal) Fix long string casting to decimalv2 (#35121) (#35252) e84ae4f30b1 is described below commit e84ae4f30b144e85cd1df8408a5265d8768b9d2d Author: Gabriel <gabrielleeb...@gmail.com> AuthorDate: Fri May 24 11:26:11 2024 +0800 [fix](decimal) Fix long string casting to decimalv2 (#35121) (#35252) --- be/src/util/string_parser.hpp | 169 ++++++++------------- be/test/vec/data_types/from_string_test.cpp | 2 +- .../data_types/serde/data_type_serde_csv_test.cpp | 31 ++-- .../data_types/serde/data_type_serde_text_test.cpp | 37 ++--- .../datatype_p0/decimalv2/test_decimalv2_load.out | 4 - .../decimalv2/test_decimalv2_overflow2.out | 11 +- .../decimalv2/test_decimalv2_load.groovy | 58 ------- .../decimalv2/test_decimalv2_overflow2.groovy | 18 +++ 8 files changed, 121 insertions(+), 209 deletions(-) diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp index e531be32d18..18c0aba7350 100644 --- a/be/src/util/string_parser.hpp +++ b/be/src/util/string_parser.hpp @@ -634,123 +634,74 @@ T StringParser::string_to_decimal(const char* s, int len, int type_precision, in bool found_exponent = false; int8_t exponent = 0; T value = 0; - if constexpr (TYPE_DECIMALV2 == P) { - // decimalv2 do not care type_scale and type_precision,just keep the origin logic - for (int i = 0; i < len; ++i) { - const char& c = s[i]; - if (LIKELY('0' <= c && c <= '9')) { - found_value = true; - // Ignore digits once the type's precision limit is reached. This avoids - // overflowing the underlying storage while handling a string like - // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and - // an exponent will be made later. - if (LIKELY(type_precision > precision)) { - value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... - } else { - *result = StringParser::PARSE_OVERFLOW; - value = is_negative - ? vectorized::min_decimal_value<DecimalType>(type_precision) - : vectorized::max_decimal_value<DecimalType>(type_precision); - return value; - } - DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. + bool has_round = false; + for (int i = 0; i < len; ++i) { + const char& c = s[i]; + if (LIKELY('0' <= c && c <= '9')) { + found_value = true; + // Ignore digits once the type's precision limit is reached. This avoids + // overflowing the underlying storage while handling a string like + // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and + // an exponent will be made later. + if (LIKELY(type_precision > precision) && !has_round) { + value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... ++precision; scale += found_dot; - } else if (c == '.' && LIKELY(!found_dot)) { - found_dot = 1; - } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { - found_exponent = true; - exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); - if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { - if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { - *result = StringParser::PARSE_UNDERFLOW; - } - return 0; - } - break; - } else { - if (value == 0) { - *result = StringParser::PARSE_FAILURE; - return 0; - } - *result = StringParser::PARSE_SUCCESS; - value *= get_scale_multiplier<T>(type_scale - scale); - - return is_negative ? T(-value) : T(value); - } - } - } else { - // decimalv3 - bool has_round = false; - for (int i = 0; i < len; ++i) { - const char& c = s[i]; - if (LIKELY('0' <= c && c <= '9')) { - found_value = true; - // Ignore digits once the type's precision limit is reached. This avoids - // overflowing the underlying storage while handling a string like - // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and - // an exponent will be made later. - if (LIKELY(type_precision > precision) && !has_round) { - value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... - ++precision; - scale += found_dot; - cur_digit = precision - scale; - } else if (!found_dot && max_digit < (precision - scale)) { - *result = StringParser::PARSE_OVERFLOW; - value = is_negative - ? vectorized::min_decimal_value<DecimalType>(type_precision) + cur_digit = precision - scale; + } else if (!found_dot && max_digit < (precision - scale)) { + *result = StringParser::PARSE_OVERFLOW; + value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) : vectorized::max_decimal_value<DecimalType>(type_precision); - return value; - } else if (found_dot && scale >= type_scale && !has_round) { - // make rounding cases - if (c > '4') { - value += 1; - } - has_round = true; - continue; - } else if (!found_dot) { - ++cur_digit; - } - DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. - } else if (c == '.' && LIKELY(!found_dot)) { - found_dot = 1; - } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { - found_exponent = true; - exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); - if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { - if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { - *result = StringParser::PARSE_UNDERFLOW; - } - return 0; + return value; + } else if (found_dot && scale >= type_scale && !has_round) { + // make rounding cases + if (c > '4') { + value += 1; } - break; - } else { - if (value == 0) { - *result = StringParser::PARSE_FAILURE; - return 0; + has_round = true; + continue; + } else if (!found_dot) { + ++cur_digit; + } + DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. + } else if (c == '.' && LIKELY(!found_dot)) { + found_dot = 1; + } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { + found_exponent = true; + exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); + if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { + if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { + *result = StringParser::PARSE_UNDERFLOW; } - // here to handle - *result = StringParser::PARSE_SUCCESS; - if (type_scale >= scale) { - value *= get_scale_multiplier<T>(type_scale - scale); - // here meet non-valid character, should return the value, keep going to meet - // the E/e character because we make right user-given type_precision - // not max number type_precision - if (!is_numeric_ascii(c)) { - if (cur_digit > type_precision) { - *result = StringParser::PARSE_OVERFLOW; - value = is_negative ? vectorized::min_decimal_value<DecimalType>( - type_precision) - : vectorized::max_decimal_value<DecimalType>( - type_precision); - return value; - } - return is_negative ? T(-value) : T(value); + return 0; + } + break; + } else { + if (value == 0) { + *result = StringParser::PARSE_FAILURE; + return 0; + } + // here to handle + *result = StringParser::PARSE_SUCCESS; + if (type_scale >= scale) { + value *= get_scale_multiplier<T>(type_scale - scale); + // here meet non-valid character, should return the value, keep going to meet + // the E/e character because we make right user-given type_precision + // not max number type_precision + if (!is_numeric_ascii(c)) { + if (cur_digit > type_precision) { + *result = StringParser::PARSE_OVERFLOW; + value = is_negative + ? vectorized::min_decimal_value<DecimalType>(type_precision) + : vectorized::max_decimal_value<DecimalType>( + type_precision); + return value; } + return is_negative ? T(-value) : T(value); } - - return is_negative ? T(-value) : T(value); } + + return is_negative ? T(-value) : T(value); } } diff --git a/be/test/vec/data_types/from_string_test.cpp b/be/test/vec/data_types/from_string_test.cpp index beeac5d96d8..594b67c7b11 100644 --- a/be/test/vec/data_types/from_string_test.cpp +++ b/be/test/vec/data_types/from_string_test.cpp @@ -104,7 +104,7 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) { "12345678901234567.012345677", "12345678901234567.012345677", "999999999999999999.999999999"}, {"12345678901234567.012345678", "123456789012345678.012345670", - "12345678901234567.012345678", "", ""}), + "12345678901234567.012345678", "12345678901234567.012345678", ""}), // decimal32 ==> decimal32(9,2) FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32, // (7,2) (6,3) (7,3) (8,1) diff --git a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp index a9f5a7b12f3..82e30e4ca53 100644 --- a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp @@ -74,21 +74,22 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) { FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris be better"}, {"doris be better"}), // decimal ==> decimalv2(decimal<128>(27,9)) - FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL, - { - // (17, 9)(first 0 will ignore) - "012345678901234567.012345678", - // (18, 8) (automatically fill 0 for scala) - "123456789012345678.01234567", - // (17, 10) (rounding last to make it fit) - "12345678901234567.0123456779", - // (17, 11) (rounding last to make it fit) - "12345678901234567.01234567791", - // (19, 8) (wrong) - "1234567890123456789.01234567", - }, - {"12345678901234567.012345678", "123456789012345678.012345670", - "12345678901234567.012345678", "", ""}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DECIMAL, + { + // (17, 9)(first 0 will ignore) + "012345678901234567.012345678", + // (18, 8) (automatically fill 0 for scala) + "123456789012345678.01234567", + // (17, 10) (rounding last to make it fit) + "12345678901234567.0123456779", + // (17, 11) (rounding last to make it fit) + "12345678901234567.01234567791", + // (19, 8) (wrong) + "1234567890123456789.01234567", + }, + {"12345678901234567.012345678", "123456789012345678.012345670", + "12345678901234567.012345678", "12345678901234567.012345678", ""}), // decimal32 ==> decimal32(9,2) (7,2) (6,3) (7,3) (8,1) FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32, {"1234567.12", "123456.123", "1234567.123", "12345679.1"}, diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp index ce19146f83c..652354ea487 100644 --- a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp @@ -74,21 +74,22 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris be better"}, {"doris be better"}), // decimal ==> decimalv2(decimal<128>(27,9)) - FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL, - { - // (17, 9)(first 0 will ignore) - "012345678901234567.012345678", - // (18, 8) (automatically fill 0 for scala) - "123456789012345678.01234567", - // (17, 10) (rounding last to make it fit) - "12345678901234567.0123456779", - // (17, 11) (rounding last to make it fit) - "12345678901234567.01234567791", - // (19, 8) (wrong) - "1234567890123456789.01234567", - }, - {"12345678901234567.012345678", "123456789012345678.012345670", - "12345678901234567.012345678", "", ""}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DECIMAL, + { + // (17, 9)(first 0 will ignore) + "012345678901234567.012345678", + // (18, 8) (automatically fill 0 for scala) + "123456789012345678.01234567", + // (17, 10) (rounding last to make it fit) + "12345678901234567.0123456779", + // (17, 11) (rounding last to make it fit) + "12345678901234567.01234567791", + // (19, 8) (wrong) + "1234567890123456789.01234567", + }, + {"12345678901234567.012345678", "123456789012345678.012345670", + "12345678901234567.012345678", "12345678901234567.012345678", ""}), // decimal32 ==> decimal32(9,2) (7,2) (6,3) (7,3) (8,1) FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32, {"1234567.12", "123456.123", "1234567.123", "12345679.1"}, @@ -350,13 +351,13 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { "[\\1234567890123456789.01234567\\]"}, {"[4.000000000, 5.500000000, 6.670000000]", "[12345678901234567.012345678, 123456789012345678.012345670, " - "12345678901234567.012345678, NULL, NULL]", + "12345678901234567.012345678, 12345678901234567.012345678, NULL]", "[NULL, NULL, NULL, NULL, NULL]", "[NULL]"}, {"[4.000000000, 5.500000000, 6.670000000]", "[12345678901234567.012345678, 123456789012345678.012345670, " - "12345678901234567.012345678, NULL, NULL]", + "12345678901234567.012345678, 12345678901234567.012345678, NULL]", "[12345678901234567.012345678, 123456789012345678.012345670, " - "12345678901234567.012345678, NULL, NULL]", + "12345678901234567.012345678, 12345678901234567.012345678, NULL]", "[NULL]"}), }; // array type diff --git a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out index a0f046fc4e9..8156a9144aa 100644 --- a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out +++ b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out @@ -15,7 +15,3 @@ 11.99990 837.43444 --- !decimalv2_insert -- -999999999999999999.999999999 1.000000000 --999999999999999999.999999999 2.000000000 - diff --git a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out index ecce20f1b22..fdd14e48bad 100644 --- a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out +++ b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out @@ -27,10 +27,10 @@ 999999999999999999.999999999 -- !multi_overflow2 -- -999999999999999999.999999999 999999999999999999.999999999000000000 +999999999999999999.999999999 999999999999999999.999999999 -- !multi_overflow3 -- -999999999999999999.999999999 999999999999999999.999999999000000000 +999999999999999999.999999999 999999999999999999.999999999 -- !multi_overflow4 -- 999999999999999999.999999999 1.000000000 999999999999999999.999999999 @@ -39,10 +39,10 @@ 99999999999999999.999999999 0.100000000 999999999999999999.999999990 -- !div_overflow2 -- -999999999999999999.999999990 +999999999999999999.99999999 -- !div_overflow3 -- -99999999999999999.999999999 0.1 999999999999999999.9999999900000 +99999999999999999.999999999 0.1 999999999999999999.999999990 -- !div_overflow4 -- 999999999999999999.999999990 @@ -59,3 +59,6 @@ -- !mod4 -- 0.099999999 +-- !sql -- +2023-12-18T00:00 95357.10 + diff --git a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy index c8f156a3bf9..5c065a921a0 100644 --- a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy +++ b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy @@ -84,64 +84,6 @@ suite("test_decimalv2_load", "nonConcurrent") { select * from ${tableName2} order by 1; """ - sql """ - drop table if exists test_decimalv2_insert; - """ - sql """ - CREATE TABLE `test_decimalv2_insert` ( - `k1` decimalv2(27, 9) null, - `k2` decimalv2(27, 9) null - ) - DISTRIBUTED BY HASH(`k1`) BUCKETS 10 - PROPERTIES ( - "replication_num" = "1" - ); - """ - sql "set enable_insert_strict=true;" - test { - sql """ - insert into test_decimalv2_insert values("999999999999999999999999999999",1); - """ - exception "Invalid" - } - test { - sql """ - insert into test_decimalv2_insert values("-999999999999999999999999999999",2); - """ - exception "Invalid" - } - test { - sql """ - insert into test_decimalv2_insert values("999999999999999999.9999999991", 3); - """ - exception "Invalid" - } - test { - sql """ - insert into test_decimalv2_insert values("-999999999999999999.9999999991",4); - """ - exception "Invalid" - } - test { - sql """ - insert into test_decimalv2_insert values("999999999999999999.9999999995",5); - """ - exception "Invalid" - } - test { - sql """ - insert into test_decimalv2_insert values("-999999999999999999.9999999995",6); - """ - exception "Invalid" - } - sql """ - insert into test_decimalv2_insert values("999999999999999999.999999999", 1); - """ - sql """ - insert into test_decimalv2_insert values("-999999999999999999.999999999", 2); - """ - qt_decimalv2_insert "select * from test_decimalv2_insert order by 2; " - sql """ admin set frontend config("enable_decimal_conversion" = "true"); """ diff --git a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy index ad6dea6765c..b183e00243f 100644 --- a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy +++ b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy @@ -269,6 +269,24 @@ suite("test_decimalv2_overflow2") { """ + sql """ drop TABLE if exists test_table """ + sql """ CREATE TABLE `test_table` ( + `day_date` datetime NULL COMMENT '', + `growth_money` decimalv2(18, 2) NULL COMMENT '' + ) ENGINE=OLAP + UNIQUE KEY(`day_date`) + COMMENT '' + DISTRIBUTED BY HASH(`day_date`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); """ + sql """ insert into test_table values ('2023-12-18', '95357.100000000000000000000000000000000000')""" + qt_sql """ select * from test_table """ + sql """ drop TABLE if exists test_table """ // TODO // decimalv2 +-*/ integer // integer +-*/ decimalv2 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org