This is an automated email from the ASF dual-hosted git repository. gabriellee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 9114896178 [DecimalV3](opt) opt the function of decimalv3 to_string logic (#16427) 9114896178 is described below commit 91148961784fef3572e07ebf5a9f05a870fd154c Author: HappenLee <happen...@hotmail.com> AuthorDate: Tue Feb 7 13:28:07 2023 +0800 [DecimalV3](opt) opt the function of decimalv3 to_string logic (#16427) --- be/src/exec/olap_common.h | 13 ++---- be/src/exprs/runtime_filter.h | 18 +++----- be/src/vec/core/types.h | 66 +++++++++++++++++++++++++++++ be/src/vec/data_types/data_type_decimal.cpp | 11 ++--- be/src/vec/exprs/vliteral.cpp | 15 ++++--- be/src/vec/functions/function_cast.h | 1 + be/src/vec/io/io_helper.h | 15 ------- be/src/vec/utils/histogram_helpers.hpp | 27 +++--------- 8 files changed, 96 insertions(+), 70 deletions(-) diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 2980791b63..fde528131e 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -30,6 +30,7 @@ #include "olap/olap_tuple.h" #include "runtime/primitive_type.h" #include "runtime/type_limit.h" +#include "vec/core/types.h" #include "vec/io/io_helper.h" #include "vec/runtime/vdatetime_value.h" @@ -38,17 +39,11 @@ namespace doris { template <PrimitiveType primitive_type, class T> std::string cast_to_string(T value, int scale) { if constexpr (primitive_type == TYPE_DECIMAL32) { - std::stringstream ss; - vectorized::write_text<int32_t>((int32_t)value, scale, ss); - return ss.str(); + return ((vectorized::Decimal<int32_t>)value).to_string(scale); } else if constexpr (primitive_type == TYPE_DECIMAL64) { - std::stringstream ss; - vectorized::write_text<int64_t>((int64_t)value, scale, ss); - return ss.str(); + return ((vectorized::Decimal<int64_t>)value).to_string(scale); } else if constexpr (primitive_type == TYPE_DECIMAL128I) { - std::stringstream ss; - vectorized::write_text<int128_t>((int128_t)value, scale, ss); - return ss.str(); + return ((vectorized::Decimal<int128_t>)value).to_string(scale); } else if constexpr (primitive_type == TYPE_TINYINT) { return std::to_string(static_cast<int>(value)); } else if constexpr (primitive_type == TYPE_LARGEINT) { diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index f7a0348d22..c0e2ef25aa 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -453,30 +453,24 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio (*node).__set_decimal_literal(decimal_literal); (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMALV2, precision, scale)); } else if constexpr (T == TYPE_DECIMAL32) { - auto origin_value = reinterpret_cast<const int32_t*>(data); + auto origin_value = reinterpret_cast<const vectorized::Decimal<int32_t>*>(data); (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); TDecimalLiteral decimal_literal; - std::stringstream ss; - vectorized::write_text<int32_t>(*origin_value, scale, ss); - decimal_literal.__set_value(ss.str()); + decimal_literal.__set_value(origin_value->to_string(scale)); (*node).__set_decimal_literal(decimal_literal); (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL32, precision, scale)); } else if constexpr (T == TYPE_DECIMAL64) { - auto origin_value = reinterpret_cast<const int64_t*>(data); + auto origin_value = reinterpret_cast<const vectorized::Decimal<int64_t>*>(data); (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); TDecimalLiteral decimal_literal; - std::stringstream ss; - vectorized::write_text<int64_t>(*origin_value, scale, ss); - decimal_literal.__set_value(ss.str()); + decimal_literal.__set_value(origin_value->to_string(scale)); (*node).__set_decimal_literal(decimal_literal); (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL64, precision, scale)); } else if constexpr (T == TYPE_DECIMAL128I) { - auto origin_value = reinterpret_cast<const int128_t*>(data); + auto origin_value = reinterpret_cast<const vectorized::Decimal<int128_t>*>(data); (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); TDecimalLiteral decimal_literal; - std::stringstream ss; - vectorized::write_text<int128_t>(*origin_value, scale, ss); - decimal_literal.__set_value(ss.str()); + decimal_literal.__set_value(origin_value->to_string(scale)); (*node).__set_decimal_literal(decimal_literal); (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL128I, precision, scale)); } else if constexpr (T == TYPE_FLOAT) { diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 7f0fefc0ad..d7eac97184 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -25,7 +25,9 @@ #include <string> #include <vector> +#include "common/consts.h" #include "util/binary_cast.hpp" +#include "vec/common/int_exp.h" namespace doris { @@ -266,6 +268,21 @@ using DateTimeV2 = UInt64; struct Int128I {}; +template <typename T> +inline T decimal_scale_multiplier(UInt32 scale); +template <> +inline Int32 decimal_scale_multiplier<Int32>(UInt32 scale) { + return common::exp10_i32(scale); +} +template <> +inline Int64 decimal_scale_multiplier<Int64>(UInt32 scale) { + return common::exp10_i64(scale); +} +template <> +inline Int128 decimal_scale_multiplier<Int128>(UInt32 scale) { + return common::exp10_i128(scale); +} + /// Own FieldType for Decimal. /// It is only a "storage" for decimal. To perform operations, you also have to provide a scale (number of digits after point). template <typename T> @@ -325,6 +342,55 @@ struct Decimal { return *this; } + std::string to_string(UInt32 scale) const { + if (value == std::numeric_limits<T>::min()) { + fmt::memory_buffer buffer; + fmt::format_to(buffer, "{}", value); + std::string res {buffer.data(), buffer.size()}; + res.insert(res.size() - scale, "."); + return res; + } + + static constexpr auto precision = + std::is_same_v<T, Int32> + ? BeConsts::MAX_DECIMAL32_PRECISION + : (std::is_same_v<T, Int64> ? BeConsts::MAX_DECIMAL64_PRECISION + : BeConsts::MAX_DECIMAL128_PRECISION); + bool is_nagetive = value < 0; + int max_result_length = precision + (scale > 0) // Add a space for decimal place + + (scale == precision) // Add a space for leading 0 + + (is_nagetive); // Add a space for negative sign + std::string str = std::string(max_result_length, '0'); + + T abs_value = value; + int pos = 0; + + if (is_nagetive) { + abs_value = -value; + str[pos++] = '-'; + } + + T whole_part = abs_value; + T frac_part; + if (scale) { + whole_part = abs_value / decimal_scale_multiplier<T>(scale); + frac_part = abs_value % decimal_scale_multiplier<T>(scale); + } + auto end = fmt::format_to(str.data() + pos, "{}", whole_part); + pos = end - str.data(); + + if (scale) { + str[pos++] = '.'; + for (auto end_pos = pos + scale - 1; end_pos >= pos && frac_part > 0; + --end_pos, frac_part /= 10) { + str[end_pos] += frac_part % 10; + } + } + + str.resize(pos + scale); + return str; + } + T value; }; diff --git a/be/src/vec/data_types/data_type_decimal.cpp b/be/src/vec/data_types/data_type_decimal.cpp index 5d00e65033..afd0288154 100644 --- a/be/src/vec/data_types/data_type_decimal.cpp +++ b/be/src/vec/data_types/data_type_decimal.cpp @@ -47,9 +47,7 @@ template <typename T> std::string DataTypeDecimal<T>::to_string(const IColumn& column, size_t row_num) const { T value = assert_cast<const ColumnType&>(*column.convert_to_full_column_if_const().get()) .get_data()[row_num]; - std::ostringstream buf; - write_text(value, scale, buf); - return buf.str(); + return value.to_string(scale); } template <typename T> @@ -57,11 +55,8 @@ void DataTypeDecimal<T>::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const { // TODO: Reduce the copy in std::string mem to ostr, like DataTypeNumber if constexpr (!IsDecimalV2<T>) { - T value = assert_cast<const ColumnType&>(*column.convert_to_full_column_if_const().get()) - .get_data()[row_num]; - std::ostringstream buf; - write_text(value, scale, buf); - std::string str = buf.str(); + T value = assert_cast<const ColumnType&>(column).get_data()[row_num]; + auto str = value.to_string(scale); ostr.write(str.data(), str.size()); } else { DecimalV2Value value = (DecimalV2Value)assert_cast<const ColumnType&>( diff --git a/be/src/vec/exprs/vliteral.cpp b/be/src/vec/exprs/vliteral.cpp index 782c8161b2..fe0170399e 100644 --- a/be/src/vec/exprs/vliteral.cpp +++ b/be/src/vec/exprs/vliteral.cpp @@ -267,18 +267,21 @@ std::string VLiteral::value() const { break; } case TYPE_DECIMAL32: { - write_text<int32_t>(*(reinterpret_cast<const int32_t*>(ref.data)), _type.scale, - out); + auto str = + reinterpret_cast<const Decimal<int32_t>*>(ref.data)->to_string(_type.scale); + out << str; break; } case TYPE_DECIMAL64: { - write_text<int64_t>(*(reinterpret_cast<const int64_t*>(ref.data)), _type.scale, - out); + auto str = + reinterpret_cast<const Decimal<int64_t>*>(ref.data)->to_string(_type.scale); + out << str; break; } case TYPE_DECIMAL128I: { - write_text<int128_t>(*(reinterpret_cast<const int128_t*>(ref.data)), _type.scale, - out); + auto str = reinterpret_cast<const Decimal<int128_t>*>(ref.data)->to_string( + _type.scale); + out << str; break; } default: { diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 59f58be84c..0fecd16f20 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -324,6 +324,7 @@ struct ConvertImplGenericToString { size_t size = col_from.size(); auto col_to = ColumnString::create(); + col_to->reserve(size * 2); VectorBufferWriter write_buffer(*col_to.get()); for (size_t i = 0; i < size; ++i) { type.to_string(col_from, i, write_buffer); diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h index 5c848188c5..457ae26d50 100644 --- a/be/src/vec/io/io_helper.h +++ b/be/src/vec/io/io_helper.h @@ -42,21 +42,6 @@ static constexpr size_t DEFAULT_MAX_STRING_SIZE = 1073741824; // 1GB static constexpr size_t DEFAULT_MAX_JSON_SIZE = 1073741824; // 1GB static constexpr auto WRITE_HELPERS_MAX_INT_WIDTH = 40U; -template <typename T> -inline T decimal_scale_multiplier(UInt32 scale); -template <> -inline Int32 decimal_scale_multiplier<Int32>(UInt32 scale) { - return common::exp10_i32(scale); -} -template <> -inline Int64 decimal_scale_multiplier<Int64>(UInt32 scale) { - return common::exp10_i64(scale); -} -template <> -inline Int128 decimal_scale_multiplier<Int128>(UInt32 scale) { - return common::exp10_i128(scale); -} - inline std::string int128_to_string(__int128_t value) { fmt::memory_buffer buffer; fmt::format_to(buffer, "{}", value); diff --git a/be/src/vec/utils/histogram_helpers.hpp b/be/src/vec/utils/histogram_helpers.hpp index 206948d733..b6ab1f5547 100644 --- a/be/src/vec/utils/histogram_helpers.hpp +++ b/be/src/vec/utils/histogram_helpers.hpp @@ -163,7 +163,13 @@ bool value_to_bucket(std::vector<Bucket<T>>& buckets, T v, size_t num_per_bucket template <typename T> bool value_to_string(std::stringstream& ss, T input, const DataTypePtr& data_type) { - fmt::memory_buffer _insert_stmt_buffer; + if constexpr (std::is_same_v<T, Decimal32> || std::is_same_v<T, Decimal64> || + std::is_same_v<T, Decimal128> || std::is_same_v<T, Decimal128I>) { + auto scale = get_decimal_scale(*data_type); + ss << input.to_string(scale); + return true; + } + switch (data_type->get_type_id()) { case TypeIndex::Int8: case TypeIndex::UInt8: @@ -183,25 +189,6 @@ bool value_to_string(std::stringstream& ss, T input, const DataTypePtr& data_typ ss << std::string(buffer.data(), buffer.size()); break; } - case TypeIndex::Decimal32: { - auto scale = get_decimal_scale(*data_type); - auto decimal_val = reinterpret_cast<const Decimal32*>(&input); - write_text(*decimal_val, scale, ss); - break; - } - case TypeIndex::Decimal64: { - auto scale = get_decimal_scale(*data_type); - auto decimal_val = reinterpret_cast<const Decimal64*>(&input); - write_text(*decimal_val, scale, ss); - break; - } - case TypeIndex::Decimal128: - case TypeIndex::Decimal128I: { - auto scale = get_decimal_scale(*data_type); - auto decimal_val = reinterpret_cast<const Decimal128*>(&input); - write_text(*decimal_val, scale, ss); - break; - } case TypeIndex::Date: case TypeIndex::DateTime: { auto* date_int = reinterpret_cast<Int64*>(&input); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org