[doris] branch master updated: [DecimalV3](opt) opt the function of decimalv3 to_string logic (#16427)

gabriellee Mon, 06 Feb 2023 21:28:22 -0800

This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 9114896178 [DecimalV3](opt) opt the function of decimalv3 to_string logic (#16427)
9114896178 is described below

commit 91148961784fef3572e07ebf5a9f05a870fd154c
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Tue Feb 7 13:28:07 2023 +0800

    [DecimalV3](opt) opt the function of decimalv3 to_string logic (#16427)
---
 be/src/exec/olap_common.h                   | 13 ++----
 be/src/exprs/runtime_filter.h               | 18 +++-----
 be/src/vec/core/types.h                     | 66 +++++++++++++++++++++++++++++
 be/src/vec/data_types/data_type_decimal.cpp | 11 ++---
 be/src/vec/exprs/vliteral.cpp               | 15 ++++---
 be/src/vec/functions/function_cast.h        |  1 +
 be/src/vec/io/io_helper.h                   | 15 -------
 be/src/vec/utils/histogram_helpers.hpp      | 27 +++---------
 8 files changed, 96 insertions(+), 70 deletions(-)

diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index 2980791b63..fde528131e 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -30,6 +30,7 @@
 #include "olap/olap_tuple.h"
 #include "runtime/primitive_type.h"
 #include "runtime/type_limit.h"
+#include "vec/core/types.h"
 #include "vec/io/io_helper.h"
 #include "vec/runtime/vdatetime_value.h"
 
@@ -38,17 +39,11 @@ namespace doris {
 template <PrimitiveType primitive_type, class T>
 std::string cast_to_string(T value, int scale) {
     if constexpr (primitive_type == TYPE_DECIMAL32) {
-        std::stringstream ss;
-        vectorized::write_text<int32_t>((int32_t)value, scale, ss);
-        return ss.str();
+        return ((vectorized::Decimal<int32_t>)value).to_string(scale);
     } else if constexpr (primitive_type == TYPE_DECIMAL64) {
-        std::stringstream ss;
-        vectorized::write_text<int64_t>((int64_t)value, scale, ss);
-        return ss.str();
+        return ((vectorized::Decimal<int64_t>)value).to_string(scale);
     } else if constexpr (primitive_type == TYPE_DECIMAL128I) {
-        std::stringstream ss;
-        vectorized::write_text<int128_t>((int128_t)value, scale, ss);
-        return ss.str();
+        return ((vectorized::Decimal<int128_t>)value).to_string(scale);
     } else if constexpr (primitive_type == TYPE_TINYINT) {
         return std::to_string(static_cast<int>(value));
     } else if constexpr (primitive_type == TYPE_LARGEINT) {
diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h
index f7a0348d22..c0e2ef25aa 100644
--- a/be/src/exprs/runtime_filter.h
+++ b/be/src/exprs/runtime_filter.h
@@ -453,30 +453,24 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio
         (*node).__set_decimal_literal(decimal_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMALV2, precision, scale));
     } else if constexpr (T == TYPE_DECIMAL32) {
-        auto origin_value = reinterpret_cast<const int32_t*>(data);
+        auto origin_value = reinterpret_cast<const vectorized::Decimal<int32_t>*>(data);
         (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
         TDecimalLiteral decimal_literal;
-        std::stringstream ss;
-        vectorized::write_text<int32_t>(*origin_value, scale, ss);
-        decimal_literal.__set_value(ss.str());
+        decimal_literal.__set_value(origin_value->to_string(scale));
         (*node).__set_decimal_literal(decimal_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL32, precision, scale));
     } else if constexpr (T == TYPE_DECIMAL64) {
-        auto origin_value = reinterpret_cast<const int64_t*>(data);
+        auto origin_value = reinterpret_cast<const vectorized::Decimal<int64_t>*>(data);
         (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
         TDecimalLiteral decimal_literal;
-        std::stringstream ss;
-        vectorized::write_text<int64_t>(*origin_value, scale, ss);
-        decimal_literal.__set_value(ss.str());
+        decimal_literal.__set_value(origin_value->to_string(scale));
         (*node).__set_decimal_literal(decimal_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL64, precision, scale));
     } else if constexpr (T == TYPE_DECIMAL128I) {
-        auto origin_value = reinterpret_cast<const int128_t*>(data);
+        auto origin_value = reinterpret_cast<const vectorized::Decimal<int128_t>*>(data);
         (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
         TDecimalLiteral decimal_literal;
-        std::stringstream ss;
-        vectorized::write_text<int128_t>(*origin_value, scale, ss);
-        decimal_literal.__set_value(ss.str());
+        decimal_literal.__set_value(origin_value->to_string(scale));
         (*node).__set_decimal_literal(decimal_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL128I, precision, scale));
     } else if constexpr (T == TYPE_FLOAT) {
diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h
index 7f0fefc0ad..d7eac97184 100644
--- a/be/src/vec/core/types.h
+++ b/be/src/vec/core/types.h
@@ -25,7 +25,9 @@
 #include <string>
 #include <vector>
 
+#include "common/consts.h"
 #include "util/binary_cast.hpp"
+#include "vec/common/int_exp.h"
 
 namespace doris {
 
@@ -266,6 +268,21 @@ using DateTimeV2 = UInt64;
 
 struct Int128I {};
 
+template <typename T>
+inline T decimal_scale_multiplier(UInt32 scale);
+template <>
+inline Int32 decimal_scale_multiplier<Int32>(UInt32 scale) {
+    return common::exp10_i32(scale);
+}
+template <>
+inline Int64 decimal_scale_multiplier<Int64>(UInt32 scale) {
+    return common::exp10_i64(scale);
+}
+template <>
+inline Int128 decimal_scale_multiplier<Int128>(UInt32 scale) {
+    return common::exp10_i128(scale);
+}
+
 /// Own FieldType for Decimal.
 /// It is only a "storage" for decimal. To perform operations, you also have to provide a scale (number of digits after point).
 template <typename T>
@@ -325,6 +342,55 @@ struct Decimal {
         return *this;
     }
 
+    std::string to_string(UInt32 scale) const {
+        if (value == std::numeric_limits<T>::min()) {
+            fmt::memory_buffer buffer;
+            fmt::format_to(buffer, "{}", value);
+            std::string res {buffer.data(), buffer.size()};
+            res.insert(res.size() - scale, ".");
+            return res;
+        }
+
+        static constexpr auto precision =
+                std::is_same_v<T, Int32>
+                        ? BeConsts::MAX_DECIMAL32_PRECISION
+                        : (std::is_same_v<T, Int64> ? BeConsts::MAX_DECIMAL64_PRECISION
+                                                    : BeConsts::MAX_DECIMAL128_PRECISION);
+        bool is_nagetive = value < 0;
+        int max_result_length = precision + (scale > 0) // Add a space for decimal place
+                                + (scale == precision)  // Add a space for leading 0
+                                + (is_nagetive);        // Add a space for negative sign
+        std::string str = std::string(max_result_length, '0');
+
+        T abs_value = value;
+        int pos = 0;
+
+        if (is_nagetive) {
+            abs_value = -value;
+            str[pos++] = '-';
+        }
+
+        T whole_part = abs_value;
+        T frac_part;
+        if (scale) {
+            whole_part = abs_value / decimal_scale_multiplier<T>(scale);
+            frac_part = abs_value % decimal_scale_multiplier<T>(scale);
+        }
+        auto end = fmt::format_to(str.data() + pos, "{}", whole_part);
+        pos = end - str.data();
+
+        if (scale) {
+            str[pos++] = '.';
+            for (auto end_pos = pos + scale - 1; end_pos >= pos && frac_part > 0;
+                 --end_pos, frac_part /= 10) {
+                str[end_pos] += frac_part % 10;
+            }
+        }
+
+        str.resize(pos + scale);
+        return str;
+    }
+
     T value;
 };
 
diff --git a/be/src/vec/data_types/data_type_decimal.cpp b/be/src/vec/data_types/data_type_decimal.cpp
index 5d00e65033..afd0288154 100644
--- a/be/src/vec/data_types/data_type_decimal.cpp
+++ b/be/src/vec/data_types/data_type_decimal.cpp
@@ -47,9 +47,7 @@ template <typename T>
 std::string DataTypeDecimal<T>::to_string(const IColumn& column, size_t row_num) const {
     T value = assert_cast<const ColumnType&>(*column.convert_to_full_column_if_const().get())
                       .get_data()[row_num];
-    std::ostringstream buf;
-    write_text(value, scale, buf);
-    return buf.str();
+    return value.to_string(scale);
 }
 
 template <typename T>
@@ -57,11 +55,8 @@ void DataTypeDecimal<T>::to_string(const IColumn& column, size_t row_num,
                                    BufferWritable& ostr) const {
     // TODO: Reduce the copy in std::string mem to ostr, like DataTypeNumber
     if constexpr (!IsDecimalV2<T>) {
-        T value = assert_cast<const ColumnType&>(*column.convert_to_full_column_if_const().get())
-                          .get_data()[row_num];
-        std::ostringstream buf;
-        write_text(value, scale, buf);
-        std::string str = buf.str();
+        T value = assert_cast<const ColumnType&>(column).get_data()[row_num];
+        auto str = value.to_string(scale);
         ostr.write(str.data(), str.size());
     } else {
         DecimalV2Value value = (DecimalV2Value)assert_cast<const ColumnType&>(
diff --git a/be/src/vec/exprs/vliteral.cpp b/be/src/vec/exprs/vliteral.cpp
index 782c8161b2..fe0170399e 100644
--- a/be/src/vec/exprs/vliteral.cpp
+++ b/be/src/vec/exprs/vliteral.cpp
@@ -267,18 +267,21 @@ std::string VLiteral::value() const {
                 break;
             }
             case TYPE_DECIMAL32: {
-                write_text<int32_t>(*(reinterpret_cast<const int32_t*>(ref.data)), _type.scale,
-                                    out);
+                auto str =
+                        reinterpret_cast<const Decimal<int32_t>*>(ref.data)->to_string(_type.scale);
+                out << str;
                 break;
             }
             case TYPE_DECIMAL64: {
-                write_text<int64_t>(*(reinterpret_cast<const int64_t*>(ref.data)), _type.scale,
-                                    out);
+                auto str =
+                        reinterpret_cast<const Decimal<int64_t>*>(ref.data)->to_string(_type.scale);
+                out << str;
                 break;
             }
             case TYPE_DECIMAL128I: {
-                write_text<int128_t>(*(reinterpret_cast<const int128_t*>(ref.data)), _type.scale,
-                                     out);
+                auto str = reinterpret_cast<const Decimal<int128_t>*>(ref.data)->to_string(
+                        _type.scale);
+                out << str;
                 break;
             }
             default: {
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index 59f58be84c..0fecd16f20 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -324,6 +324,7 @@ struct ConvertImplGenericToString {
         size_t size = col_from.size();
 
         auto col_to = ColumnString::create();
+        col_to->reserve(size * 2);
         VectorBufferWriter write_buffer(*col_to.get());
         for (size_t i = 0; i < size; ++i) {
             type.to_string(col_from, i, write_buffer);
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
index 5c848188c5..457ae26d50 100644
--- a/be/src/vec/io/io_helper.h
+++ b/be/src/vec/io/io_helper.h
@@ -42,21 +42,6 @@ static constexpr size_t DEFAULT_MAX_STRING_SIZE = 1073741824; // 1GB
 static constexpr size_t DEFAULT_MAX_JSON_SIZE = 1073741824;   // 1GB
 static constexpr auto WRITE_HELPERS_MAX_INT_WIDTH = 40U;
 
-template <typename T>
-inline T decimal_scale_multiplier(UInt32 scale);
-template <>
-inline Int32 decimal_scale_multiplier<Int32>(UInt32 scale) {
-    return common::exp10_i32(scale);
-}
-template <>
-inline Int64 decimal_scale_multiplier<Int64>(UInt32 scale) {
-    return common::exp10_i64(scale);
-}
-template <>
-inline Int128 decimal_scale_multiplier<Int128>(UInt32 scale) {
-    return common::exp10_i128(scale);
-}
-
 inline std::string int128_to_string(__int128_t value) {
     fmt::memory_buffer buffer;
     fmt::format_to(buffer, "{}", value);
diff --git a/be/src/vec/utils/histogram_helpers.hpp b/be/src/vec/utils/histogram_helpers.hpp
index 206948d733..b6ab1f5547 100644
--- a/be/src/vec/utils/histogram_helpers.hpp
+++ b/be/src/vec/utils/histogram_helpers.hpp
@@ -163,7 +163,13 @@ bool value_to_bucket(std::vector<Bucket<T>>& buckets, T v, size_t num_per_bucket
 
 template <typename T>
 bool value_to_string(std::stringstream& ss, T input, const DataTypePtr& data_type) {
-    fmt::memory_buffer _insert_stmt_buffer;
+    if constexpr (std::is_same_v<T, Decimal32> || std::is_same_v<T, Decimal64> ||
+                  std::is_same_v<T, Decimal128> || std::is_same_v<T, Decimal128I>) {
+        auto scale = get_decimal_scale(*data_type);
+        ss << input.to_string(scale);
+        return true;
+    }
+
     switch (data_type->get_type_id()) {
     case TypeIndex::Int8:
     case TypeIndex::UInt8:
@@ -183,25 +189,6 @@ bool value_to_string(std::stringstream& ss, T input, const DataTypePtr& data_typ
         ss << std::string(buffer.data(), buffer.size());
         break;
     }
-    case TypeIndex::Decimal32: {
-        auto scale = get_decimal_scale(*data_type);
-        auto decimal_val = reinterpret_cast<const Decimal32*>(&input);
-        write_text(*decimal_val, scale, ss);
-        break;
-    }
-    case TypeIndex::Decimal64: {
-        auto scale = get_decimal_scale(*data_type);
-        auto decimal_val = reinterpret_cast<const Decimal64*>(&input);
-        write_text(*decimal_val, scale, ss);
-        break;
-    }
-    case TypeIndex::Decimal128:
-    case TypeIndex::Decimal128I: {
-        auto scale = get_decimal_scale(*data_type);
-        auto decimal_val = reinterpret_cast<const Decimal128*>(&input);
-        write_text(*decimal_val, scale, ss);
-        break;
-    }
     case TypeIndex::Date:
     case TypeIndex::DateTime: {
         auto* date_int = reinterpret_cast<Int64*>(&input);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[doris] branch master updated: [DecimalV3](opt) opt the function of decimalv3 to_string logic (#16427)

Reply via email to