This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new ef65dcedb4e branch-4.0: [Improvement](function) optimize trivial function deserialize_and_merge_vec #58882 (#59439)
ef65dcedb4e is described below
commit ef65dcedb4e8804f3a64241c0c599d7ee664858e
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Jan 5 17:05:10 2026 +0800
branch-4.0: [Improvement](function) optimize trivial function deserialize_and_merge_vec #58882 (#59439)
Cherry-picked from #58882
---------
Co-authored-by: Pxl <[email protected]>
---
.../vec/aggregate_functions/aggregate_function.h | 8 +-
.../aggregate_functions/aggregate_function_avg.h | 4 +-
.../aggregate_function_foreach.h | 4 +-
.../aggregate_functions/aggregate_function_null.h | 89 ++++++++++++++++++++--
.../aggregate_function_state_union.h | 2 +-
.../aggregate_functions/aggregate_function_sum.h | 4 +-
be/src/vec/common/string_buffer.hpp | 3 +-
be/src/vec/exec/format/table/paimon_jni_reader.cpp | 1 +
be/test/vec/olap/jsonb_value_test.cpp | 1 +
9 files changed, 97 insertions(+), 19 deletions(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function.h
b/be/src/vec/aggregate_functions/aggregate_function.h
index b9c2de9c148..cdcef2f248f 100644
--- a/be/src/vec/aggregate_functions/aggregate_function.h
+++ b/be/src/vec/aggregate_functions/aggregate_function.h
@@ -27,13 +27,11 @@
#include "common/status.h"
#include "util/defer_op.h"
#include "vec/columns/column_complex.h"
+#include "vec/columns/column_fixed_length_object.h"
#include "vec/columns/column_string.h"
#include "vec/common/assert_cast.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
#include "vec/common/string_buffer.hpp"
-#include "vec/core/block.h"
-#include "vec/core/column_numbers.h"
-#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_string.h"
@@ -110,7 +108,7 @@ public:
virtual void reset(AggregateDataPtr place) const = 0;
/// It is not necessary to delete data.
- virtual bool has_trivial_destructor() const = 0;
+ virtual bool is_trivial() const = 0;
/// Get `sizeof` of structure with data.
virtual size_t size_of_data() const = 0;
@@ -641,7 +639,7 @@ public:
void destroy(AggregateDataPtr __restrict place) const noexcept override {
data(place).~Data(); }
- bool has_trivial_destructor() const override { return
std::is_trivially_destructible_v<Data>; }
+ bool is_trivial() const override { return false; }
size_t size_of_data() const override { return sizeof(Data); }
diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.h
b/be/src/vec/aggregate_functions/aggregate_function_avg.h
index 0e263c0ec79..f36a11e04d0 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_avg.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_avg.h
@@ -147,6 +147,8 @@ public:
}
}
+ bool is_trivial() const override { return true; }
+
template <bool is_add>
NO_SANITIZE_UNDEFINED void update_value(AggregateDataPtr __restrict place,
const IColumn** columns, ssize_t
row_num) const {
@@ -271,7 +273,6 @@ public:
AggregateDataPtr rhs, const IColumn*
column, Arena& arena,
const size_t num_rows) const override {
this->deserialize_from_column(rhs, *column, arena, num_rows);
- DEFER({ this->destroy_vec(rhs, num_rows); });
this->merge_vec(places, offset, rhs, arena, num_rows);
}
@@ -279,7 +280,6 @@ public:
AggregateDataPtr rhs, const
IColumn* column,
Arena& arena, const size_t
num_rows) const override {
this->deserialize_from_column(rhs, *column, arena, num_rows);
- DEFER({ this->destroy_vec(rhs, num_rows); });
this->merge_vec_selected(places, offset, rhs, arena, num_rows);
}
diff --git a/be/src/vec/aggregate_functions/aggregate_function_foreach.h
b/be/src/vec/aggregate_functions/aggregate_function_foreach.h
index b39ce6a530e..0e7eb9de495 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_foreach.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_foreach.h
@@ -158,8 +158,8 @@ public:
}
}
- bool has_trivial_destructor() const override {
- return std::is_trivially_destructible_v<Data> &&
nested_function->has_trivial_destructor();
+ bool is_trivial() const override {
+ return std::is_trivial_v<Data> && nested_function->is_trivial();
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
diff --git a/be/src/vec/aggregate_functions/aggregate_function_null.h
b/be/src/vec/aggregate_functions/aggregate_function_null.h
index b9477e710c4..5b8e5559a3c 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_null.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_null.h
@@ -23,13 +23,16 @@
#include <glog/logging.h>
#include <array>
+#include <memory>
+#include "common/exception.h"
#include "common/logging.h"
#include "common/status.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/aggregate_function_distinct.h"
#include "vec/columns/column_nullable.h"
#include "vec/common/assert_cast.h"
+#include "vec/common/string_buffer.hpp"
#include "vec/core/types.h"
#include "vec/data_types/data_type_nullable.h"
@@ -161,9 +164,7 @@ public:
nested_function->reset(nested_place(place));
}
- bool has_trivial_destructor() const override {
- return nested_function->has_trivial_destructor();
- }
+ bool is_trivial() const override { return false; }
size_t size_of_data() const override { return prefix_size +
nested_function->size_of_data(); }
@@ -177,11 +178,10 @@ public:
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
Arena& arena) const override {
- if (result_is_nullable && get_flag(rhs)) {
+ if (get_flag(rhs)) {
set_flag(place);
+ nested_function->merge(nested_place(place), nested_place(rhs),
arena);
}
-
- nested_function->merge(nested_place(place), nested_place(rhs), arena);
}
void serialize(ConstAggregateDataPtr __restrict place, BufferWritable&
buf) const override {
@@ -206,6 +206,83 @@ public:
}
}
+ void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t
offset,
+ AggregateDataPtr rhs, const IColumn*
column, Arena& arena,
+ const size_t num_rows) const override {
+ if (nested_function->is_trivial()) {
+ BufferReadable buf({column->get_data_at(0).data, 0});
+ size_t size_of_data = this->size_of_data();
+ if constexpr (result_is_nullable) {
+ for (int i = 0; i != num_rows; ++i) {
+ buf.read_binary(*(bool*)(rhs + size_of_data * i));
+ if (get_flag(rhs + size_of_data * i)) {
+ nested_function->deserialize(nested_place(rhs +
size_of_data * i), buf,
+ arena);
+ }
+ }
+ for (size_t i = 0; i != num_rows; ++i) {
+ if (get_flag(rhs + size_of_data * i)) {
+ set_flag(places[i] + offset);
+ nested_function->merge(nested_place(places[i] +
offset),
+ nested_place(rhs + size_of_data
* i), arena);
+ }
+ }
+ } else {
+ for (size_t i = 0; i != num_rows; ++i) {
+ nested_function->deserialize(rhs + size_of_data * i, buf,
arena);
+ }
+ for (size_t i = 0; i != num_rows; ++i) {
+ nested_function->merge(places[i] + offset, rhs +
size_of_data * i, arena);
+ }
+ }
+ } else {
+
IAggregateFunctionHelper<Derived>::deserialize_and_merge_vec(places, offset,
rhs,
+
column, arena, num_rows);
+ }
+ }
+
+ void deserialize_and_merge_vec_selected(const AggregateDataPtr* places,
size_t offset,
+ AggregateDataPtr rhs, const
IColumn* column,
+ Arena& arena, const size_t
num_rows) const override {
+ if (nested_function->is_trivial()) {
+ BufferReadable buf({column->get_data_at(0).data, 0});
+ size_t size_of_data = this->size_of_data();
+ if constexpr (result_is_nullable) {
+ for (int i = 0; i != num_rows; ++i) {
+ if (!places[i]) {
+ continue;
+ }
+ buf.read_binary(*(bool*)(rhs + size_of_data * i));
+ if (get_flag(rhs + size_of_data * i)) {
+ nested_function->deserialize(nested_place(rhs +
size_of_data * i), buf,
+ arena);
+ }
+ }
+ for (size_t i = 0; i != num_rows; ++i) {
+ if (places[i] && get_flag(rhs + size_of_data * i)) {
+ set_flag(places[i] + offset);
+ nested_function->merge(nested_place(places[i] +
offset),
+ nested_place(rhs + size_of_data
* i), arena);
+ }
+ }
+ } else {
+ for (size_t i = 0; i != num_rows; ++i) {
+ if (places[i]) {
+ nested_function->deserialize(rhs + size_of_data * i,
buf, arena);
+ }
+ }
+ for (size_t i = 0; i != num_rows; ++i) {
+ if (places[i]) {
+ nested_function->merge(places[i] + offset, rhs +
size_of_data * i, arena);
+ }
+ }
+ }
+ } else {
+
IAggregateFunctionHelper<Derived>::deserialize_and_merge_vec_selected(
+ places, offset, rhs, column, arena, num_rows);
+ }
+ }
+
void deserialize_and_merge(AggregateDataPtr __restrict place,
AggregateDataPtr __restrict rhs,
BufferReadable& buf, Arena& arena) const
override {
bool flag = true;
diff --git a/be/src/vec/aggregate_functions/aggregate_function_state_union.h
b/be/src/vec/aggregate_functions/aggregate_function_state_union.h
index 9b9bfb84fc4..09705cc3647 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_state_union.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_state_union.h
@@ -101,7 +101,7 @@ public:
_function->destroy(place);
}
- bool has_trivial_destructor() const override { return
_function->has_trivial_destructor(); }
+ bool is_trivial() const override { return false; }
size_t size_of_data() const override { return _function->size_of_data(); }
diff --git a/be/src/vec/aggregate_functions/aggregate_function_sum.h
b/be/src/vec/aggregate_functions/aggregate_function_sum.h
index 81a6127d9a9..e7967db7d94 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_sum.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_sum.h
@@ -102,6 +102,8 @@ public:
}
}
+ bool is_trivial() const override { return true; }
+
void add(AggregateDataPtr __restrict place, const IColumn** columns,
ssize_t row_num,
Arena&) const override {
const auto& column =
@@ -192,7 +194,6 @@ public:
AggregateDataPtr rhs, const IColumn*
column, Arena& arena,
const size_t num_rows) const override {
this->deserialize_from_column(rhs, *column, arena, num_rows);
- DEFER({ this->destroy_vec(rhs, num_rows); });
this->merge_vec(places, offset, rhs, arena, num_rows);
}
@@ -200,7 +201,6 @@ public:
AggregateDataPtr rhs, const
IColumn* column,
Arena& arena, const size_t
num_rows) const override {
this->deserialize_from_column(rhs, *column, arena, num_rows);
- DEFER({ this->destroy_vec(rhs, num_rows); });
this->merge_vec_selected(places, offset, rhs, arena, num_rows);
}
diff --git a/be/src/vec/common/string_buffer.hpp
b/be/src/vec/common/string_buffer.hpp
index 33a8397f610..15d4f933db9 100644
--- a/be/src/vec/common/string_buffer.hpp
+++ b/be/src/vec/common/string_buffer.hpp
@@ -256,7 +256,8 @@ public:
template <typename Type>
void read_binary(Type& x) {
static_assert(std::is_standard_layout_v<Type>);
- read(reinterpret_cast<char*>(&x), sizeof(x));
+ memcpy_fixed<Type>(reinterpret_cast<char*>(&x), _data);
+ _data += sizeof(x);
}
template <typename Type>
diff --git a/be/src/vec/exec/format/table/paimon_jni_reader.cpp
b/be/src/vec/exec/format/table/paimon_jni_reader.cpp
index 942f6a83971..f62e7afa14c 100644
--- a/be/src/vec/exec/format/table/paimon_jni_reader.cpp
+++ b/be/src/vec/exec/format/table/paimon_jni_reader.cpp
@@ -22,6 +22,7 @@
#include "runtime/descriptors.h"
#include "runtime/runtime_state.h"
#include "runtime/types.h"
+#include "vec/core/block.h"
#include "vec/core/types.h"
namespace doris {
class RuntimeProfile;
diff --git a/be/test/vec/olap/jsonb_value_test.cpp
b/be/test/vec/olap/jsonb_value_test.cpp
index d6b5db784e2..97f858d63e1 100644
--- a/be/test/vec/olap/jsonb_value_test.cpp
+++ b/be/test/vec/olap/jsonb_value_test.cpp
@@ -23,6 +23,7 @@
#include "gtest/gtest_pred_impl.h"
#include "vec/columns/column_string.h"
#include "vec/common/string_ref.h"
+#include "vec/core/block.h"
#include "vec/core/columns_with_type_and_name.h"
#include "vec/data_types/serde/data_type_serde.h"
#include "vec/olap/olap_data_convertor.h"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org