This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 415721ef20 [enhancement](pred column) improve predicate column insert performance (#12690)
415721ef20 is described below

commit 415721ef2021fdf428bb7f94a796f9cb02c67aa6
Author: yiguolei <676222...@qq.com>
AuthorDate: Mon Sep 19 10:53:48 2022 +0800

    [enhancement](pred column) improve predicate column insert performance (#12690)
    
    
    
    Co-authored-by: yiguolei <yiguo...@gmail.com>
---
 be/src/vec/columns/predicate_column.h | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h
index 6662545b2f..68a4095450 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -90,11 +90,14 @@ private:
 
     void insert_string_to_res_column(const uint16_t* sel, size_t sel_size,
                                      vectorized::ColumnString* res_ptr) {
+        StringRef refs[sel_size];
         for (size_t i = 0; i < sel_size; i++) {
             uint16_t n = sel[i];
             auto& sv = reinterpret_cast<StringValue&>(data[n]);
-            res_ptr->insert_data(sv.ptr, sv.len);
+            refs[i].data = sv.ptr;
+            refs[i].size = sv.len;
         }
+        res_ptr->insert_many_strings(refs, sel_size);
     }
 
     void insert_decimal_to_res_column(const uint16_t* sel, size_t sel_size,
@@ -261,6 +264,9 @@ public:
 
     void insert_many_binary_data(char* data_array, uint32_t* len_array,
                                  uint32_t* start_offset_array, size_t num) override {
+        if (num == 0) {
+            return;
+        }
         if constexpr (std::is_same_v<T, StringValue>) {
             if (_pool == nullptr) {
                 _pool.reset(new MemPool());
@@ -272,13 +278,14 @@ public:
             }
 
             char* destination = (char*)_pool->allocate(total_mem_size);
+            memcpy(destination, data_array, total_mem_size);
+            // Resize the underlying data to allow data copy directly
+            size_t org_elem_num = data.size();
+            data.resize(org_elem_num + num);
             for (size_t i = 0; i < num; i++) {
-                uint32_t len = len_array[i];
-                uint32_t start_offset = start_offset_array[i];
-                memcpy(destination, data_array + start_offset, len);
-                StringValue sv(destination, len);
-                data.push_back_without_reserve(sv);
-                destination += len;
+                data[org_elem_num + i].ptr = destination;
+                data[org_elem_num + i].len = len_array[i];
+                destination += len_array[i];
             }
         }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to