This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d44ec74988 [Enhancement](column) optimize for ColumnString::insert_many_dict_data (#12636)
d44ec74988 is described below

commit d44ec749887b058c3ca0a19d6deffb1e1c396976
Author: Pxl <pxl...@qq.com>
AuthorDate: Fri Sep 16 10:23:04 2022 +0800

    [Enhancement](column) optimize for ColumnString::insert_many_dict_data (#12636)
    
    optimize for ColumnString::insert_many_dict_data
---
 be/src/vec/columns/column_string.h | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h
index 8dc597e18c..2adb32dd99 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -191,17 +191,33 @@ public:
             uint32_t len = strings[i].size;
             if (len) {
                 memcpy(data + offset, strings[i].data, len);
+                offset += len;
             }
-            offset += len;
             offsets.push_back(offset);
         }
     }
 
     void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict,
                                size_t num, uint32_t /*dict_num*/) override {
-        for (size_t end_index = start_index + num; start_index < end_index; ++start_index) {
-            int32_t codeword = data_array[start_index];
-            insert_data(dict[codeword].data, dict[codeword].size);
+        size_t new_size = 0;
+        for (size_t i = start_index; i < start_index + num; i++) {
+            int32_t codeword = data_array[i];
+            new_size += dict[codeword].size;
+        }
+
+        const size_t old_size = chars.size();
+        chars.resize(old_size + new_size);
+
+        Char* data = chars.data();
+        size_t offset = old_size;
+        for (size_t i = start_index; i < start_index + num; i++) {
+            int32_t codeword = data_array[i];
+            uint32_t len = dict[codeword].size;
+            if (len) {
+                memcpy(data + offset, dict[codeword].data, len);
+                offset += len;
+            }
+            offsets.push_back(offset);
         }
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to