amorynan commented on code in PR #23489:
URL: https://github.com/apache/doris/pull/23489#discussion_r1305357065


##########
be/src/olap/rowset/segment_v2/column_writer.cpp:
##########
@@ -904,58 +904,87 @@ size_t ArrayColumnWriter::get_inverted_index_size() {
     return 0;
 }
 
-// Now we can only write data one by one.
+// batch append data for array
 Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
-    size_t remaining = num_rows;
-    const auto* col_cursor = reinterpret_cast<const CollectionValue*>(*ptr);
-    while (remaining > 0) {
-        // TODO llj: bulk write
-        size_t num_written = 1;
-        ordinal_t next_item_ordinal = _item_writer->get_next_rowid();
-        RETURN_IF_ERROR(_offset_writer->append_data_in_current_page(
-                reinterpret_cast<uint8_t*>(&next_item_ordinal), &num_written));
-        if (num_written <
-            1) { // page is full, write first item offset and update current 
length page's start ordinal
-            RETURN_IF_ERROR(_offset_writer->finish_current_page());
-        } else {
-            // write child item.
-            if (_item_writer->is_nullable()) {
-                auto* item_data_ptr = 
const_cast<CollectionValue*>(col_cursor)->mutable_data();
-                for (size_t i = 0; i < col_cursor->length(); ++i) {
-                    
RETURN_IF_ERROR(_item_writer->append(col_cursor->is_null_at(i), item_data_ptr));
-                    item_data_ptr = (uint8_t*)item_data_ptr + 
_item_writer->get_field()->size();
-                }
-            } else {
-                const void* data = col_cursor->data();
-                
RETURN_IF_ERROR(_item_writer->append_data(reinterpret_cast<const 
uint8_t**>(&data),
-                                                          
col_cursor->length()));
-            }
-            if (_opts.inverted_index) {
-                auto writer = 
dynamic_cast<ScalarColumnWriter*>(_item_writer.get());
-                if (writer != nullptr) {
-                    //NOTE: use array field name as index field, but 
item_writer size should be used when moving item_data_ptr
-                    
_inverted_index_builder->add_array_values(_item_writer->get_field()->size(),
-                                                              col_cursor, 1);
-                }
-            }
-        }
-        remaining -= num_written;
-        col_cursor += num_written;
-        *ptr += num_written * sizeof(CollectionValue);
-    }
+    // data_ptr contains
+    // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
+    auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
+    // total number length
+    size_t element_cnt = size_t((unsigned long)(*data_ptr));
+    auto offset_data = *(data_ptr + 1);
+    const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
 
-    if (is_nullable()) {
-        return write_null_column(num_rows, false);
+    if (element_cnt > 0) {
+        auto data = *(data_ptr + 2);

Review Comment:
   This is the array append-data logic; the inverted-index handling is no longer part of it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to