eldenmoon commented on code in PR #48401:
URL: https://github.com/apache/doris/pull/48401#discussion_r1972954440


##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -469,39 +462,143 @@ void 
SparseColumnExtractReader::_fill_path_column(vectorized::MutableColumnPtr&
 #ifndef NDEBUG
     var.check_consistency();
 #endif
-    _sparse_column->clear();
+    // _sparse_column->clear();
 }
 
-Status SparseColumnExtractReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
-                                             bool* has_null) {
-    _sparse_column->clear();
-    RETURN_IF_ERROR(_sparse_column_reader->next_batch(n, _sparse_column, 
has_null));
-    const auto& offsets = assert_cast<const 
vectorized::ColumnMap&>(*_sparse_column).get_offsets();
-    // Check if we don't have any paths in shared data in current range.
-    if (offsets.back() == offsets[-1]) {
-        dst->insert_many_defaults(*n);
-    } else {
-        _fill_path_column(dst);
+Status SparseColumnMergeReader::seek_to_first() {
+    RETURN_IF_ERROR(_sparse_column_reader->seek_to_first());
+    for (auto& entry : _src_subcolumns_for_sparse) {
+        RETURN_IF_ERROR(entry->data.iterator->seek_to_first());
     }
     return Status::OK();
 }
 
-Status SparseColumnExtractReader::read_by_rowids(const rowid_t* rowids, const 
size_t count,
-                                                 vectorized::MutableColumnPtr& 
dst) {
-    _sparse_column->clear();
-    RETURN_IF_ERROR(_sparse_column_reader->read_by_rowids(rowids, count, 
_sparse_column));
-    const auto& offsets = assert_cast<const 
vectorized::ColumnMap&>(*_sparse_column).get_offsets();
-    // Check if we don't have any paths in shared data in current range.
-    if (offsets.back() == offsets[-1]) {
-        dst->insert_many_defaults(count);
-    } else {
-        _fill_path_column(dst);
+Status SparseColumnMergeReader::seek_to_ordinal(ordinal_t ord) {
+    RETURN_IF_ERROR(_sparse_column_reader->seek_to_ordinal(ord));
+    for (auto& entry : _src_subcolumns_for_sparse) {
+        RETURN_IF_ERROR(entry->data.iterator->seek_to_ordinal(ord));
     }
     return Status::OK();
 }
 
-ordinal_t SparseColumnExtractReader::get_current_ordinal() const {
-    return _sparse_column_reader->get_current_ordinal();
+Status SparseColumnMergeReader::init(const ColumnIteratorOptions& opts) {
+    RETURN_IF_ERROR(_sparse_column_reader->init(opts));
+    for (auto& entry : _src_subcolumns_for_sparse) {
+        entry->data.serde = entry->data.type->get_serde();
+        RETURN_IF_ERROR(entry->data.iterator->init(opts));
+        const auto& path = entry->path.get_path();
+        _sorted_src_subcolumn_for_sparse.emplace_back(StringRef(path.data(), 
path.size()), entry);
+    }
+    return Status::OK();
+}
+
+void SparseColumnMergeReader::_serialize_nullable_column_to_sparse(
+        const SubstreamReaderTree::Node* src_subcolumn,
+        vectorized::ColumnString& dst_sparse_column_paths,
+        vectorized::ColumnString& dst_sparse_column_values, const StringRef& 
src_path, size_t row) {
+    // every subcolumn is always Nullable
+    const auto& nullable_serde =
+            
assert_cast<vectorized::DataTypeNullableSerDe&>(*src_subcolumn->data.serde);
+    const auto& nullable_col =
+            assert_cast<const vectorized::ColumnNullable&, 
TypeCheckOnRelease::DISABLE>(
+                    *src_subcolumn->data.column);
+    if (nullable_col.is_null_at(row)) {
+        return;
+    }
+    // insert key
+    dst_sparse_column_paths.insert_data(src_path.data, src_path.size);
+    // insert value
+    vectorized::ColumnString::Chars& chars = 
dst_sparse_column_values.get_chars();
+    
nullable_serde.get_nested_serde()->write_one_cell_to_binary(nullable_col.get_nested_column(),
+                                                                chars, row);
+    dst_sparse_column_values.get_offsets().push_back(chars.size());
+}
+
+void 
SparseColumnMergeReader::_process_data_without_sparse_column(vectorized::MutableColumnPtr&
 dst,
+                                                                  size_t 
num_rows) {
+    if (_src_subcolumns_for_sparse.empty()) {
+        dst->insert_many_defaults(num_rows);
+    } else {
+        // merge subcolumns to sparse column
+        // Otherwise insert required src dense columns into sparse column.
+        auto& map_column = assert_cast<vectorized::ColumnMap&>(*dst);
+        auto& sparse_column_keys = 
assert_cast<vectorized::ColumnString&>(map_column.get_keys());
+        auto& sparse_column_values =
+                
assert_cast<vectorized::ColumnString&>(map_column.get_values());
+        auto& sparse_column_offsets = map_column.get_offsets();
+        for (size_t i = 0; i != num_rows; ++i) {
+            // Paths in sorted_src_subcolumn_for_sparse_column are already sorted.
+            for (const auto& entry : _src_subcolumns_for_sparse) {

Review Comment:
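   sorted — the comment above says the paths are already sorted, but this loop
   iterates `_src_subcolumns_for_sparse`; presumably it should iterate the
   sorted container (`_sorted_src_subcolumn_for_sparse`, filled in `init()`)
   instead.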



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to