eldenmoon commented on code in PR #39022:
URL: https://github.com/apache/doris/pull/39022#discussion_r1721170672


##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.h:
##########
@@ -123,35 +113,97 @@ class HierarchicalDataReader : public ColumnIterator {
         }));
 
         // build variant as container
-        auto container = vectorized::ColumnObject::create(true, false);
-        auto& container_variant = assert_cast<vectorized::ColumnObject&>(*container);
+        auto container = ColumnObject::create(true, false);
+        auto& container_variant = assert_cast<ColumnObject&>(*container);
 
         // add root first
-        if (_path.get_parts().size() == 1) {
-            auto& root_var =
-                    _root_reader->column->is_nullable()
-                            ? assert_cast<vectorized::ColumnObject&>(
-                                      assert_cast<vectorized::ColumnNullable&>(
-                                              *_root_reader->column)
-                                              .get_nested_column())
-                            : assert_cast<vectorized::ColumnObject&>(*_root_reader->column);
+        if (_path.get_parts().size() == 1 && _root_reader) {
+            auto& root_var = _root_reader->column->is_nullable()
+                                     ? assert_cast<ColumnObject&>(
+                                               assert_cast<ColumnNullable&>(*_root_reader->column)
+                                                       .get_nested_column())
+                                     : assert_cast<ColumnObject&>(*_root_reader->column);
             auto column = root_var.get_root();
             auto type = root_var.get_root_type();
             container_variant.add_sub_column({}, std::move(column), type);
         }
-
+        // parent path -> subcolumns
+        std::map<PathInData, PathsWithColumnAndType> nested_subcolumns;
+        PathsWithColumnAndType non_nested_subcolumns;
         RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
-            vectorized::MutableColumnPtr column = node.data.column->get_ptr();
-            bool add = container_variant.add_sub_column(
-                    node.path.copy_pop_nfront(_path.get_parts().size()), std::move(column),
-                    node.data.type);
-            if (!add) {
-                return Status::InternalError("Duplicated {}, type {}", node.path.get_path(),
-                                             node.data.type->get_name());
+            MutableColumnPtr column = node.data.column->get_ptr();
+            PathInData real_path = node.path.copy_pop_nfront(_path.get_parts().size());
+
+            if (node.path.has_nested_part()) {
+                CHECK_EQ(getTypeName(remove_nullable(node.data.type)->get_type_id()),
+                         getTypeName(TypeIndex::Array));
+                PathInData parent_path = node.path.get_nested_prefix_path().copy_pop_nfront(
+                        _path.get_parts().size());
+                nested_subcolumns[parent_path].emplace_back(real_path, column->get_ptr(),
+                                                            node.data.type);
+            } else {
+                non_nested_subcolumns.emplace_back(real_path, column->get_ptr(), node.data.type);
             }
             return Status::OK();
         }));
 
+        for (auto& entry : non_nested_subcolumns) {
+            DCHECK(!entry.path.has_nested_part());
+            bool add = container_variant.add_sub_column(entry.path, entry.column->assume_mutable(),
+                                                        entry.type);
+            if (!add) {
+                return Status::InternalError("Duplicated {}, type {}", entry.path.get_path(),
+                                             entry.type->get_name());
+            }
+        }
+        for (auto& entry : nested_subcolumns) {
+            MutableColumnPtr nested_object = ColumnObject::create(true, false);
+            const auto* base_array =
+                    check_and_get_column<ColumnArray>(remove_nullable(entry.second[0].column));
+            MutableColumnPtr offset = base_array->get_offsets_ptr()->assume_mutable();
+            auto* nested_object_ptr = assert_cast<ColumnObject*>(nested_object.get());
+            // flatten nested arrays
+            for (const auto& subcolumn : entry.second) {
+                const auto& column = subcolumn.column;
+                const auto& type = subcolumn.type;
+                if (!remove_nullable(column)->is_column_array()) {
+                    return Status::InvalidArgument(
+                            "Meet none array column when flatten nested array, path {}, type {}",
+                            subcolumn.path.get_path(), subcolumn.type->get_name());
+                }
+                const auto* target_array =
+                        check_and_get_column<ColumnArray>(remove_nullable(subcolumn.column).get());
+                if (!base_array->has_equal_offsets(*target_array)) {

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to