eldenmoon commented on code in PR #39022: URL: https://github.com/apache/doris/pull/39022#discussion_r1721171203
########## be/src/olap/rowset/segment_v2/hierarchical_data_reader.h: ########## @@ -123,35 +113,97 @@ class HierarchicalDataReader : public ColumnIterator { })); // build variant as container - auto container = vectorized::ColumnObject::create(true, false); - auto& container_variant = assert_cast<vectorized::ColumnObject&>(*container); + auto container = ColumnObject::create(true, false); + auto& container_variant = assert_cast<ColumnObject&>(*container); // add root first - if (_path.get_parts().size() == 1) { - auto& root_var = - _root_reader->column->is_nullable() - ? assert_cast<vectorized::ColumnObject&>( - assert_cast<vectorized::ColumnNullable&>( - *_root_reader->column) - .get_nested_column()) - : assert_cast<vectorized::ColumnObject&>(*_root_reader->column); + if (_path.get_parts().size() == 1 && _root_reader) { + auto& root_var = _root_reader->column->is_nullable() + ? assert_cast<ColumnObject&>( + assert_cast<ColumnNullable&>(*_root_reader->column) + .get_nested_column()) + : assert_cast<ColumnObject&>(*_root_reader->column); auto column = root_var.get_root(); auto type = root_var.get_root_type(); container_variant.add_sub_column({}, std::move(column), type); } - + // parent path -> subcolumns + std::map<PathInData, PathsWithColumnAndType> nested_subcolumns; + PathsWithColumnAndType non_nested_subcolumns; RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) { - vectorized::MutableColumnPtr column = node.data.column->get_ptr(); - bool add = container_variant.add_sub_column( - node.path.copy_pop_nfront(_path.get_parts().size()), std::move(column), - node.data.type); - if (!add) { - return Status::InternalError("Duplicated {}, type {}", node.path.get_path(), - node.data.type->get_name()); + MutableColumnPtr column = node.data.column->get_ptr(); + PathInData real_path = node.path.copy_pop_nfront(_path.get_parts().size()); + + if (node.path.has_nested_part()) { + CHECK_EQ(getTypeName(remove_nullable(node.data.type)->get_type_id()), + 
getTypeName(TypeIndex::Array)); + PathInData parent_path = node.path.get_nested_prefix_path().copy_pop_nfront( + _path.get_parts().size()); + nested_subcolumns[parent_path].emplace_back(real_path, column->get_ptr(), + node.data.type); + } else { + non_nested_subcolumns.emplace_back(real_path, column->get_ptr(), node.data.type); } return Status::OK(); })); + for (auto& entry : non_nested_subcolumns) { + DCHECK(!entry.path.has_nested_part()); + bool add = container_variant.add_sub_column(entry.path, entry.column->assume_mutable(), + entry.type); + if (!add) { + return Status::InternalError("Duplicated {}, type {}", entry.path.get_path(), + entry.type->get_name()); + } + } + for (auto& entry : nested_subcolumns) { + MutableColumnPtr nested_object = ColumnObject::create(true, false); + const auto* base_array = + check_and_get_column<ColumnArray>(remove_nullable(entry.second[0].column)); + MutableColumnPtr offset = base_array->get_offsets_ptr()->assume_mutable(); + auto* nested_object_ptr = assert_cast<ColumnObject*>(nested_object.get()); + // flatten nested arrays + for (const auto& subcolumn : entry.second) { + const auto& column = subcolumn.column; + const auto& type = subcolumn.type; + if (!remove_nullable(column)->is_column_array()) { + return Status::InvalidArgument( + "Meet none array column when flatten nested array, path {}, type {}", + subcolumn.path.get_path(), subcolumn.type->get_name()); + } + const auto* target_array = + check_and_get_column<ColumnArray>(remove_nullable(subcolumn.column).get()); + if (!base_array->has_equal_offsets(*target_array)) { + return Status::InvalidArgument( + "Meet none equal offsets array when flatten nested array, path {}, " + "type {}", + subcolumn.path.get_path(), subcolumn.type->get_name()); + } + MutableColumnPtr flattend_column = check_and_get_column<ColumnArray>(target_array) Review Comment: done -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org