xiaokang commented on code in PR #28673: URL: https://github.com/apache/doris/pull/28673#discussion_r1479142081
########## be/src/vec/columns/subcolumn_tree.h: ########## @@ -55,7 +55,9 @@ class SubcolumnsTree { bool is_nested() const { return kind == NESTED; } bool is_scalar() const { return kind == SCALAR; } - bool is_scalar_without_children() const { return kind == SCALAR && children.empty(); } + + bool is_leaf_node() const { return kind == SCALAR && children.empty(); } + bool is_none_leaf_node() const { return !children.empty(); } Review Comment: It's strange that is_none_leaf_node() != !is_leaf_node() ########## be/src/olap/rowset/segment_v2/segment_writer.cpp: ########## @@ -150,6 +150,10 @@ void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t column_id, init_column_meta(meta->add_children_columns(), column_id, column.get_sub_column(i), tablet_schema); } + // add sparse column to footer + for (uint32_t i = 0; i < column.num_sparse_columns(); i++) { + init_column_meta(meta->add_sparse_columns(), -1, column.sparse_column_at(i), tablet_schema); Review Comment: Is column_id == -1 OK? ########## be/src/vec/columns/subcolumn_tree.h: ########## @@ -55,7 +55,9 @@ class SubcolumnsTree { bool is_nested() const { return kind == NESTED; } bool is_scalar() const { return kind == SCALAR; } - bool is_scalar_without_children() const { return kind == SCALAR && children.empty(); } + + bool is_leaf_node() const { return kind == SCALAR && children.empty(); } + bool is_none_leaf_node() const { return !children.empty(); } Review Comment: bool is_leaf_node() const { return children.empty(); } ########## gensrc/proto/olap_file.proto: ########## @@ -293,6 +293,8 @@ message ColumnPB { optional bool result_is_nullable = 19; // persist info for PathInData that represents path in document, e.g. JSON. optional segment_v2.ColumnPathInfo column_path_info = 20; + // sparse column within a variant column + repeated ColumnPB sparse_column = 21; Review Comment: sparse_columns ########## be/src/olap/rowset/segment_v2/segment.cpp: ########## @@ -467,36 +507,29 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column, return Status::OK(); } - // Init iterators with extra path info. - // TODO If this segment does not contain any data correspond to the relatate path, - // then we could optimize to generate a default iterator - // This file doest not contain this column, so only read from sparse column - // to avoid read amplification - if (node != nullptr && node->is_scalar() && node->children.empty()) { + if (node != nullptr && node->is_leaf_node() && sparse_node == nullptr) { + // Node contains column without any child sub columns and no corresponding sparse columns // Direct read extracted columns const auto* node = _sub_column_tree.find_leaf(tablet_column.path_info()); ColumnIterator* it; RETURN_IF_ERROR(node->data.reader->new_iterator(&it)); iter->reset(it); - } else if (node != nullptr && !node->children.empty()) { + } else if ((node != nullptr && node->is_none_leaf_node()) || + (node != nullptr && sparse_node != nullptr)) { + // Node contains column with children columns or has correspoding sparse columns // Create reader with hirachical data RETURN_IF_ERROR( HierarchicalDataReader::create(iter, tablet_column.path_info(), node, root)); } else { - // If file only exist column `v.a` and `v` but target path is `v.b`, read only read and parse root column - if (root == nullptr) { + // No such node, read from either sparse column or default column Review Comment: It's not exactly precise for else branch. Suggest more clear branches: ``` if (node) { // Node contains column if (node->is_leaf_node() && sparse_node == nullptr) { // Node contains column without any child sub columns and no corresponding sparse columns } else if (node->is_none_leaf_node() || sparse_node != nullptr) { // Node contains column with children columns or has correspoding sparse columns } else { // should not reach this branch } } else { // No such node, read from either sparse column or default column } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org