eldenmoon commented on code in PR #48401: URL: https://github.com/apache/doris/pull/48401#discussion_r1972954440
########## be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp: ########## @@ -469,39 +462,143 @@ void SparseColumnExtractReader::_fill_path_column(vectorized::MutableColumnPtr& #ifndef NDEBUG var.check_consistency(); #endif - _sparse_column->clear(); + // _sparse_column->clear(); } -Status SparseColumnExtractReader::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, - bool* has_null) { - _sparse_column->clear(); - RETURN_IF_ERROR(_sparse_column_reader->next_batch(n, _sparse_column, has_null)); - const auto& offsets = assert_cast<const vectorized::ColumnMap&>(*_sparse_column).get_offsets(); - // Check if we don't have any paths in shared data in current range. - if (offsets.back() == offsets[-1]) { - dst->insert_many_defaults(*n); - } else { - _fill_path_column(dst); +Status SparseColumnMergeReader::seek_to_first() { + RETURN_IF_ERROR(_sparse_column_reader->seek_to_first()); + for (auto& entry : _src_subcolumns_for_sparse) { + RETURN_IF_ERROR(entry->data.iterator->seek_to_first()); } return Status::OK(); } -Status SparseColumnExtractReader::read_by_rowids(const rowid_t* rowids, const size_t count, - vectorized::MutableColumnPtr& dst) { - _sparse_column->clear(); - RETURN_IF_ERROR(_sparse_column_reader->read_by_rowids(rowids, count, _sparse_column)); - const auto& offsets = assert_cast<const vectorized::ColumnMap&>(*_sparse_column).get_offsets(); - // Check if we don't have any paths in shared data in current range. 
- if (offsets.back() == offsets[-1]) { - dst->insert_many_defaults(count); - } else { - _fill_path_column(dst); +Status SparseColumnMergeReader::seek_to_ordinal(ordinal_t ord) { + RETURN_IF_ERROR(_sparse_column_reader->seek_to_ordinal(ord)); + for (auto& entry : _src_subcolumns_for_sparse) { + RETURN_IF_ERROR(entry->data.iterator->seek_to_ordinal(ord)); } return Status::OK(); } -ordinal_t SparseColumnExtractReader::get_current_ordinal() const { - return _sparse_column_reader->get_current_ordinal(); +Status SparseColumnMergeReader::init(const ColumnIteratorOptions& opts) { + RETURN_IF_ERROR(_sparse_column_reader->init(opts)); + for (auto& entry : _src_subcolumns_for_sparse) { + entry->data.serde = entry->data.type->get_serde(); + RETURN_IF_ERROR(entry->data.iterator->init(opts)); + const auto& path = entry->path.get_path(); + _sorted_src_subcolumn_for_sparse.emplace_back(StringRef(path.data(), path.size()), entry); + } + return Status::OK(); +} + +void SparseColumnMergeReader::_serialize_nullable_column_to_sparse( + const SubstreamReaderTree::Node* src_subcolumn, + vectorized::ColumnString& dst_sparse_column_paths, + vectorized::ColumnString& dst_sparse_column_values, const StringRef& src_path, size_t row) { + // every subcolumn is always Nullable + const auto& nullable_serde = + assert_cast<vectorized::DataTypeNullableSerDe&>(*src_subcolumn->data.serde); + const auto& nullable_col = + assert_cast<const vectorized::ColumnNullable&, TypeCheckOnRelease::DISABLE>( + *src_subcolumn->data.column); + if (nullable_col.is_null_at(row)) { + return; + } + // insert key + dst_sparse_column_paths.insert_data(src_path.data, src_path.size); + // insert value + vectorized::ColumnString::Chars& chars = dst_sparse_column_values.get_chars(); + nullable_serde.get_nested_serde()->write_one_cell_to_binary(nullable_col.get_nested_column(), + chars, row); + dst_sparse_column_values.get_offsets().push_back(chars.size()); +} + +void 
SparseColumnMergeReader::_process_data_without_sparse_column(vectorized::MutableColumnPtr& dst, + size_t num_rows) { + if (_src_subcolumns_for_sparse.empty()) { + dst->insert_many_defaults(num_rows); + } else { + // merge subcolumns to sparse column + // Otherwise insert required src dense columns into sparse column. + auto& map_column = assert_cast<vectorized::ColumnMap&>(*dst); + auto& sparse_column_keys = assert_cast<vectorized::ColumnString&>(map_column.get_keys()); + auto& sparse_column_values = + assert_cast<vectorized::ColumnString&>(map_column.get_values()); + auto& sparse_column_offsets = map_column.get_offsets(); + for (size_t i = 0; i != num_rows; ++i) { + // Paths in sorted_src_subcolumn_for_sparse_column are already sorted. + for (const auto& entry : _src_subcolumns_for_sparse) { Review Comment: sorted [editorial note: presumably the reviewer means this loop should iterate the sorted container `_sorted_src_subcolumn_for_sparse` populated in `init()`, as the preceding code comment implies, rather than `_src_subcolumns_for_sparse`] -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org