This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant-sparse in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push:
     new 71440186dbf [fix](compaction) fix compaction check on variant (#49328)
71440186dbf is described below

commit 71440186dbf8dfbefc05082443a4f84d5f671a69
Author: Sun Chenyang <suncheny...@selectdb.com>
AuthorDate: Fri Mar 21 10:50:18 2025 +0800

    [fix](compaction) fix compaction check on variant (#49328)
---
 be/src/olap/compaction.cpp           |  8 +++-----
 be/src/vec/columns/column_object.cpp |  3 ++-
 be/src/vec/columns/column_object.h   | 12 +++---------
 be/src/vec/common/schema_util.cpp    | 19 +++++++++++++++----
 be/src/vec/common/schema_util.h      |  2 +-
 5 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 14fb999c438..0088228be4c 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -1365,11 +1365,9 @@ Status Compaction::check_correctness() {
                     _tablet->tablet_id(), _input_row_num, _stats.merged_rows, _stats.filtered_rows,
                     _output_rowset->num_rows());
     }
-    if (_tablet->keys_type() == KeysType::DUP_KEYS) {
-        // only check path stats for dup_keys since the rows may be merged in other models
-        RETURN_IF_ERROR(vectorized::schema_util::check_path_stats(_input_rowsets, _output_rowset,
-                                                                  _tablet->tablet_id()));
-    }
+    // check variant column path stats
+    RETURN_IF_ERROR(
+            vectorized::schema_util::check_path_stats(_input_rowsets, _output_rowset, _tablet));
     return Status::OK();
 }
 
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 15b24589d46..896fad0795d 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -636,7 +636,8 @@ void ColumnObject::Subcolumn::insert_range_from(const Subcolumn& src, size_t sta
 }
 
 bool ColumnObject::Subcolumn::is_finalized() const {
-    return current_num_of_defaults == 0 && num_of_defaults_in_prefix == 0 && (data.empty() || (data.size() == 1));
+    return current_num_of_defaults == 0 && num_of_defaults_in_prefix == 0 &&
+           (data.empty() || (data.size() == 1));
 }
 
 template <typename Func>
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index 422bb38a704..10078c0ede5 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -154,17 +154,11 @@ public:
 
         void insert_default();
 
-        void increment_default_counter() {
-            ++current_num_of_defaults;
-        }
+        void increment_default_counter() { ++current_num_of_defaults; }
 
-        void reset_current_num_of_defaults() {
-            current_num_of_defaults = 0;
-        }
+        void reset_current_num_of_defaults() { current_num_of_defaults = 0; }
 
-        size_t cur_num_of_defaults() {
-            return current_num_of_defaults;
-        }
+        size_t cur_num_of_defaults() { return current_num_of_defaults; }
 
         void insert_many_defaults(size_t length);
 
diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp
index a7d34e80961..7f775e2b76d 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -729,7 +729,17 @@ void get_subpaths(const TabletColumn& variant,
 }
 
 Status check_path_stats(const std::vector<RowsetSharedPtr>& intputs, RowsetSharedPtr output,
-                        int64_t tablet_id) {
+                        BaseTabletSPtr tablet) {
+    // only check path stats for dup_keys since the rows may be merged in other models
+    if (tablet->keys_type() != KeysType::DUP_KEYS) {
+        return Status::OK();
+    }
+    // if there is a delete predicate in the input rowsets, we skip the path stats check
+    for (auto& rowset : intputs) {
+        if (rowset->rowset_meta()->has_delete_predicate()) {
+            return Status::OK();
+        }
+    }
     std::unordered_map<int32_t, PathToNoneNullValues> original_uid_to_path_stats;
     for (const auto& rs : intputs) {
         RETURN_IF_ERROR(collect_path_stats(rs, original_uid_to_path_stats));
@@ -739,11 +749,11 @@ Status check_path_stats(const std::vector<RowsetSharedPtr>& intputs, RowsetShare
     for (const auto& [uid, stats] : original_uid_to_path_stats) {
         if (output_uid_to_path_stats.find(uid) == output_uid_to_path_stats.end()) {
             return Status::InternalError("Path stats not found for uid {}, tablet_id {}", uid,
-                                         tablet_id);
+                                         tablet->tablet_id());
         }
         if (stats.size() != output_uid_to_path_stats.at(uid).size()) {
             return Status::InternalError("Path stats size not match for uid {}, tablet_id {}", uid,
-                                         tablet_id);
+                                         tablet->tablet_id());
         }
         for (const auto& [path, size] : stats) {
             if (output_uid_to_path_stats.at(uid).at(path) != size) {
@@ -751,7 +761,8 @@ Status check_path_stats(const std::vector<RowsetSharedPtr>& intputs, RowsetShare
                         "Path stats not match for uid {} with path `{}`, input size {}, output "
                         "size {}, "
                         "tablet_id {}",
-                        uid, path, size, output_uid_to_path_stats.at(uid).at(path), tablet_id);
+                        uid, path, size, output_uid_to_path_stats.at(uid).at(path),
+                        tablet->tablet_id());
             }
         }
     }
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index a4101883fc9..4c1ee876254 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -139,7 +139,7 @@ Status get_compaction_schema(const std::vector<RowsetSharedPtr>& rowsets, Tablet
 // Check if the path stats are consistent between inputs rowsets and output rowset.
 // Used to check the correctness of compaction.
 Status check_path_stats(const std::vector<RowsetSharedPtr>& intputs, RowsetSharedPtr output,
-                        int64_t tablet_id);
+                        BaseTabletSPtr tablet);
 
 // Calculate statistics about variant data paths from the encoded sparse column
 void calculate_variant_stats(const IColumn& encoded_sparse_column,

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org