This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new 71440186dbf [fix](compaction) fix compaction check on variant (#49328)
71440186dbf is described below

commit 71440186dbf8dfbefc05082443a4f84d5f671a69
Author: Sun Chenyang <suncheny...@selectdb.com>
AuthorDate: Fri Mar 21 10:50:18 2025 +0800

    [fix](compaction) fix compaction check on variant (#49328)
---
 be/src/olap/compaction.cpp           |  8 +++-----
 be/src/vec/columns/column_object.cpp |  3 ++-
 be/src/vec/columns/column_object.h   | 12 +++---------
 be/src/vec/common/schema_util.cpp    | 19 +++++++++++++++----
 be/src/vec/common/schema_util.h      |  2 +-
 5 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 14fb999c438..0088228be4c 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -1365,11 +1365,9 @@ Status Compaction::check_correctness() {
                 _tablet->tablet_id(), _input_row_num, _stats.merged_rows, _stats.filtered_rows,
                 _output_rowset->num_rows());
     }
-    if (_tablet->keys_type() == KeysType::DUP_KEYS) {
-        // only check path stats for dup_keys since the rows may be merged in other models
-        RETURN_IF_ERROR(vectorized::schema_util::check_path_stats(_input_rowsets, _output_rowset,
-                                                                  _tablet->tablet_id()));
-    }
+    // check variant column path stats
+    RETURN_IF_ERROR(
+            vectorized::schema_util::check_path_stats(_input_rowsets, _output_rowset, _tablet));
     return Status::OK();
 }
 
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 15b24589d46..896fad0795d 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -636,7 +636,8 @@ void ColumnObject::Subcolumn::insert_range_from(const Subcolumn& src, size_t sta
 }
 
 bool ColumnObject::Subcolumn::is_finalized() const {
-    return  current_num_of_defaults == 0 && num_of_defaults_in_prefix == 0 && (data.empty() || (data.size() == 1));
+    return current_num_of_defaults == 0 && num_of_defaults_in_prefix == 0 &&
+           (data.empty() || (data.size() == 1));
 }
 
 template <typename Func>
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index 422bb38a704..10078c0ede5 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -154,17 +154,11 @@ public:
 
         void insert_default();
 
-        void increment_default_counter() {
-            ++current_num_of_defaults;
-        }
+        void increment_default_counter() { ++current_num_of_defaults; }
 
-        void reset_current_num_of_defaults() {
-            current_num_of_defaults = 0;
-        }
+        void reset_current_num_of_defaults() { current_num_of_defaults = 0; }
 
-        size_t cur_num_of_defaults() {
-            return current_num_of_defaults;
-        }
+        size_t cur_num_of_defaults() { return current_num_of_defaults; }
 
         void insert_many_defaults(size_t length);
 
diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp
index a7d34e80961..7f775e2b76d 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -729,7 +729,17 @@ void get_subpaths(const TabletColumn& variant,
 }
 
 Status check_path_stats(const std::vector<RowsetSharedPtr>& intputs, RowsetSharedPtr output,
-                        int64_t tablet_id) {
+                        BaseTabletSPtr tablet) {
+    // only check path stats for dup_keys since the rows may be merged in other models
+    if (tablet->keys_type() != KeysType::DUP_KEYS) {
+        return Status::OK();
+    }
+    // if there is a delete predicate in the input rowsets, we skip the path stats check
+    for (auto& rowset : intputs) {
+        if (rowset->rowset_meta()->has_delete_predicate()) {
+            return Status::OK();
+        }
+    }
     std::unordered_map<int32_t, PathToNoneNullValues> original_uid_to_path_stats;
     for (const auto& rs : intputs) {
         RETURN_IF_ERROR(collect_path_stats(rs, original_uid_to_path_stats));
@@ -739,11 +749,11 @@ Status check_path_stats(const std::vector<RowsetSharedPtr>& intputs, RowsetShare
     for (const auto& [uid, stats] : original_uid_to_path_stats) {
         if (output_uid_to_path_stats.find(uid) == output_uid_to_path_stats.end()) {
             return Status::InternalError("Path stats not found for uid {}, tablet_id {}", uid,
-                                         tablet_id);
+                                         tablet->tablet_id());
         }
         if (stats.size() != output_uid_to_path_stats.at(uid).size()) {
             return Status::InternalError("Path stats size not match for uid {}, tablet_id {}", uid,
-                                         tablet_id);
+                                         tablet->tablet_id());
         }
         for (const auto& [path, size] : stats) {
             if (output_uid_to_path_stats.at(uid).at(path) != size) {
@@ -751,7 +761,8 @@ Status check_path_stats(const std::vector<RowsetSharedPtr>& intputs, RowsetShare
                         "Path stats not match for uid {} with path `{}`, input size {}, output "
                         "size {}, "
                         "tablet_id {}",
-                        uid, path, size, output_uid_to_path_stats.at(uid).at(path), tablet_id);
+                        uid, path, size, output_uid_to_path_stats.at(uid).at(path),
+                        tablet->tablet_id());
             }
         }
     }
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index a4101883fc9..4c1ee876254 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -139,7 +139,7 @@ Status get_compaction_schema(const std::vector<RowsetSharedPtr>& rowsets, Tablet
 // Check if the path stats are consistent between inputs rowsets and output rowset.
 // Used to check the correctness of compaction.
 Status check_path_stats(const std::vector<RowsetSharedPtr>& intputs, RowsetSharedPtr output,
-                        int64_t tablet_id);
+                        BaseTabletSPtr tablet);
 
 // Calculate statistics about variant data paths from the encoded sparse column
 void calculate_variant_stats(const IColumn& encoded_sparse_column,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to