This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 91675d0ef67 [branch-2.0](cherry-pick) compaction may cause update 
failure (#31551) (#31556)
91675d0ef67 is described below

commit 91675d0ef67c3cbddbb8caf9e219af93d27c0ac1
Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com>
AuthorDate: Thu Mar 7 11:36:11 2024 +0800

    [branch-2.0](cherry-pick) compaction may cause update failure (#31551) 
(#31556)
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp | 18 +++++++++++++++++-
 be/src/olap/tablet.cpp                           | 10 +++++++++-
 be/src/olap/tablet.h                             |  4 ++--
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index a777e358fc3..c1c124658b7 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -400,7 +400,23 @@ Status 
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
     std::vector<RowsetSharedPtr> specified_rowsets;
     {
         std::shared_lock rlock(_tablet->get_header_lock());
-        specified_rowsets = 
_tablet->get_rowset_by_ids(&_mow_context->rowset_ids);
+        // Under normal circumstances, `get_rowset_by_ids` does not need to 
consider the stale
+        // rowset, in other words, if a rowset id is not found in the normal 
rowset, we can ignore
+        // it. This is because even if we handle stale rowset here, we need to 
recalculate the
+        // new rowset generated by the corresponding compaction in the publish 
phase.
+        // However, for partial update, ignoring the stale rowset may cause 
some keys to not be
+        // found in the flush phase (lookup_row_key returns KEY_NOT_FOUND), 
and thus be mistaken
+        // as new keys in the flush phase, which will cause the load to fail 
in the following
+        // two cases:
+        // 1. when strict_mode is enabled, new keys are not allowed to be 
added.
+        // 2. Some columns that need to be filled are neither nullable nor 
have a default value,
+        //    in which case the value of the field cannot be filled as a new 
key, leading to a
+        //    failure of the load.
+        bool should_include_stale =
+                _opts.rowset_ctx->partial_update_info->is_strict_mode ||
+                
!_opts.rowset_ctx->partial_update_info->can_insert_new_rows_in_partial_update;
+        specified_rowsets =
+                _tablet->get_rowset_by_ids(&_mow_context->rowset_ids, 
should_include_stale);
         if (_opts.rowset_ctx->partial_update_info->is_strict_mode &&
             specified_rowsets.size() != _mow_context->rowset_ids.size()) {
             // Only when this is a strict mode partial update that missing 
rowsets here will lead to problems.
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 47e089add41..0b92c109901 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -3193,7 +3193,7 @@ Status Tablet::calc_delete_bitmap(RowsetSharedPtr rowset,
 }
 
 std::vector<RowsetSharedPtr> Tablet::get_rowset_by_ids(
-        const RowsetIdUnorderedSet* specified_rowset_ids) {
+        const RowsetIdUnorderedSet* specified_rowset_ids, bool include_stale) {
     std::vector<RowsetSharedPtr> rowsets;
     for (auto& rs : _rs_version_map) {
         if (!specified_rowset_ids ||
@@ -3201,6 +3201,14 @@ std::vector<RowsetSharedPtr> Tablet::get_rowset_by_ids(
             rowsets.push_back(rs.second);
         }
     }
+    if (include_stale && specified_rowset_ids != nullptr &&
+        rowsets.size() != specified_rowset_ids->size()) {
+        for (auto& rs : _stale_rs_version_map) {
+            if (specified_rowset_ids->find(rs.second->rowset_id()) != 
specified_rowset_ids->end()) {
+                rowsets.push_back(rs.second);
+            }
+        }
+    }
     std::sort(rowsets.begin(), rowsets.end(), [](RowsetSharedPtr& lhs, 
RowsetSharedPtr& rhs) {
         return lhs->end_version() > rhs->end_version();
     });
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 0e33bf71030..d94990e4071 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -459,8 +459,8 @@ public:
                               DeleteBitmapPtr delete_bitmap, int64_t version,
                               CalcDeleteBitmapToken* token, RowsetWriter* 
rowset_writer = nullptr);
 
-    std::vector<RowsetSharedPtr> get_rowset_by_ids(
-            const RowsetIdUnorderedSet* specified_rowset_ids);
+    std::vector<RowsetSharedPtr> get_rowset_by_ids(const RowsetIdUnorderedSet* 
specified_rowset_ids,
+                                                   bool include_stale = false);
 
     Status calc_segment_delete_bitmap(RowsetSharedPtr rowset,
                                       const segment_v2::SegmentSharedPtr& seg,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to