This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 91675d0ef67 [branch-2.0](cherry-pick) compaction may cause update failue(#31551) (#31556) 91675d0ef67 is described below commit 91675d0ef67c3cbddbb8caf9e219af93d27c0ac1 Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com> AuthorDate: Thu Mar 7 11:36:11 2024 +0800 [branch-2.0](cherry-pick) compaction may cause update failue(#31551) (#31556) --- be/src/olap/rowset/segment_v2/segment_writer.cpp | 18 +++++++++++++++++- be/src/olap/tablet.cpp | 10 +++++++++- be/src/olap/tablet.h | 4 ++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index a777e358fc3..c1c124658b7 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -400,7 +400,23 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* std::vector<RowsetSharedPtr> specified_rowsets; { std::shared_lock rlock(_tablet->get_header_lock()); - specified_rowsets = _tablet->get_rowset_by_ids(&_mow_context->rowset_ids); + // Under normal circumstances, `get_rowset_by_ids` does not need to consider the stale + // rowset; in other words, if a rowset id is not found in the normal rowset, we can ignore + // it. This is because even if we handle stale rowset here, we need to recalculate the + // new rowset generated by the corresponding compaction in the publish phase. + // However, for partial update, ignoring the stale rowset may cause some keys to not be + // found in the flush phase (lookup_row_key returns KEY_NOT_FOUND), and thus be mistaken + // as new keys in the flush phase, which will cause the load to fail in the following + // two cases: + // 1. when strict_mode is enabled, new keys are not allowed to be added. + // 2.
Some columns that need to be filled are neither nullable nor have a default value, + // in which case the value of the field cannot be filled as a new key, leading to a + // failure of the load. + bool should_include_stale = + _opts.rowset_ctx->partial_update_info->is_strict_mode || + !_opts.rowset_ctx->partial_update_info->can_insert_new_rows_in_partial_update; + specified_rowsets = + _tablet->get_rowset_by_ids(&_mow_context->rowset_ids, should_include_stale); if (_opts.rowset_ctx->partial_update_info->is_strict_mode && specified_rowsets.size() != _mow_context->rowset_ids.size()) { // Missing rowsets here will lead to problems only when this is a strict mode partial update. diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 47e089add41..0b92c109901 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -3193,7 +3193,7 @@ Status Tablet::calc_delete_bitmap(RowsetSharedPtr rowset, } std::vector<RowsetSharedPtr> Tablet::get_rowset_by_ids( - const RowsetIdUnorderedSet* specified_rowset_ids) { + const RowsetIdUnorderedSet* specified_rowset_ids, bool include_stale) { std::vector<RowsetSharedPtr> rowsets; for (auto& rs : _rs_version_map) { if (!specified_rowset_ids || @@ -3201,6 +3201,14 @@ std::vector<RowsetSharedPtr> Tablet::get_rowset_by_ids( rowsets.push_back(rs.second); } } + if (include_stale && specified_rowset_ids != nullptr && + rowsets.size() != specified_rowset_ids->size()) { + for (auto& rs : _stale_rs_version_map) { + if (specified_rowset_ids->find(rs.second->rowset_id()) != specified_rowset_ids->end()) { + rowsets.push_back(rs.second); + } + } + } std::sort(rowsets.begin(), rowsets.end(), [](RowsetSharedPtr& lhs, RowsetSharedPtr& rhs) { return lhs->end_version() > rhs->end_version(); }); diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 0e33bf71030..d94990e4071 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -459,8 +459,8 @@ public: DeleteBitmapPtr delete_bitmap, int64_t version, 
CalcDeleteBitmapToken* token, RowsetWriter* rowset_writer = nullptr); - std::vector<RowsetSharedPtr> get_rowset_by_ids( - const RowsetIdUnorderedSet* specified_rowset_ids); + std::vector<RowsetSharedPtr> get_rowset_by_ids(const RowsetIdUnorderedSet* specified_rowset_ids, + bool include_stale = false); Status calc_segment_delete_bitmap(RowsetSharedPtr rowset, const segment_v2::SegmentSharedPtr& seg, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org