This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new ebb21ef0310 branch-2.1: [Fix](merge-on-write) Add defensive check 
before partial update #44687 (#45086)
ebb21ef0310 is described below

commit ebb21ef03102e6160c7900f1be81149c6319abeb
Author: bobhan1 <bao...@selectdb.com>
AuthorDate: Fri Dec 6 17:16:42 2024 +0800

    branch-2.1: [Fix](merge-on-write) Add defensive check before partial update 
#44687 (#45086)
    
    pick https://github.com/apache/doris/pull/44687
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp   | 42 ++++++++++++++++++++--
 be/src/olap/rowset/segment_v2/segment_writer.h     |  2 ++
 .../rowset/segment_v2/vertical_segment_writer.cpp  | 40 +++++++++++++++++++--
 .../rowset/segment_v2/vertical_segment_writer.h    |  2 ++
 4 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index e1696340a36..7e5a51f55ce 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -354,6 +354,40 @@ void 
SegmentWriter::_serialize_block_to_row_column(vectorized::Block& block) {
                << watch.elapsed_time() / 1000;
 }
 
+Status SegmentWriter::partial_update_preconditions_check(size_t row_pos) {
+    if (!_is_mow()) {
+        auto msg = fmt::format(
+                "Can only do partial update on merge-on-write unique table, 
but found: "
+                "keys_type={}, _opts.enable_unique_key_merge_on_write={}, 
tablet_id={}",
+                _tablet_schema->keys_type(), 
_opts.enable_unique_key_merge_on_write,
+                _tablet->tablet_id());
+        DCHECK(false) << msg;
+        return Status::InternalError<false>(msg);
+    }
+    if (_opts.rowset_ctx->partial_update_info == nullptr) {
+        auto msg =
+                fmt::format("partial_update_info should not be nullptr, please 
check, tablet_id={}",
+                            _tablet->tablet_id());
+        DCHECK(false) << msg;
+        return Status::InternalError<false>(msg);
+    }
+    if (!_opts.rowset_ctx->partial_update_info->is_partial_update) {
+        auto msg = fmt::format(
+                "in fixed partial update code, but is_partial_update=false, 
please check, "
+                "tablet_id={}",
+                _tablet->tablet_id());
+        DCHECK(false) << msg;
+        return Status::InternalError<false>(msg);
+    }
+    if (row_pos != 0) {
+        auto msg = fmt::format("row_pos should be 0, but found {}, 
tablet_id={}", row_pos,
+                               _tablet->tablet_id());
+        DCHECK(false) << msg;
+        return Status::InternalError<false>(msg);
+    }
+    return Status::OK();
+}
+
 // for partial update, we should do following steps to fill content of block:
 // 1. set block data to data convertor, and get all key_column's converted 
slice
 // 2. get pk of input block, and read missing columns
@@ -372,9 +406,7 @@ Status 
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
                             block->columns(), 
_tablet_schema->num_key_columns(),
                             _tablet_schema->num_columns()));
     }
-    DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write);
-
-    DCHECK(_opts.rowset_ctx->partial_update_info);
+    RETURN_IF_ERROR(partial_update_preconditions_check(row_pos));
     // find missing column cids
     const auto& missing_cids = 
_opts.rowset_ctx->partial_update_info->missing_cids;
     const auto& including_cids = 
_opts.rowset_ctx->partial_update_info->update_cids;
@@ -1281,5 +1313,9 @@ Status SegmentWriter::_generate_short_key_index(
     return Status::OK();
 }
 
+inline bool SegmentWriter::_is_mow() const {
+    return _tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write;
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h 
b/be/src/olap/rowset/segment_v2/segment_writer.h
index 8f9e09ed01a..5f5ab7bc7fe 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -96,6 +96,7 @@ public:
     Status append_row(const RowType& row);
 
     Status append_block(const vectorized::Block* block, size_t row_pos, size_t 
num_rows);
+    Status partial_update_preconditions_check(size_t row_pos);
     Status append_block_with_partial_content(const vectorized::Block* block, 
size_t row_pos,
                                              size_t num_rows);
 
@@ -180,6 +181,7 @@ private:
             vectorized::IOlapColumnDataAccessor* seq_column, size_t num_rows, 
bool need_sort);
     Status 
_generate_short_key_index(std::vector<vectorized::IOlapColumnDataAccessor*>& 
key_columns,
                                      size_t num_rows, const 
std::vector<size_t>& short_key_pos);
+    bool _is_mow() const;
 
 private:
     uint32_t _segment_id;
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 0feb769638a..1be1f1ec180 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -304,6 +304,39 @@ void 
VerticalSegmentWriter::_serialize_block_to_row_column(vectorized::Block& bl
                << watch.elapsed_time() / 1000;
 }
 
+Status VerticalSegmentWriter::_partial_update_preconditions_check(size_t 
row_pos) {
+    if (!_is_mow()) {
+        auto msg = fmt::format(
+                "Can only do partial update on merge-on-write unique table, 
but found: "
+                "keys_type={}, _opts.enable_unique_key_merge_on_write={}, 
tablet_id={}",
+                _tablet_schema->keys_type(), 
_opts.enable_unique_key_merge_on_write,
+                _tablet->tablet_id());
+        DCHECK(false) << msg;
+        return Status::InternalError<false>(msg);
+    }
+    if (_opts.rowset_ctx->partial_update_info == nullptr) {
+        auto msg =
+                fmt::format("partial_update_info should not be nullptr, please 
check, tablet_id={}",
+                            _tablet->tablet_id());
+        DCHECK(false) << msg;
+        return Status::InternalError<false>(msg);
+    }
+    if (!_opts.rowset_ctx->partial_update_info->is_partial_update) {
+        auto msg = fmt::format(
+                "in partial update code, but is_partial_update=false, please 
check, tablet_id={}",
+                _tablet->tablet_id());
+        DCHECK(false) << msg;
+        return Status::InternalError<false>(msg);
+    }
+    if (row_pos != 0) {
+        auto msg = fmt::format("row_pos should be 0, but found {}, 
tablet_id={}", row_pos,
+                               _tablet->tablet_id());
+        DCHECK(false) << msg;
+        return Status::InternalError<false>(msg);
+    }
+    return Status::OK();
+}
+
 // for partial update, we should do following steps to fill content of block:
 // 1. set block data to data convertor, and get all key_column's converted 
slice
 // 2. get pk of input block, and read missing columns
@@ -318,8 +351,7 @@ Status 
VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
         return Status::NotSupported("append_block_with_partial_content");
     }
 
-    DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write);
-    DCHECK(_opts.rowset_ctx->partial_update_info != nullptr);
+    RETURN_IF_ERROR(_partial_update_preconditions_check(data.row_pos));
 
     auto tablet = static_cast<Tablet*>(_tablet.get());
     // create full block and fill with input columns
@@ -1209,5 +1241,9 @@ void VerticalSegmentWriter::_set_max_key(const Slice& 
key) {
     _max_key.append(key.get_data(), key.get_size());
 }
 
+inline bool VerticalSegmentWriter::_is_mow() const {
+    return _tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write;
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h 
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
index 8fd854c3e95..1713ece23e6 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
@@ -149,12 +149,14 @@ private:
     void _set_min_key(const Slice& key);
     void _set_max_key(const Slice& key);
     void _serialize_block_to_row_column(vectorized::Block& block);
+    Status _partial_update_preconditions_check(size_t row_pos);
     Status _append_block_with_partial_content(RowsInBlock& data, 
vectorized::Block& full_block);
     Status _append_block_with_variant_subcolumns(RowsInBlock& data);
     Status _fill_missing_columns(vectorized::MutableColumns& 
mutable_full_columns,
                                  const std::vector<bool>& 
use_default_or_null_flag,
                                  bool has_default_or_nullable, const size_t& 
segment_start_pos,
                                  const vectorized::Block* block);
+    bool _is_mow() const;
 
 private:
     uint32_t _segment_id;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to