This is an automated email from the ASF dual-hosted git repository. zhangchen pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push: new 1cbd8f6a24 [feature](merge-on-write) add DCHECK in compaction to detect data inconsistency (#16564) (#17166) 1cbd8f6a24 is described below commit 1cbd8f6a24e3122f678324345980333bc35cb5fd Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com> AuthorDate: Mon Feb 27 13:58:46 2023 +0800 [feature](merge-on-write) add DCHECK in compaction to detect data inconsistency (#16564) (#17166) MoW will mark all duplicate primary key as deleted, so we can add a DCHECK while compaction, if MoW's delete bitmap works incorrectly, we're able to detect this kind of issue ASAP. In Debug version, DCHECK will make BE crush, in release version, compaction will fail and finally load will fail due to -235 --- be/src/olap/compaction.cpp | 22 ++++++++++++++++------ be/src/olap/compaction.h | 2 +- be/src/olap/tablet.cpp | 5 ++++- be/src/olap/tablet.h | 2 +- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 6441749536..db6e4fe3ef 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -372,7 +372,7 @@ Status Compaction::do_compaction_impl(int64_t permits) { TRACE("check correctness finished"); // 4. modify rowsets in memory - RETURN_NOT_OK(modify_rowsets()); + RETURN_NOT_OK(modify_rowsets(&stats)); TRACE("modify rowsets finished"); // 5. update last success compaction time @@ -431,9 +431,10 @@ Status Compaction::construct_input_rowset_readers() { return Status::OK(); } -Status Compaction::modify_rowsets() { +Status Compaction::modify_rowsets(const Merger::Statistics* stats) { std::vector<RowsetSharedPtr> output_rowsets; output_rowsets.push_back(_output_rowset); + uint64_t missed_rows = 0; if (_tablet->keys_type() == KeysType::UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write()) { @@ -444,9 +445,9 @@ Status Compaction::modify_rowsets() { // New loads are not blocked, so some keys of input rowsets might // be deleted during the time. We need to deal with delete bitmap // of incremental data later. - _tablet->calc_compaction_output_rowset_delete_bitmap(_input_rowsets, _rowid_conversion, 0, - version.second + 1, &location_map, - &output_rowset_delete_bitmap); + missed_rows += _tablet->calc_compaction_output_rowset_delete_bitmap( + _input_rowsets, _rowid_conversion, 0, version.second + 1, &location_map, + &output_rowset_delete_bitmap); RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map)); location_map.clear(); { @@ -455,11 +456,20 @@ Status Compaction::modify_rowsets() { // Convert the delete bitmap of the input rowsets to output rowset for // incremental data. - _tablet->calc_compaction_output_rowset_delete_bitmap( + missed_rows += _tablet->calc_compaction_output_rowset_delete_bitmap( _input_rowsets, _rowid_conversion, version.second, UINT64_MAX, &location_map, &output_rowset_delete_bitmap); RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map)); + if (compaction_type() == READER_CUMULATIVE_COMPACTION) { + std::string err_msg = + "The merged rows is not equal to missed rows in rowid conversion"; + DCHECK(stats != nullptr || stats->merged_rows == missed_rows) << err_msg; + if (stats != nullptr && stats->merged_rows != missed_rows) { + return Status::InternalError(err_msg); + } + } + _tablet->merge_delete_bitmap(output_rowset_delete_bitmap); RETURN_NOT_OK(_tablet->modify_rowsets(output_rowsets, _input_rowsets, true)); } diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h index bf11629fac..018a8a5bf5 100644 --- a/be/src/olap/compaction.h +++ b/be/src/olap/compaction.h @@ -66,7 +66,7 @@ protected: Status do_compaction(int64_t permits); Status do_compaction_impl(int64_t permits); - Status modify_rowsets(); + virtual Status modify_rowsets(const Merger::Statistics* stats = nullptr); void gc_output_rowset(); Status construct_output_rowset_writer(bool is_vertical = false); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 196d8f7cdd..bb6e0ebe1e 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -2211,11 +2211,12 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset, DeleteBitmapP return Status::OK(); } -void Tablet::calc_compaction_output_rowset_delete_bitmap( +uint64_t Tablet::calc_compaction_output_rowset_delete_bitmap( const std::vector<RowsetSharedPtr>& input_rowsets, const RowIdConversion& rowid_conversion, uint64_t start_version, uint64_t end_version, std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>* location_map, DeleteBitmap* output_rowset_delete_bitmap) { + uint64_t missed_rows = 0; RowLocation src; RowLocation dst; for (auto& rowset : input_rowsets) { @@ -2237,6 +2238,7 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap( << " src loaction: |" << src.rowset_id << "|" << src.segment_id << "|" << src.row_id << " version: " << cur_version; + missed_rows++; continue; } VLOG_DEBUG << "calc_compaction_output_rowset_delete_bitmap dst location: |" @@ -2251,6 +2253,7 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap( } } } + return missed_rows; } void Tablet::merge_delete_bitmap(const DeleteBitmap& delete_bitmap) { diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index e439427661..242949b3e1 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -353,7 +353,7 @@ public: Status update_delete_bitmap_without_lock(const RowsetSharedPtr& rowset); Status update_delete_bitmap(const RowsetSharedPtr& rowset, DeleteBitmapPtr delete_bitmap, const RowsetIdUnorderedSet& pre_rowset_ids); - void calc_compaction_output_rowset_delete_bitmap( + uint64_t calc_compaction_output_rowset_delete_bitmap( const std::vector<RowsetSharedPtr>& input_rowsets, const RowIdConversion& rowid_conversion, uint64_t start_version, uint64_t end_version, std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>* location_map, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org