This is an automated email from the ASF dual-hosted git repository. zhangchen pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new d5a6106fd40 [pick20][opt](mow) reduce memory usage for mow table compaction (#36865) (#36998) d5a6106fd40 is described below commit d5a6106fd402116797eccde1c92fcc9432a0bd8c Author: camby <camby...@tencent.com> AuthorDate: Mon Jul 1 15:32:21 2024 +0800 [pick20][opt](mow) reduce memory usage for mow table compaction (#36865) (#36998) cherry-pick https://github.com/apache/doris/pull/36865 to branch-2.0 --- be/src/common/config.cpp | 2 + be/src/common/config.h | 2 + be/src/olap/compaction.cpp | 51 +++++++++++++++----------- be/src/olap/tablet.cpp | 8 +++- be/src/olap/utils.h | 2 + regression-test/pipeline/external/conf/be.conf | 1 + regression-test/pipeline/p0/conf/be.conf | 1 + regression-test/pipeline/p1/conf/be.conf | 1 + 8 files changed, 44 insertions(+), 24 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 604535825fb..9e8f226dc0d 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1098,6 +1098,8 @@ DEFINE_mInt64(LZ4_HC_compression_level, "9"); DEFINE_mBool(enable_merge_on_write_correctness_check, "true"); // rowid conversion correctness check when compaction for mow table DEFINE_mBool(enable_rowid_conversion_correctness_check, "false"); +// missing rows correctness check when compaction for mow table +DEFINE_mBool(enable_missing_rows_correctness_check, "false"); // When the number of missing versions is more than this value, do not directly // retry the publish and handle it through async publish. DEFINE_mInt32(mow_publish_max_discontinuous_version_num, "20"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 6e7f2ff490a..35e3a620288 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1144,6 +1144,8 @@ DECLARE_mInt64(LZ4_HC_compression_level); DECLARE_mBool(enable_merge_on_write_correctness_check); // rowid conversion correctness check when compaction for mow table DECLARE_mBool(enable_rowid_conversion_correctness_check); +// missing rows correctness check when compaction for mow table +DECLARE_mBool(enable_missing_rows_correctness_check); // When the number of missing versions is more than this value, do not directly // retry the publish and handle it through async publish. DECLARE_mInt32(mow_publish_max_discontinuous_version_num); diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 5ca06457366..1e1ad3e2775 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -802,8 +802,17 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) { _tablet->enable_unique_key_merge_on_write()) { Version version = _tablet->max_version(); DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id()); - std::set<RowLocation> missed_rows; - std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>> location_map; + std::unique_ptr<RowLocationSet> missed_rows; + if (config::enable_missing_rows_correctness_check && !allow_delete_in_cumu_compaction() && + compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION) { + missed_rows = std::make_unique<RowLocationSet>(); + LOG(INFO) << "RowLocation Set inited succ for tablet:" << _tablet->tablet_id(); + } + std::unique_ptr<std::map<RowsetSharedPtr, RowLocationPairList>> location_map; + if (config::enable_rowid_conversion_correctness_check) { + location_map = std::make_unique<std::map<RowsetSharedPtr, RowLocationPairList>>(); + LOG(INFO) << "Location Map inited succ for tablet:" << _tablet->tablet_id(); + } // Convert the delete bitmap of the input rowsets to output rowset. // New loads are not blocked, so some keys of input rowsets might // be deleted during the time. We need to deal with delete bitmap @@ -811,13 +820,12 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) { // TODO(LiaoXin): check if there are duplicate keys std::size_t missed_rows_size = 0; _tablet->calc_compaction_output_rowset_delete_bitmap( - _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows, - &location_map, _tablet->tablet_meta()->delete_bitmap(), + _input_rowsets, _rowid_conversion, 0, version.second + 1, missed_rows.get(), + location_map.get(), _tablet->tablet_meta()->delete_bitmap(), &output_rowset_delete_bitmap); - if (!allow_delete_in_cumu_compaction()) { - missed_rows_size = missed_rows.size(); - if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION && stats != nullptr && - stats->merged_rows != missed_rows_size && + if (missed_rows) { + missed_rows_size = missed_rows->size(); + if (stats != nullptr && stats->merged_rows != missed_rows_size && _tablet->tablet_state() == TABLET_RUNNING) { std::string err_msg = fmt::format( "cumulative compaction: the merged rows({}) is not equal to missed " @@ -829,10 +837,10 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) { } } - if (config::enable_rowid_conversion_correctness_check) { - RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map)); + if (location_map) { + RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, *location_map)); + location_map->clear(); } - location_map.clear(); { std::lock_guard<std::mutex> wrlock_(_tablet->get_rowset_update_lock()); @@ -859,8 +867,8 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) { } DeleteBitmap txn_output_delete_bitmap(_tablet->tablet_id()); _tablet->calc_compaction_output_rowset_delete_bitmap( - _input_rowsets, _rowid_conversion, 0, UINT64_MAX, &missed_rows, - &location_map, *it.delete_bitmap.get(), &txn_output_delete_bitmap); + _input_rowsets, _rowid_conversion, 0, UINT64_MAX, missed_rows.get(), + location_map.get(), *it.delete_bitmap.get(), &txn_output_delete_bitmap); if (config::enable_merge_on_write_correctness_check) { RowsetIdUnorderedSet rowsetids; rowsetids.insert(_output_rowset->rowset_id()); @@ -879,21 +887,20 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) { // Convert the delete bitmap of the input rowsets to output rowset for // incremental data. _tablet->calc_compaction_output_rowset_delete_bitmap( - _input_rowsets, _rowid_conversion, version.second, UINT64_MAX, &missed_rows, - &location_map, _tablet->tablet_meta()->delete_bitmap(), + _input_rowsets, _rowid_conversion, version.second, UINT64_MAX, + missed_rows.get(), location_map.get(), _tablet->tablet_meta()->delete_bitmap(), &output_rowset_delete_bitmap); - if (!allow_delete_in_cumu_compaction() && - compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION) { - DCHECK_EQ(missed_rows.size(), missed_rows_size); - if (missed_rows.size() != missed_rows_size) { + if (missed_rows) { + DCHECK_EQ(missed_rows->size(), missed_rows_size); + if (missed_rows->size() != missed_rows_size) { LOG(WARNING) << "missed rows don't match, before: " << missed_rows_size - << " after: " << missed_rows.size(); + << " after: " << missed_rows->size(); } } - if (config::enable_rowid_conversion_correctness_check) { - RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map)); + if (location_map) { + RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, *location_map)); } _tablet->merge_delete_bitmap(output_rowset_delete_bitmap); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 1698ff1603f..2221b94373e 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -3782,7 +3782,9 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap( << " src loaction: |" << src.rowset_id << "|" << src.segment_id << "|" << src.row_id << " version: " << cur_version; - missed_rows->insert(src); + if (missed_rows) { + missed_rows->insert(src); + } continue; } VLOG_DEBUG << "calc_compaction_output_rowset_delete_bitmap dst location: |" @@ -3790,7 +3792,9 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap( << " src location: |" << src.rowset_id << "|" << src.segment_id << "|" << src.row_id << " start version: " << start_version << "end version" << end_version; - (*location_map)[rowset].emplace_back(src, dst); + if (location_map) { + (*location_map)[rowset].emplace_back(src, dst); + } output_rowset_delete_bitmap->add({dst.rowset_id, dst.segment_id, cur_version}, dst.row_id); } diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index 619b883c580..cfc6d99ed2f 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -295,6 +295,8 @@ struct RowLocation { } } }; +using RowLocationSet = std::set<RowLocation>; +using RowLocationPairList = std::list<std::pair<RowLocation, RowLocation>>; struct GlobalRowLoacation { GlobalRowLoacation(int64_t tid, RowsetId rsid, uint32_t sid, uint32_t rid) diff --git a/regression-test/pipeline/external/conf/be.conf b/regression-test/pipeline/external/conf/be.conf index eb4e91730ea..0b435d283b6 100644 --- a/regression-test/pipeline/external/conf/be.conf +++ b/regression-test/pipeline/external/conf/be.conf @@ -76,3 +76,4 @@ max_sys_mem_available_low_water_mark_bytes=69206016 user_files_secure_path=/ enable_merge_on_write_correctness_check=false enable_debug_points=true +enable_missing_rows_correctness_check=true diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf index eb4e91730ea..0b435d283b6 100644 --- a/regression-test/pipeline/p0/conf/be.conf +++ b/regression-test/pipeline/p0/conf/be.conf @@ -76,3 +76,4 @@ max_sys_mem_available_low_water_mark_bytes=69206016 user_files_secure_path=/ enable_merge_on_write_correctness_check=false enable_debug_points=true +enable_missing_rows_correctness_check=true diff --git a/regression-test/pipeline/p1/conf/be.conf b/regression-test/pipeline/p1/conf/be.conf index ae95e4f65a8..29b42961f06 100644 --- a/regression-test/pipeline/p1/conf/be.conf +++ b/regression-test/pipeline/p1/conf/be.conf @@ -72,3 +72,4 @@ enable_feature_binlog=true max_sys_mem_available_low_water_mark_bytes=69206016 enable_merge_on_write_correctness_check=false enable_debug_points=true +enable_missing_rows_correctness_check=true --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org