This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 3d758de7a2 [improvement](binlog) gc be binlog metas when tablet is dropped. (#22447) 3d758de7a2 is described below commit 3d758de7a27a228c79628164249e192dc9b8ba22 Author: DeadlineFen <117912096+deadline...@users.noreply.github.com> AuthorDate: Fri Aug 4 14:38:13 2023 +0800 [improvement](binlog) gc be binlog metas when tablet is dropped. (#22447) --- be/src/olap/binlog.h | 5 ++- be/src/olap/olap_meta.cpp | 10 ++++- be/src/olap/olap_meta.h | 3 ++ be/src/olap/rowset/rowset_meta_manager.cpp | 46 +++++++++++++++++++++- be/src/olap/rowset/rowset_meta_manager.h | 11 ++++-- be/src/olap/storage_engine.cpp | 36 +++++++++++++++++ be/src/olap/storage_engine.h | 2 + be/src/olap/tablet.cpp | 2 +- .../org/apache/doris/binlog/BinlogTombstone.java | 17 -------- .../java/org/apache/doris/binlog/DBBinlog.java | 11 ------ 10 files changed, 105 insertions(+), 38 deletions(-) diff --git a/be/src/olap/binlog.h b/be/src/olap/binlog.h index b6b95a9530..57853bd6db 100644 --- a/be/src/olap/binlog.h +++ b/be/src/olap/binlog.h @@ -25,8 +25,9 @@ #include "olap/olap_common.h" namespace doris { -constexpr std::string_view kBinlogPrefix = "binglog_"; +constexpr std::string_view kBinlogPrefix = "binlog_"; constexpr std::string_view kBinlogMetaPrefix = "binlog_meta_"; +constexpr std::string_view kBinlogDataPrefix = "binlog_data_"; inline auto make_binlog_meta_key(std::string_view tablet, int64_t version, std::string_view rowset) { @@ -82,7 +83,7 @@ inline bool starts_with_binlog_meta(std::string_view str) { } inline std::string get_binlog_data_key_from_meta_key(std::string_view meta_key) { - // like "binglog_meta_6943f1585fe834b5-e542c2b83a21d0b7" => "binglog_data-6943f1585fe834b5-e542c2b83a21d0b7" + // like "binlog_meta_6943f1585fe834b5-e542c2b83a21d0b7" => "binlog_data-6943f1585fe834b5-e542c2b83a21d0b7" return fmt::format("{}data_{}", kBinlogPrefix, meta_key.substr(kBinlogMetaPrefix.length())); } } // namespace doris diff --git a/be/src/olap/olap_meta.cpp b/be/src/olap/olap_meta.cpp index 29bb48cf43..f393d4a262 100644 --- a/be/src/olap/olap_meta.cpp +++ b/be/src/olap/olap_meta.cpp @@ -277,12 +277,18 @@ Status OlapMeta::remove(const int column_family_index, const std::vector<std::st Status OlapMeta::iterate(const int column_family_index, const std::string& prefix, std::function<bool(const std::string&, const std::string&)> const& func) { + return iterate(column_family_index, prefix, prefix, func); +} + +Status OlapMeta::iterate(const int column_family_index, const std::string& seek_key, + const std::string& prefix, + std::function<bool(const std::string&, const std::string&)> const& func) { auto& handle = _handles[column_family_index]; std::unique_ptr<Iterator> it(_db->NewIterator(ReadOptions(), handle.get())); - if (prefix == "") { + if (seek_key == "") { it->SeekToFirst(); } else { - it->Seek(prefix); + it->Seek(seek_key); } rocksdb::Status status = it->status(); if (!status.ok()) { diff --git a/be/src/olap/olap_meta.h b/be/src/olap/olap_meta.h index 174f2d065f..504a07f90f 100644 --- a/be/src/olap/olap_meta.h +++ b/be/src/olap/olap_meta.h @@ -60,6 +60,9 @@ public: Status iterate(const int column_family_index, const std::string& prefix, std::function<bool(const std::string&, const std::string&)> const& func); + Status iterate(const int column_family_index, const std::string& seek_key, + const std::string& prefix, + std::function<bool(const std::string&, const std::string&)> const& func); std::string get_root_path() const { return _root_path; } diff --git a/be/src/olap/rowset/rowset_meta_manager.cpp b/be/src/olap/rowset/rowset_meta_manager.cpp index 32cb7c57eb..7047b965dc 100644 --- a/be/src/olap/rowset/rowset_meta_manager.cpp +++ b/be/src/olap/rowset/rowset_meta_manager.cpp @@ -170,7 +170,7 @@ std::vector<std::string> RowsetMetaManager::get_binlog_filenames(OlapMeta* meta, auto traverse_func = [&rowset_id, &num_segments](const std::string& key, const std::string& value) -> bool { VLOG_DEBUG << fmt::format("key:{}, value:{}", key, value); - // key is 'binglog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593', extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593' + // key is 'binlog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593', extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593' // check starts with "binlog_meta_" if (!starts_with_binlog_meta(key)) { LOG(WARNING) << fmt::format("invalid binlog meta key:{}", key); @@ -229,7 +229,7 @@ std::pair<std::string, int64_t> RowsetMetaManager::get_binlog_info( auto traverse_func = [&rowset_id, &num_segments](const std::string& key, const std::string& value) -> bool { VLOG_DEBUG << fmt::format("key:{}, value:{}", key, value); - // key is 'binglog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593', extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593' + // key is 'binlog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593', extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593' auto pos = key.rfind('_'); if (pos == std::string::npos) { LOG(WARNING) << fmt::format("invalid binlog meta key:{}", key); @@ -283,6 +283,11 @@ Status RowsetMetaManager::remove(OlapMeta* meta, TabletUid tablet_uid, const Row return status; } +Status RowsetMetaManager::remove_binlog(OlapMeta* meta, const std::string& suffix) { + return meta->remove(META_COLUMN_FAMILY_INDEX, + {kBinlogMetaPrefix.data() + suffix, kBinlogDataPrefix.data() + suffix}); +} + Status RowsetMetaManager::traverse_rowset_metas( OlapMeta* meta, std::function<bool(const TabletUid&, const RowsetId&, const std::string&)> const& func) { @@ -307,6 +312,43 @@ Status RowsetMetaManager::traverse_rowset_metas( return status; } +Status RowsetMetaManager::traverse_binlog_metas( + OlapMeta* meta, std::function<bool(const string&, const string&, bool)> const& collector) { + std::pair<std::string, bool> last_info = std::make_pair(kBinlogMetaPrefix.data(), false); + bool seek_found = false; + Status status; + auto traverse_binlog_meta_func = [&last_info, &seek_found, &collector]( + const std::string& key, + const std::string& value) -> bool { + seek_found = true; + auto& [last_prefix, need_collect] = last_info; + size_t pos = key.find('_', kBinlogMetaPrefix.size()); + if (pos == std::string::npos) { + LOG(WARNING) << "invalid binlog meta key: " << key; + return true; + } + std::string_view key_view(key.data(), pos); + std::string_view last_prefix_view(last_prefix.data(), last_prefix.size() - 1); + + if (last_prefix_view != key_view) { + need_collect = collector(key, value, true); + last_prefix = std::string(key_view) + "~"; + } else if (need_collect) { + collector(key, value, false); + } + + return need_collect; + }; + + do { + seek_found = false; + status = meta->iterate(META_COLUMN_FAMILY_INDEX, last_info.first, kBinlogMetaPrefix.data(), + traverse_binlog_meta_func); + } while (status.ok() && seek_found); + + return status; +} + Status RowsetMetaManager::load_json_rowset_meta(OlapMeta* meta, const std::string& rowset_meta_path) { std::ifstream infile(rowset_meta_path); diff --git a/be/src/olap/rowset/rowset_meta_manager.h b/be/src/olap/rowset/rowset_meta_manager.h index e859b207e9..f0e834790b 100644 --- a/be/src/olap/rowset/rowset_meta_manager.h +++ b/be/src/olap/rowset/rowset_meta_manager.h @@ -64,9 +64,14 @@ public: static Status remove(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id); - static Status traverse_rowset_metas( - OlapMeta* meta, - std::function<bool(const TabletUid&, const RowsetId&, const std::string&)> const& func); + static Status remove_binlog(OlapMeta* meta, const std::string& suffix); + + static Status traverse_rowset_metas(OlapMeta* meta, + std::function<bool(const TabletUid&, const RowsetId&, + const std::string&)> const& collector); + + static Status traverse_binlog_metas( + OlapMeta* meta, std::function<bool(const string&, const string&, bool)> const& func); static Status load_json_rowset_meta(OlapMeta* meta, const std::string& rowset_meta_path); diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 340f02b65b..fb07235702 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -693,6 +693,9 @@ Status StorageEngine::start_trash_sweep(double* usage, bool ignore_guard) { // clean unused rowset metas in OlapMeta _clean_unused_rowset_metas(); + // clean unused binlog metas in OlapMeta + _clean_unused_binlog_metas(); + // cleand unused delete bitmap for deleted tablet _clean_unused_delete_bitmap(); @@ -774,6 +777,39 @@ void StorageEngine::_clean_unused_rowset_metas() { } } +void StorageEngine::_clean_unused_binlog_metas() { + std::vector<std::string> unused_binlog_key_suffixes; + auto unused_binlog_collector = [this, &unused_binlog_key_suffixes](const std::string& key, + const std::string& value, + bool need_check) -> bool { + if (need_check) { + BinlogMetaEntryPB binlog_meta_pb; + if (UNLIKELY(!binlog_meta_pb.ParseFromString(value))) { + LOG(WARNING) << "parse rowset meta string failed for binlog meta key: " << key; + } else if (_tablet_manager->get_tablet(binlog_meta_pb.tablet_id()) == nullptr) { + LOG(INFO) << "failed to find tablet " << binlog_meta_pb.tablet_id() + << " for binlog rowset: " << binlog_meta_pb.rowset_id() + << ", tablet may be dropped"; + } else { + return false; + } + } + + unused_binlog_key_suffixes.emplace_back(key.substr(kBinlogMetaPrefix.size())); + return true; + }; + auto data_dirs = get_stores(); + for (auto data_dir : data_dirs) { + RowsetMetaManager::traverse_binlog_metas(data_dir->get_meta(), unused_binlog_collector); + for (const auto& suffix : unused_binlog_key_suffixes) { + RowsetMetaManager::remove_binlog(data_dir->get_meta(), suffix); + } + LOG(INFO) << "remove " << unused_binlog_key_suffixes.size() + << " invalid binlog meta from dir: " << data_dir->path(); + unused_binlog_key_suffixes.clear(); + } +} + void StorageEngine::_clean_unused_delete_bitmap() { std::unordered_set<int64_t> removed_tablets; auto clean_delete_bitmap_func = [this, &removed_tablets](int64_t tablet_id, int64_t version, diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 274ba19ea9..3c3ecf33e5 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -254,6 +254,8 @@ private: void _clean_unused_rowset_metas(); + void _clean_unused_binlog_metas(); + void _clean_unused_delete_bitmap(); void _clean_unused_pending_publish_info(); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 5ad13feff1..7f03f726da 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -3569,7 +3569,7 @@ void Tablet::gc_binlogs(int64_t version) { if (binlog_meta_entry_pb.has_rowset_id_v2()) { rowset_id = binlog_meta_entry_pb.rowset_id_v2(); } else { - // key is 'binglog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593', extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593' + // key is 'binlog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593', extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593' auto pos = key.rfind('_'); if (pos == std::string::npos) { LOG(WARNING) << fmt::format("invalid binlog meta key:{}", key); diff --git a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogTombstone.java b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogTombstone.java index 48d5e04244..7a390f6b1d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogTombstone.java +++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogTombstone.java @@ -23,7 +23,6 @@ import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; import java.util.Collections; -import java.util.List; import java.util.Map; public class BinlogTombstone { @@ -36,12 +35,6 @@ public class BinlogTombstone { @SerializedName(value = "commitSeq") private long commitSeq; - // TODO(deadlinefen): delete this field later - // This is a reserved field for the transition between new and old versions. - // It will be deleted later - @SerializedName(value = "tableIds") - private List<Long> tableIds; - @SerializedName(value = "tableCommitSeqMap") private Map<Long, Long> tableCommitSeqMap; @@ -54,7 +47,6 @@ public class BinlogTombstone { this.dbBinlogTombstone = isDbTombstone; this.dbId = dbId; this.commitSeq = -1; - this.tableIds = Collections.emptyList(); this.tableCommitSeqMap = Maps.newHashMap(); } @@ -62,7 +54,6 @@ public class BinlogTombstone { this.dbBinlogTombstone = false; this.dbId = -1; this.commitSeq = commitSeq; - this.tableIds = Collections.emptyList(); this.tableCommitSeqMap = Collections.singletonMap(tableId, commitSeq); } @@ -92,14 +83,6 @@ public class BinlogTombstone { return dbId; } - // TODO(deadlinefen): deprecated this code later - public List<Long> getTableIds() { - if (tableIds == null) { - tableIds = Collections.emptyList(); - } - return tableIds; - } - public Map<Long, Long> getTableCommitSeqMap() { if (tableCommitSeqMap == null) { tableCommitSeqMap = Maps.newHashMap(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java b/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java index 35134eca87..58cb30bdde 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java @@ -424,17 +424,6 @@ public class DBBinlog { } Map<Long, Long> tableCommitSeqMap = tombstone.getTableCommitSeqMap(); - // TODO(deadlinefen): delete this code - // This is a reserved code for the transition between new and old versions. - // It will be deleted later - if (tableCommitSeqMap.isEmpty()) { - long commitSeq = tombstone.getCommitSeq(); - List<Long> tableIds = tombstone.getTableIds(); - for (long tableId : tableIds) { - tableCommitSeqMap.put(tableId, commitSeq); - } - } - for (TableBinlog tableBinlog : tableBinlogs) { long tableId = tableBinlog.getTableId(); if (tableCommitSeqMap.containsKey(tableId)) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org