This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d758de7a2 [improvement](binlog) gc be binlog metas when tablet is 
dropped. (#22447)
3d758de7a2 is described below

commit 3d758de7a27a228c79628164249e192dc9b8ba22
Author: DeadlineFen <117912096+deadline...@users.noreply.github.com>
AuthorDate: Fri Aug 4 14:38:13 2023 +0800

    [improvement](binlog) gc be binlog metas when tablet is dropped. (#22447)
---
 be/src/olap/binlog.h                               |  5 ++-
 be/src/olap/olap_meta.cpp                          | 10 ++++-
 be/src/olap/olap_meta.h                            |  3 ++
 be/src/olap/rowset/rowset_meta_manager.cpp         | 46 +++++++++++++++++++++-
 be/src/olap/rowset/rowset_meta_manager.h           | 11 ++++--
 be/src/olap/storage_engine.cpp                     | 36 +++++++++++++++++
 be/src/olap/storage_engine.h                       |  2 +
 be/src/olap/tablet.cpp                             |  2 +-
 .../org/apache/doris/binlog/BinlogTombstone.java   | 17 --------
 .../java/org/apache/doris/binlog/DBBinlog.java     | 11 ------
 10 files changed, 105 insertions(+), 38 deletions(-)

diff --git a/be/src/olap/binlog.h b/be/src/olap/binlog.h
index b6b95a9530..57853bd6db 100644
--- a/be/src/olap/binlog.h
+++ b/be/src/olap/binlog.h
@@ -25,8 +25,9 @@
 #include "olap/olap_common.h"
 
 namespace doris {
-constexpr std::string_view kBinlogPrefix = "binglog_";
+constexpr std::string_view kBinlogPrefix = "binlog_";
 constexpr std::string_view kBinlogMetaPrefix = "binlog_meta_";
+constexpr std::string_view kBinlogDataPrefix = "binlog_data_";
 
 inline auto make_binlog_meta_key(std::string_view tablet, int64_t version,
                                  std::string_view rowset) {
@@ -82,7 +83,7 @@ inline bool starts_with_binlog_meta(std::string_view str) {
 }
 
 inline std::string get_binlog_data_key_from_meta_key(std::string_view 
meta_key) {
-    // like "binglog_meta_6943f1585fe834b5-e542c2b83a21d0b7" => 
"binglog_data-6943f1585fe834b5-e542c2b83a21d0b7"
+    // like "binlog_meta_6943f1585fe834b5-e542c2b83a21d0b7" => 
"binlog_data_6943f1585fe834b5-e542c2b83a21d0b7"
     return fmt::format("{}data_{}", kBinlogPrefix, 
meta_key.substr(kBinlogMetaPrefix.length()));
 }
 } // namespace doris
diff --git a/be/src/olap/olap_meta.cpp b/be/src/olap/olap_meta.cpp
index 29bb48cf43..f393d4a262 100644
--- a/be/src/olap/olap_meta.cpp
+++ b/be/src/olap/olap_meta.cpp
@@ -277,12 +277,18 @@ Status OlapMeta::remove(const int column_family_index, 
const std::vector<std::st
 
 Status OlapMeta::iterate(const int column_family_index, const std::string& 
prefix,
                          std::function<bool(const std::string&, const 
std::string&)> const& func) {
+    return iterate(column_family_index, prefix, prefix, func);
+}
+
+Status OlapMeta::iterate(const int column_family_index, const std::string& 
seek_key,
+                         const std::string& prefix,
+                         std::function<bool(const std::string&, const 
std::string&)> const& func) {
     auto& handle = _handles[column_family_index];
     std::unique_ptr<Iterator> it(_db->NewIterator(ReadOptions(), 
handle.get()));
-    if (prefix == "") {
+    if (seek_key == "") {
         it->SeekToFirst();
     } else {
-        it->Seek(prefix);
+        it->Seek(seek_key);
     }
     rocksdb::Status status = it->status();
     if (!status.ok()) {
diff --git a/be/src/olap/olap_meta.h b/be/src/olap/olap_meta.h
index 174f2d065f..504a07f90f 100644
--- a/be/src/olap/olap_meta.h
+++ b/be/src/olap/olap_meta.h
@@ -60,6 +60,9 @@ public:
 
     Status iterate(const int column_family_index, const std::string& prefix,
                    std::function<bool(const std::string&, const std::string&)> 
const& func);
+    Status iterate(const int column_family_index, const std::string& seek_key,
+                   const std::string& prefix,
+                   std::function<bool(const std::string&, const std::string&)> 
const& func);
 
     std::string get_root_path() const { return _root_path; }
 
diff --git a/be/src/olap/rowset/rowset_meta_manager.cpp 
b/be/src/olap/rowset/rowset_meta_manager.cpp
index 32cb7c57eb..7047b965dc 100644
--- a/be/src/olap/rowset/rowset_meta_manager.cpp
+++ b/be/src/olap/rowset/rowset_meta_manager.cpp
@@ -170,7 +170,7 @@ std::vector<std::string> 
RowsetMetaManager::get_binlog_filenames(OlapMeta* meta,
     auto traverse_func = [&rowset_id, &num_segments](const std::string& key,
                                                      const std::string& value) 
-> bool {
         VLOG_DEBUG << fmt::format("key:{}, value:{}", key, value);
-        // key is 
'binglog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593',
 extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593'
+        // key is 
'binlog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593',
 extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593'
         // check starts with "binlog_meta_"
         if (!starts_with_binlog_meta(key)) {
             LOG(WARNING) << fmt::format("invalid binlog meta key:{}", key);
@@ -229,7 +229,7 @@ std::pair<std::string, int64_t> 
RowsetMetaManager::get_binlog_info(
     auto traverse_func = [&rowset_id, &num_segments](const std::string& key,
                                                      const std::string& value) 
-> bool {
         VLOG_DEBUG << fmt::format("key:{}, value:{}", key, value);
-        // key is 
'binglog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593',
 extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593'
+        // key is 
'binlog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593',
 extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593'
         auto pos = key.rfind('_');
         if (pos == std::string::npos) {
             LOG(WARNING) << fmt::format("invalid binlog meta key:{}", key);
@@ -283,6 +283,11 @@ Status RowsetMetaManager::remove(OlapMeta* meta, TabletUid 
tablet_uid, const Row
     return status;
 }
 
+Status RowsetMetaManager::remove_binlog(OlapMeta* meta, const std::string& 
suffix) {
+    return meta->remove(META_COLUMN_FAMILY_INDEX,
+                        {kBinlogMetaPrefix.data() + suffix, 
kBinlogDataPrefix.data() + suffix});
+}
+
 Status RowsetMetaManager::traverse_rowset_metas(
         OlapMeta* meta,
         std::function<bool(const TabletUid&, const RowsetId&, const 
std::string&)> const& func) {
@@ -307,6 +312,43 @@ Status RowsetMetaManager::traverse_rowset_metas(
     return status;
 }
 
+Status RowsetMetaManager::traverse_binlog_metas(
+        OlapMeta* meta, std::function<bool(const string&, const string&, 
bool)> const& collector) {
+    std::pair<std::string, bool> last_info = 
std::make_pair(kBinlogMetaPrefix.data(), false);
+    bool seek_found = false;
+    Status status;
+    auto traverse_binlog_meta_func = [&last_info, &seek_found, &collector](
+                                             const std::string& key,
+                                             const std::string& value) -> bool 
{
+        seek_found = true;
+        auto& [last_prefix, need_collect] = last_info;
+        size_t pos = key.find('_', kBinlogMetaPrefix.size());
+        if (pos == std::string::npos) {
+            LOG(WARNING) << "invalid binlog meta key: " << key;
+            return true;
+        }
+        std::string_view key_view(key.data(), pos);
+        std::string_view last_prefix_view(last_prefix.data(), 
last_prefix.size() - 1);
+
+        if (last_prefix_view != key_view) {
+            need_collect = collector(key, value, true);
+            last_prefix = std::string(key_view) + "~";
+        } else if (need_collect) {
+            collector(key, value, false);
+        }
+
+        return need_collect;
+    };
+
+    do {
+        seek_found = false;
+        status = meta->iterate(META_COLUMN_FAMILY_INDEX, last_info.first, 
kBinlogMetaPrefix.data(),
+                               traverse_binlog_meta_func);
+    } while (status.ok() && seek_found);
+
+    return status;
+}
+
 Status RowsetMetaManager::load_json_rowset_meta(OlapMeta* meta,
                                                 const std::string& 
rowset_meta_path) {
     std::ifstream infile(rowset_meta_path);
diff --git a/be/src/olap/rowset/rowset_meta_manager.h 
b/be/src/olap/rowset/rowset_meta_manager.h
index e859b207e9..f0e834790b 100644
--- a/be/src/olap/rowset/rowset_meta_manager.h
+++ b/be/src/olap/rowset/rowset_meta_manager.h
@@ -64,9 +64,14 @@ public:
 
     static Status remove(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& 
rowset_id);
 
-    static Status traverse_rowset_metas(
-            OlapMeta* meta,
-            std::function<bool(const TabletUid&, const RowsetId&, const 
std::string&)> const& func);
+    static Status remove_binlog(OlapMeta* meta, const std::string& suffix);
+
+    static Status traverse_rowset_metas(OlapMeta* meta,
+                                        std::function<bool(const TabletUid&, 
const RowsetId&,
+                                                           const 
std::string&)> const& collector);
+
+    static Status traverse_binlog_metas(
+            OlapMeta* meta, std::function<bool(const string&, const string&, 
bool)> const& func);
 
     static Status load_json_rowset_meta(OlapMeta* meta, const std::string& 
rowset_meta_path);
 
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 340f02b65b..fb07235702 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -693,6 +693,9 @@ Status StorageEngine::start_trash_sweep(double* usage, bool 
ignore_guard) {
     // clean unused rowset metas in OlapMeta
     _clean_unused_rowset_metas();
 
+    // clean unused binlog metas in OlapMeta
+    _clean_unused_binlog_metas();
+
     // cleand unused delete bitmap for deleted tablet
     _clean_unused_delete_bitmap();
 
@@ -774,6 +777,39 @@ void StorageEngine::_clean_unused_rowset_metas() {
     }
 }
 
+void StorageEngine::_clean_unused_binlog_metas() {
+    std::vector<std::string> unused_binlog_key_suffixes;
+    auto unused_binlog_collector = [this, &unused_binlog_key_suffixes](const 
std::string& key,
+                                                                       const 
std::string& value,
+                                                                       bool 
need_check) -> bool {
+        if (need_check) {
+            BinlogMetaEntryPB binlog_meta_pb;
+            if (UNLIKELY(!binlog_meta_pb.ParseFromString(value))) {
+                LOG(WARNING) << "parse rowset meta string failed for binlog 
meta key: " << key;
+            } else if (_tablet_manager->get_tablet(binlog_meta_pb.tablet_id()) 
== nullptr) {
+                LOG(INFO) << "failed to find tablet " << 
binlog_meta_pb.tablet_id()
+                          << " for binlog rowset: " << 
binlog_meta_pb.rowset_id()
+                          << ", tablet may be dropped";
+            } else {
+                return false;
+            }
+        }
+
+        
unused_binlog_key_suffixes.emplace_back(key.substr(kBinlogMetaPrefix.size()));
+        return true;
+    };
+    auto data_dirs = get_stores();
+    for (auto data_dir : data_dirs) {
+        RowsetMetaManager::traverse_binlog_metas(data_dir->get_meta(), 
unused_binlog_collector);
+        for (const auto& suffix : unused_binlog_key_suffixes) {
+            RowsetMetaManager::remove_binlog(data_dir->get_meta(), suffix);
+        }
+        LOG(INFO) << "remove " << unused_binlog_key_suffixes.size()
+                  << " invalid binlog meta from dir: " << data_dir->path();
+        unused_binlog_key_suffixes.clear();
+    }
+}
+
 void StorageEngine::_clean_unused_delete_bitmap() {
     std::unordered_set<int64_t> removed_tablets;
     auto clean_delete_bitmap_func = [this, &removed_tablets](int64_t 
tablet_id, int64_t version,
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 274ba19ea9..3c3ecf33e5 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -254,6 +254,8 @@ private:
 
     void _clean_unused_rowset_metas();
 
+    void _clean_unused_binlog_metas();
+
     void _clean_unused_delete_bitmap();
 
     void _clean_unused_pending_publish_info();
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 5ad13feff1..7f03f726da 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -3569,7 +3569,7 @@ void Tablet::gc_binlogs(int64_t version) {
         if (binlog_meta_entry_pb.has_rowset_id_v2()) {
             rowset_id = binlog_meta_entry_pb.rowset_id_v2();
         } else {
-            // key is 
'binglog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593',
 extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593'
+            // key is 
'binlog_meta_6943f1585fe834b5-e542c2b83a21d0b7_00000000000000000069_020000000000000135449d7cd7eadfe672aa0f928fa99593',
 extract last part '020000000000000135449d7cd7eadfe672aa0f928fa99593'
             auto pos = key.rfind('_');
             if (pos == std::string::npos) {
                 LOG(WARNING) << fmt::format("invalid binlog meta key:{}", key);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogTombstone.java 
b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogTombstone.java
index 48d5e04244..7a390f6b1d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogTombstone.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogTombstone.java
@@ -23,7 +23,6 @@ import com.google.common.collect.Maps;
 import com.google.gson.annotations.SerializedName;
 
 import java.util.Collections;
-import java.util.List;
 import java.util.Map;
 
 public class BinlogTombstone {
@@ -36,12 +35,6 @@ public class BinlogTombstone {
     @SerializedName(value = "commitSeq")
     private long commitSeq;
 
-    // TODO(deadlinefen): delete this field later
-    // This is a reserved field for the transition between new and old 
versions.
-    // It will be deleted later
-    @SerializedName(value = "tableIds")
-    private List<Long> tableIds;
-
     @SerializedName(value = "tableCommitSeqMap")
     private Map<Long, Long> tableCommitSeqMap;
 
@@ -54,7 +47,6 @@ public class BinlogTombstone {
         this.dbBinlogTombstone = isDbTombstone;
         this.dbId = dbId;
         this.commitSeq = -1;
-        this.tableIds = Collections.emptyList();
         this.tableCommitSeqMap = Maps.newHashMap();
     }
 
@@ -62,7 +54,6 @@ public class BinlogTombstone {
         this.dbBinlogTombstone = false;
         this.dbId = -1;
         this.commitSeq = commitSeq;
-        this.tableIds = Collections.emptyList();
         this.tableCommitSeqMap = Collections.singletonMap(tableId, commitSeq);
     }
 
@@ -92,14 +83,6 @@ public class BinlogTombstone {
         return dbId;
     }
 
-    // TODO(deadlinefen): deprecated this code later
-    public List<Long> getTableIds() {
-        if (tableIds == null) {
-            tableIds = Collections.emptyList();
-        }
-        return tableIds;
-    }
-
     public Map<Long, Long> getTableCommitSeqMap() {
         if (tableCommitSeqMap == null) {
             tableCommitSeqMap = Maps.newHashMap();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java 
b/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java
index 35134eca87..58cb30bdde 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java
@@ -424,17 +424,6 @@ public class DBBinlog {
         }
 
         Map<Long, Long> tableCommitSeqMap = tombstone.getTableCommitSeqMap();
-        // TODO(deadlinefen): delete this code
-        // This is a reserved code for the transition between new and old 
versions.
-        // It will be deleted later
-        if (tableCommitSeqMap.isEmpty()) {
-            long commitSeq = tombstone.getCommitSeq();
-            List<Long> tableIds = tombstone.getTableIds();
-            for (long tableId : tableIds) {
-                tableCommitSeqMap.put(tableId, commitSeq);
-            }
-        }
-
         for (TableBinlog tableBinlog : tableBinlogs) {
             long tableId = tableBinlog.getTableId();
             if (tableCommitSeqMap.containsKey(tableId)) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to