This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 32dda416c27 [log](gc) add log for unused rowsets gc (#30854)
32dda416c27 is described below

commit 32dda416c2717b160a70f1a370620648ca6e888c
Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com>
AuthorDate: Tue Feb 6 17:37:13 2024 +0800

    [log](gc) add log for unused rowsets gc (#30854)
---
 be/src/olap/data_dir.cpp       |  8 ++++----
 be/src/olap/storage_engine.cpp | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index 0f6d279e729..d67a97877df 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -713,7 +713,7 @@ void DataDir::_perform_path_gc_by_rowset(const std::vector<std::string>& tablet_
         bool is_valid = doris::TabletManager::get_tablet_id_and_schema_hash_from_path(
                 path, &tablet_id, &schema_hash);
         if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
-            LOG(WARNING) << "unknown path:" << path;
+            LOG(WARNING) << "[path gc] unknown path:" << path;
             continue;
         }
 
@@ -734,7 +734,7 @@ void DataDir::_perform_path_gc_by_rowset(const std::vector<std::string>& tablet_
         std::vector<io::FileInfo> files;
         auto st = io::global_local_filesystem()->list(path, true, &files, &exists);
         if (!st.ok()) [[unlikely]] {
-            LOG(WARNING) << "fail to list tablet path " << path << " : " << st;
+            LOG(WARNING) << "[path gc] fail to list tablet path " << path << " : " << st;
             continue;
         }
 
@@ -763,10 +763,10 @@ void DataDir::_perform_path_gc_by_rowset(const std::vector<std::string>& tablet_
         auto reclaim_rowset_file = [](const std::string& path) {
             auto st = io::global_local_filesystem()->delete_file(path);
             if (!st.ok()) [[unlikely]] {
-                LOG(WARNING) << "failed to delete garbage rowset file: " << st;
+                LOG(WARNING) << "[path gc] failed to delete garbage rowset file: " << st;
                 return;
             }
-            LOG(INFO) << "delete garbage path: " << path; // Audit log
+            LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log
         };
 
         auto should_reclaim = [&, this](const RowsetId& rowset_id) {
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 7323b955383..4c64f8dfea5 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -1068,8 +1068,12 @@ void StorageEngine::_parse_default_rowset_type() {
 }
 
 void StorageEngine::start_delete_unused_rowset() {
+    LOG(INFO) << "start to delete unused rowset, size: " << _unused_rowsets.size();
     std::vector<RowsetSharedPtr> unused_rowsets_copy;
     unused_rowsets_copy.reserve(_unused_rowsets.size());
+    auto due_to_use_count = 0;
+    auto due_to_not_delete_file = 0;
+    auto due_to_delayed_expired_ts = 0;
     {
         std::lock_guard<std::mutex> lock(_gc_mutex);
         for (auto it = _unused_rowsets.begin(); it != _unused_rowsets.end();) {
@@ -1086,9 +1090,20 @@ void StorageEngine::start_delete_unused_rowset() {
                 it = _unused_rowsets.erase(it);
             } else {
                 ++it;
+                if (rs.use_count() != 1) {
+                    ++due_to_use_count;
+                } else if (!rs->need_delete_file()) {
+                    ++due_to_not_delete_file;
+                } else {
+                    ++due_to_delayed_expired_ts;
+                }
             }
         }
     }
+    LOG(INFO) << "collected " << unused_rowsets_copy.size() << " unused rowsets to remove, skipped "
+              << due_to_use_count << " rowsets due to use count > 1, skipped "
+              << due_to_not_delete_file << " rowsets due to don't need to delete file, skipped "
+              << due_to_delayed_expired_ts << " rowsets due to delayed expired timestamp.";
     for (auto&& rs : unused_rowsets_copy) {
         VLOG_NOTICE << "start to remove rowset:" << rs->rowset_id()
                     << ", version:" << rs->version();
@@ -1101,6 +1116,7 @@ void StorageEngine::start_delete_unused_rowset() {
         Status status = rs->remove();
         VLOG_NOTICE << "remove rowset:" << rs->rowset_id() << " finished. status:" << status;
     }
+    LOG(INFO) << "removed all collected unused rowsets";
 }
 
 void StorageEngine::add_unused_rowset(RowsetSharedPtr rowset) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to