This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 53760a54dd6 [improvement](create tablet) be choose disk tolerate with 
little skew (#30354)
53760a54dd6 is described below

commit 53760a54dd61fa62cfff2f9cb7b83cfbadbf9174
Author: yujun <yu.jun.re...@gmail.com>
AuthorDate: Thu Jan 25 23:59:37 2024 +0800

    [improvement](create tablet) be choose disk tolerate with little skew 
(#30354)
---
 be/src/common/config.cpp       |  3 +++
 be/src/common/config.h         |  3 +++
 be/src/olap/storage_engine.cpp | 53 +++++++++++++++++++++++++++++++++---------
 be/src/olap/storage_engine.h   |  4 +---
 4 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index f006fa43342..6954de836ca 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1164,6 +1164,9 @@ DEFINE_mInt32(report_query_statistics_interval_ms, 
"3000");
 // 30s
 DEFINE_mInt32(query_statistics_reserve_timeout_ms, "30000");
 
+// treat two high-usage disks as being at the same available level if their usage difference does not exceed this value.
+DEFINE_mDouble(high_disk_avail_level_diff_usages, "0.15");
+
 // create tablet in partition random robin idx lru size, default 10000
 DEFINE_Int32(partition_disk_index_lru_size, "10000");
 
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 64555adbbb7..8a33c8c19d1 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1240,6 +1240,9 @@ DECLARE_Int32(ignore_invalid_partition_id_rowset_num);
 DECLARE_mInt32(report_query_statistics_interval_ms);
 DECLARE_mInt32(query_statistics_reserve_timeout_ms);
 
+// treat two high-usage disks as being at the same available level if their usage difference does not exceed this value.
+DECLARE_mDouble(high_disk_avail_level_diff_usages);
+
 // create tablet in partition random robin idx lru size, default 10000
 DECLARE_Int32(partition_disk_index_lru_size);
 
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 2090127f41c..069734d8acd 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -456,16 +456,6 @@ Status StorageEngine::set_cluster_id(int32_t cluster_id) {
     return Status::OK();
 }
 
-StorageEngine::DiskRemainingLevel get_available_level(double 
disk_usage_percent) {
-    assert(disk_usage_percent <= 1);
-    if (disk_usage_percent < 0.7) {
-        return StorageEngine::DiskRemainingLevel::LOW;
-    } else if (disk_usage_percent < 0.85) {
-        return StorageEngine::DiskRemainingLevel::MID;
-    }
-    return StorageEngine::DiskRemainingLevel::HIGH;
-}
-
 int StorageEngine::_get_and_set_next_disk_index(int64 partition_id,
                                                 TStorageMedium::type 
storage_medium) {
     auto key = CreateTabletIdxCache::get_key(partition_id, storage_medium);
@@ -481,6 +471,7 @@ int StorageEngine::_get_and_set_next_disk_index(int64 
partition_id,
 
 void StorageEngine::_get_candidate_stores(TStorageMedium::type storage_medium,
                                           std::vector<DirInfo>& dir_infos) {
+    std::vector<double> usages;
     for (auto& it : _store_map) {
         DataDir* data_dir = it.second.get();
         if (data_dir->is_used()) {
@@ -489,11 +480,51 @@ void 
StorageEngine::_get_candidate_stores(TStorageMedium::type storage_medium,
                 !data_dir->reach_capacity_limit(0)) {
                 DirInfo dir_info;
                 dir_info.data_dir = data_dir;
-                dir_info.available_level = 
get_available_level(data_dir->get_usage(0));
+                dir_info.available_level = 0;
+                usages.push_back(data_dir->get_usage(0));
                 dir_infos.push_back(dir_info);
             }
         }
     }
+
+    if (dir_infos.size() <= 1) {
+        return;
+    }
+
+    std::sort(usages.begin(), usages.end());
+    if (usages.back() < 0.7) {
+        return;
+    }
+
+    std::vector<double> level_min_usages;
+    level_min_usages.push_back(usages[0]);
+    for (auto usage : usages) {
+        // usage < 0.7 consider as one level, give a small skew
+        if (usage < 0.7 - (config::high_disk_avail_level_diff_usages / 2.0)) {
+            continue;
+        }
+
+        // at high usages, each level spans 15% by default.
+        // for example, with three disks whose usages are: 0.66, 0.72, 0.83,
+        // level_min_usages = [0.66, 0.83], which divides the disks into 2 levels: [0.66, 0.72], [0.83]
+        if (usage >= level_min_usages.back() + 
config::high_disk_avail_level_diff_usages) {
+            level_min_usages.push_back(usage);
+        }
+    }
+    for (auto& dir_info : dir_infos) {
+        double usage = dir_info.data_dir->get_usage(0);
+        for (size_t i = 1; i < level_min_usages.size() && usage >= 
level_min_usages[i]; i++) {
+            dir_info.available_level++;
+        }
+
+        // when usage is too high, balance no longer matters:
+        // push the disk to a higher level.
+        // for example, with two disks whose usages are 0.85 and 0.92, let tablets fall on the first disk.
+        // by default, storage_flood_stage_usage_percent = 90
+        if (usage > config::storage_flood_stage_usage_percent / 100.0) {
+            dir_info.available_level++;
+        }
+    }
 }
 
 std::vector<DataDir*> StorageEngine::get_stores_for_create_tablet(
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index b2b72b6d523..bc581aa329a 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -150,8 +150,6 @@ public:
     StorageEngine(const EngineOptions& options);
     ~StorageEngine() override;
 
-    enum class DiskRemainingLevel { LOW, MID, HIGH };
-
     Status open() override;
 
     Status create_tablet(const TCreateTabletReq& request, RuntimeProfile* 
profile);
@@ -541,7 +539,7 @@ public:
 struct DirInfo {
     DataDir* data_dir;
 
-    StorageEngine::DiskRemainingLevel available_level;
+    int available_level = 0;
 
     bool operator<(const DirInfo& other) const {
         if (available_level != other.available_level) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to